42.117. DQN On Foreign Exchange Market#

42.117.1. Load dataset#

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from collections import deque

Flags for debugging purposes are defined here.

# If this flag is set, each step of the environment's state will be printed
ENVIRONMENT_DEBUG = False
ds = pd.read_csv(
    "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/dqn/USD_TRY%20Gemi%20Verileri.csv"
)
ds["Tarih"] = pd.to_datetime(ds["Tarih"], format='%d/%m/%Y', errors="coerce")
ds["Şimdi"] = pd.to_numeric(ds["Şimdi"].str.replace(",", "."), errors="coerce")
ds["Fark %"] = ds["Fark %"].str.replace("%", "")
ds["Fark %"] = ds["Fark %"].str.replace(",", ".")

ds.head(10)
Tarih Şimdi Açılış Yüksek Düşük Fark %
0 NaT 5.0839 5,0924 5,1151 5,0612 0.35
1 NaT 5.0660 4,9924 5,0944 4,9591 1.42
2 NaT 4.9950 4,9136 5,0183 4,9086 1.64
3 NaT 4.9142 4,8845 4,9324 4,8734 0.65
4 NaT 4.8823 4,8459 4,9104 4,8407 0.52
5 NaT 4.8572 4,8554 4,8584 4,8544 0.09
6 NaT 4.8529 4,8644 4,8830 4,8340 -0.25
7 NaT 4.8649 4,7705 4,8845 4,7660 1.93
8 NaT 4.7728 4,8883 4,8924 4,7728 -2.32
9 NaT 4.8861 4,7415 4,9403 4,7345 3.09
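
Only the "Şimdi" column is converted to a float above; the other price columns and "Fark %" remain strings and are cast to float later on the fly. As an optional sketch that is not part of the original pipeline, the remaining comma-decimal columns could be converted up front:

# Optional sketch: convert the remaining comma-decimal columns to floats as well
for col in ["Açılış", "Yüksek", "Düşük", "Fark %"]:
    ds[col] = pd.to_numeric(ds[col].astype(str).str.replace(",", "."), errors="coerce")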

The reversed version of the data will be used. This is expected to strengthen learning, since from 2002 to the present USD has been steadily appreciating against TRY.
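
The rows in the CSV are ordered newest-first (the preview above starts near 5.08 TL per USD, while the test preview later starts near 1.62), so reversing puts the series into chronological order. A minimal sketch of such a reversal is shown below; ds_chronological is an illustrative name, and in this notebook only the test split is explicitly reversed before evaluation.

# Illustrative sketch: reverse the row order so the series runs oldest -> newest
ds_chronological = ds.iloc[::-1].reset_index(drop=True)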

X = ds["Şimdi"]
Y = ds["Tarih"]
X = np.array(X).reshape((len(X), 1))
Y = np.array(Y).reshape((len(Y), 1))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot_date(Y, X, ".")
plt.show()
../../../_images/dqn-on-foreign-exchange-market_6_0.png
# Split train and test data

date_split = 4000
train = ds[:date_split]
test = ds[date_split:]
len(train), len(test)
(4000, 207)

42.117.2. Define environment#

class Environment:
    def __init__(self, data, tl, history_t=10):
        self.data = data
        self.history_t = history_t
        self.tl_start = tl
        self.reset()

    def reset(self):
        self.tl = self.tl_start
        self.usd = 0
        self.done = False
        self.profits = 0
        self.current_position = "none"
        self.history = [self.data.iloc[x, :]["Fark %"] for x in range(self.history_t)]
        self.t = self.history_t
        self.last_tl = 0
        return self.history

    def step(self, act):
        reward = 0
        if act == 0:  # Hold
            pass  # keep the current position unchanged
        elif act == 1:  # Buy
            if self.current_position == "none":
                self.current_position = "long"
                # Buy usd

                self.last_tl = self.tl
                self.usd = self.tl / (self.data.iloc[self.t, :]["Şimdi"])
                self.tl = 0
            else:
                pass  # already holding USD; nothing to buy

        else:  # sell
            if self.current_position == "long":
                self.current_position = "none"
                # Sell usd

                self.tl = self.usd * (self.data.iloc[self.t, :]["Şimdi"])
                self.usd = 0
                self.profits = self.profits + (self.tl - self.last_tl)

                if (self.tl - self.last_tl) > 0:
                    reward = 1
                else:
                    reward = -1
            else:
                pass  # no open position; nothing to sell

        # advance to the next time step
        self.t += 1
        # print("history before: ",self.history)
        self.history.pop(0)
        self.history.append(self.data.iloc[self.t, :]["Fark %"])
        # print("history after: ",self.history)

        # print("reward: ",reward)

        if ENVIRONMENT_DEBUG:
            print(
                "t: ",
                (self.t - self.history_t),
                " reward: ",
                reward,
                " profits: ",
                self.profits,
                " current position: ",
                self.current_position,
                " done: ",
                self.done,
            )

        if self.t == (len(self.data) - 1):
            self.done = True
            print(
                "Total steps: ",
                (self.t - self.history_t),
                " TotalProfit: ",
                self.profits,
                " done: ",
                self.done,
            )

        return self.history, reward, self.done, self.profits  # obs, reward, done
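
As a quick sanity check that is not part of the original notebook, the environment can be instantiated on the training split and rolled through a short buy/hold/sell sequence; demo_env and demo_action are illustrative names.

# Illustrative sanity check: 100 TL starting capital, 10-step history
demo_env = Environment(train, 100, history_t=10)
obs = demo_env.reset()
for demo_action in [1, 0, 0, 2]:  # buy, hold, hold, sell
    obs, reward, done, profits = demo_env.step(demo_action)
print(len(obs), reward, done, profits)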

42.117.3. Agent class#

# Deep Q-learning Agent
class DQNAgent:
    def __init__(self, state_size, action_size, hidden_layer_size):
        self.state_size = state_size
        self.action_size = action_size
        self.hidden_layer_size = hidden_layer_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(
            Dense(self.hidden_layer_size, input_dim=self.state_size, activation="relu")
        )
        model.add(Dense(self.hidden_layer_size, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        state = np.array(state).astype(float)
        state = np.reshape(state, [1, self.state_size])
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def act_greedy(self, state):
        state = np.array(state).astype(float)
        state = np.reshape(state, [1, self.state_size])
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(
                    self.model.predict(next_state)[0]
                )
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
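
The replay step fits the network toward the standard one-step Q-learning target, reward + gamma * max_a' Q(next_state, a'), using transitions sampled uniformly from the replay memory. A small sanity check of the agent interface is sketched below; it is not part of the original notebook, and demo_agent and demo_state are illustrative names.

# Illustrative sanity check of the agent interface
demo_agent = DQNAgent(state_size=10, action_size=3, hidden_layer_size=16)
demo_state = np.zeros(10)
# With epsilon starting at 1.0, this is a random action in {0, 1, 2}
print(demo_agent.act(demo_state))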

42.117.4. Train the DQN#

if __name__ == "__main__":
    # macros
    EPISODES = 1
    STATE_SIZE = 90

    # profits list
    total_profits = []

    # initialize environment and the agent
    env = Environment(train, 100, STATE_SIZE)  # 100 TL starting capital, STATE_SIZE (90) step history
    agent = DQNAgent(STATE_SIZE, 3, 100)

    # Iterate the game
    for e in range(EPISODES):
        # check if the buy and sell actions are taken
        actions_count = 0

        # reset state in the beginning of each game
        state = env.reset()
        state = np.reshape(state, [1, STATE_SIZE])

        # time_t represents each frame of the game
        # the more time_t the more score
        for time_t in range(5000):
            # Decide action
            action = agent.act(state)

            if (action == 1) or (action == 2):
                actions_count = actions_count + 1

            # Advance the game to the next frame based on the action.
            next_state, reward, done, profits = env.step(action)
            next_state = np.array(next_state, dtype=np.float32)
            next_state = np.reshape(next_state, [1, STATE_SIZE])

            # make rewards = profits (EXPERIMENTAL)
            reward = profits

            # Remember the previous state, action, reward, and done
            agent.remember(state, action, reward, next_state, done)

            # make next_state the new current state for the next frame.
            state = next_state

            # done becomes True when the game ends
            if done:
                total_profits.append(profits)
                # print the score and break out of the loop
                # print("number of actions taken other than hold in this iteration is ",actions_count,"\n")
                # print("episode: {}/{}, score: {}".format(e, EPISODES, time_t))
                break
        # train the agent with the experience of the episode
        agent.replay(32)
Total steps:  3909  TotalProfit:  -36.91076629230869  done:  True
def plot_profits(total_profits):
    # One point per training episode
    episode_count = range(1, len(total_profits) + 1)
    plt.figure(figsize=(30, 10))
    plt.plot(episode_count, total_profits, "b-", label="Total Profits")
    plt.legend()
    plt.xlabel("Episode")
    plt.ylabel("Total Profits")
    plt.show()
plot_profits(total_profits)
../../../_images/dqn-on-foreign-exchange-market_15_0.png

Now test the agent on the held-out test data, this time acting greedily (act_greedy) instead of epsilon-greedily.

test = test.iloc[::-1]
test.head(10)
Tarih Şimdi Açılış Yüksek Düşük Fark %
4206 NaT 1.6225 1,6210 1,6725 1,6090 0.06
4205 NaT 1.6245 1,6150 1,6345 1,6000 0.12
4204 NaT 1.6150 1,6250 1,6720 1,5850 -0.58
4203 NaT 1.6900 1,6200 1,6950 1,6200 4.64
4202 NaT 1.6680 1,6870 1,7110 1,6450 -1.30
4201 NaT 1.6615 1,6660 1,6695 1,6400 -0.39
4200 NaT 1.6800 1,6530 1,7100 1,6080 1.11
4199 NaT 1.6660 1,6700 1,6810 1,6450 -0.83
4198 NaT 1.6780 1,6590 1,6970 1,6550 0.72
4197 NaT 1.6850 1,6740 1,7010 1,6600 0.42
env_test = Environment(test, 100, STATE_SIZE)  # 100 TL starting capital, STATE_SIZE (90) step history

# Iterate the game
for e in range(1):
    # check if the buy and sell actions are taken
    actions_count = 0

    # reset state in the beginning of each game
    state = env_test.reset()
    state = np.reshape(state, [1, STATE_SIZE])

    # time_t represents each frame of the game
    # the more time_t the more score
    for time_t in range(5000):
        # Decide action
        action = agent.act_greedy(state)

        if (action == 1) or (action == 2):
            actions_count = actions_count + 1

        # Advance the game to the next frame based on the action.
        next_state, reward, done, profits = env_test.step(action)

        next_state = np.reshape(next_state, [1, STATE_SIZE])

        # make rewards = profits (EXPERIMENTAL)
        reward = profits

        # Remember the previous state, action, reward, and done
        agent.remember(state, action, reward, next_state, done)

        # make next_state the new current state for the next frame.
        state = next_state

        # done becomes True when the game ends
        if done:
            break
Total steps:  116  TotalProfit:  -0.4191616766467092  done:  True
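
As a small follow-up that is not part of the original notebook, the final test profit can be expressed relative to the 100 TL starting capital:

# Relative return on the 100 TL starting capital over the test episode
print(f"Relative test profit: {env_test.profits / env_test.tl_start:.2%}")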

42.117.5. Acknowledgement#

Thanks to emrebulbul23 for creating DQN on foreign exchange market. It inspired the majority of the content in this article.