{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# DQN On Foreign Exchange Market\n", "## Load dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" }, "outputs": [], "source": [ "# This Python 3 environment comes with many helpful analytics libraries installed\n", "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n", "# For example, here's several helpful packages to load in\n", "\n", "import random\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import os\n", "\n", "from keras.models import Sequential\n", "from keras.layers import Dense\n", "from keras.optimizers import Adam\n", "\n", "from collections import deque\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Flags for the debuging purposes are presented here." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# If this flag is set, each step of the environment's state will be printed\n", "ENVIRONMENT_DEBUG = False" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TarihŞimdiAçılışYüksekDüşükFark %
0NaT5.08395,09245,11515,06120.35
1NaT5.06604,99245,09444,95911.42
2NaT4.99504,91365,01834,90861.64
3NaT4.91424,88454,93244,87340.65
4NaT4.88234,84594,91044,84070.52
5NaT4.85724,85544,85844,85440.09
6NaT4.85294,86444,88304,8340-0.25
7NaT4.86494,77054,88454,76601.93
8NaT4.77284,88834,89244,7728-2.32
9NaT4.88614,74154,94034,73453.09
\n", "
" ], "text/plain": [ " Tarih Şimdi Açılış Yüksek Düşük Fark %\n", "0 NaT 5.0839 5,0924 5,1151 5,0612 0.35\n", "1 NaT 5.0660 4,9924 5,0944 4,9591 1.42\n", "2 NaT 4.9950 4,9136 5,0183 4,9086 1.64\n", "3 NaT 4.9142 4,8845 4,9324 4,8734 0.65\n", "4 NaT 4.8823 4,8459 4,9104 4,8407 0.52\n", "5 NaT 4.8572 4,8554 4,8584 4,8544 0.09\n", "6 NaT 4.8529 4,8644 4,8830 4,8340 -0.25\n", "7 NaT 4.8649 4,7705 4,8845 4,7660 1.93\n", "8 NaT 4.7728 4,8883 4,8924 4,7728 -2.32\n", "9 NaT 4.8861 4,7415 4,9403 4,7345 3.09" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = pd.read_csv(\n", " \"https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/dqn/USD_TRY%20Gemi%20Verileri.csv\"\n", ")\n", "ds[\"Tarih\"] = pd.to_datetime(ds[\"Tarih\"], format='%d/%m/%Y', errors=\"coerce\")\n", "ds[\"Şimdi\"] = pd.to_numeric(ds[\"Şimdi\"].str.replace(\",\", \".\"), errors=\"coerce\")\n", "ds[\"Fark %\"] = ds[\"Fark %\"].str.replace(\"%\", \"\")\n", "ds[\"Fark %\"] = ds[\"Fark %\"].str.replace(\",\", \".\")\n", "\n", "ds.head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "The reverse version of the data will be used. It is expected to make the learning stronger since from 2002 to this date, usd is increasing with respect to try." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "X = ds[\"Şimdi\"]\n", "Y = ds[\"Tarih\"]\n", "X = np.array(X).reshape((len(X), 1))\n", "Y = np.array(Y).reshape((len(Y), 1))\n", "fig = plt.figure()\n", "ax = fig.add_subplot(111)\n", "ax.plot_date(Y, X, \".\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4000, 207)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split train and test data\n", "\n", "date_split = 4000\n", "train = ds[:date_split]\n", "test = ds[date_split:]\n", "len(train), len(test)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Define envireonment" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "class Environment:\n", " def __init__(self, data, tl, history_t=10):\n", " self.data = data\n", " self.history_t = history_t\n", " self.tl_start = tl\n", " self.reset()\n", "\n", " def reset(self):\n", " self.tl = self.tl_start\n", " self.usd = 0\n", " self.done = False\n", " self.profits = 0\n", " self.current_position = \"none\"\n", " self.history = [self.data.iloc[x, :][\"Fark %\"] for x in range(self.history_t)]\n", " self.t = self.history_t\n", " self.last_tl = 0\n", " return self.history\n", "\n", " def step(self, act):\n", " reward = 0\n", " if act == 0: # Hold\n", " self.current_position = self.current_position\n", " elif act == 1: # Buy\n", " if self.current_position == \"none\":\n", " self.current_position = \"long\"\n", " # Buy usd\n", "\n", " self.last_tl = self.tl\n", " self.usd = self.tl / (self.data.iloc[self.t, :][\"Şimdi\"])\n", " self.tl = 0\n", " else:\n", " self.current_position = self.current_position\n", "\n", " else: # sell\n", " if self.current_position == \"long\":\n", " self.current_position = \"none\"\n", " # Sell usd\n", "\n", " self.tl = self.usd * (self.data.iloc[self.t, :][\"Şimdi\"])\n", " 
self.usd = 0\n", " self.profits = self.profits + (self.tl - self.last_tl)\n", "\n", " if (self.tl - self.last_tl) > 0:\n", " reward = 1\n", " else:\n", " reward = -1\n", " else:\n", " self.current_position = self.current_position\n", "\n", " # et next time\n", " self.t += 1\n", " # print(\"history before: \",self.history)\n", " self.history.pop(0)\n", " self.history.append(self.data.iloc[self.t, :][\"Fark %\"])\n", " # print(\"history after: \",self.history)\n", "\n", " # print(\"reward: \",reward)\n", "\n", " if ENVIRONMENT_DEBUG == True:\n", " print(\n", " \"t: \",\n", " (self.t - self.history_t),\n", " \" reward: \",\n", " reward,\n", " \" profits: \",\n", " self.profits,\n", " \" current position: \",\n", " self.current_position,\n", " \" done: \",\n", " self.done,\n", " )\n", "\n", " if self.t == (len(self.data) - 1):\n", " self.done = True\n", " print(\n", " \"Total steps: \",\n", " (self.t - self.history_t),\n", " \" TotalProfit: \",\n", " self.profits,\n", " \" done: \",\n", " self.done,\n", " )\n", "\n", " return self.history, reward, self.done, self.profits # obs, reward, done" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Agent class" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Deep Q-learning Agent\n", "class DQNAgent:\n", " def __init__(self, state_size, action_size, hidden_layer_size):\n", " self.state_size = state_size\n", " self.action_size = action_size\n", " self.hidden_layer_size = hidden_layer_size\n", " self.memory = deque(maxlen=2000)\n", " self.gamma = 0.95 # discount rate\n", " self.epsilon = 1.0 # exploration rate\n", " self.epsilon_min = 0.01\n", " self.epsilon_decay = 0.995\n", " self.learning_rate = 0.001\n", " self.model = self._build_model()\n", "\n", " def _build_model(self):\n", " # Neural Net for Deep-Q learning Model\n", " model = Sequential()\n", " model.add(\n", " Dense(self.hidden_layer_size, input_dim=self.state_size, 
activation=\"relu\")\n", " )\n", " model.add(Dense(self.hidden_layer_size, activation=\"relu\"))\n", " model.add(Dense(self.action_size, activation=\"linear\"))\n", " model.compile(loss=\"mse\", optimizer=Adam(learning_rate=self.learning_rate))\n", " return model\n", "\n", " def remember(self, state, action, reward, next_state, done):\n", " self.memory.append((state, action, reward, next_state, done))\n", "\n", " def act(self, state):\n", " state = np.array(state).astype(float)\n", " state = np.reshape(state, [1, self.state_size])\n", " if np.random.rand() <= self.epsilon:\n", " return random.randrange(self.action_size)\n", " act_values = self.model.predict(state)\n", " return np.argmax(act_values[0]) # returns action\n", "\n", " def act_greedy(self, state):\n", " state = np.array(state).astype(float)\n", " state = np.reshape(state, [1, self.state_size])\n", " act_values = self.model.predict(state)\n", " return np.argmax(act_values[0]) # returns action\n", "\n", " def replay(self, batch_size):\n", " minibatch = random.sample(self.memory, batch_size)\n", " for state, action, reward, next_state, done in minibatch:\n", " target = reward\n", " if not done:\n", " target = reward + self.gamma * np.amax(\n", " self.model.predict(next_state)[0]\n", " )\n", " target_f = self.model.predict(state)\n", " target_f[0][action] = target\n", " self.model.fit(state, target_f, epochs=1, verbose=0)\n", " if self.epsilon > self.epsilon_min:\n", " self.epsilon *= self.epsilon_decay" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Train the DQN" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From d:\\conda\\envs\\py39\\lib\\site-packages\\keras\\src\\backend.py:873: The name tf.get_default_graph is deprecated. 
Please use tf.compat.v1.get_default_graph instead.\n", "\n", "Total steps: 3909 TotalProfit: -36.91076629230869 done: True\n", "1/1 [==============================] - 1s 603ms/step\n", "1/1 [==============================] - 0s 126ms/step\n", "WARNING:tensorflow:From d:\\conda\\envs\\py39\\lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n", "\n", "1/1 [==============================] - 0s 148ms/step\n", "1/1 [==============================] - 0s 82ms/step\n", "1/1 [==============================] - 0s 79ms/step\n", "1/1 [==============================] - 0s 239ms/step\n", "1/1 [==============================] - 0s 147ms/step\n", "1/1 [==============================] - 0s 136ms/step\n", "1/1 [==============================] - 0s 110ms/step\n", "1/1 [==============================] - 0s 140ms/step\n", "1/1 [==============================] - 0s 110ms/step\n", "1/1 [==============================] - 0s 116ms/step\n", "1/1 [==============================] - 0s 207ms/step\n", "1/1 [==============================] - 0s 91ms/step\n", "1/1 [==============================] - 0s 224ms/step\n", "1/1 [==============================] - 0s 203ms/step\n", "1/1 [==============================] - 0s 142ms/step\n", "1/1 [==============================] - 0s 93ms/step\n", "1/1 [==============================] - 0s 73ms/step\n", "1/1 [==============================] - 0s 112ms/step\n", "1/1 [==============================] - 0s 112ms/step\n", "1/1 [==============================] - 0s 247ms/step\n", "1/1 [==============================] - 0s 195ms/step\n", "1/1 [==============================] - 0s 126ms/step\n", "1/1 [==============================] - 0s 219ms/step\n", "1/1 [==============================] - 0s 133ms/step\n", "1/1 [==============================] - 0s 92ms/step\n", "1/1 [==============================] - 0s 93ms/step\n", "1/1 
[==============================] - 0s 157ms/step\n", "1/1 [==============================] - 0s 196ms/step\n", "1/1 [==============================] - 0s 172ms/step\n", "1/1 [==============================] - 0s 176ms/step\n", "1/1 [==============================] - 0s 163ms/step\n", "1/1 [==============================] - 0s 113ms/step\n", "1/1 [==============================] - 0s 116ms/step\n", "1/1 [==============================] - 0s 155ms/step\n", "1/1 [==============================] - 0s 188ms/step\n", "1/1 [==============================] - 0s 147ms/step\n", "1/1 [==============================] - 0s 101ms/step\n", "1/1 [==============================] - 0s 162ms/step\n", "1/1 [==============================] - 0s 181ms/step\n", "1/1 [==============================] - 0s 150ms/step\n", "1/1 [==============================] - 0s 123ms/step\n", "1/1 [==============================] - 0s 127ms/step\n", "1/1 [==============================] - 0s 154ms/step\n", "1/1 [==============================] - 0s 183ms/step\n", "1/1 [==============================] - 0s 215ms/step\n", "1/1 [==============================] - 0s 63ms/step\n", "1/1 [==============================] - 0s 149ms/step\n", "1/1 [==============================] - 0s 122ms/step\n", "1/1 [==============================] - 0s 90ms/step\n", "1/1 [==============================] - 0s 135ms/step\n", "1/1 [==============================] - 0s 157ms/step\n", "1/1 [==============================] - 0s 123ms/step\n", "1/1 [==============================] - 0s 142ms/step\n", "1/1 [==============================] - 0s 126ms/step\n", "1/1 [==============================] - 0s 177ms/step\n", "1/1 [==============================] - 0s 202ms/step\n", "1/1 [==============================] - 0s 67ms/step\n", "1/1 [==============================] - 0s 153ms/step\n", "1/1 [==============================] - 0s 89ms/step\n", "1/1 [==============================] - 0s 99ms/step\n", "1/1 [==============================] 
if __name__ == "__main__":
    # Run configuration
    EPISODES = 1
    STATE_SIZE = 90  # number of "Fark %" values in one observation
    START_BALANCE_TL = 100
    MAX_STEPS = 5000  # upper bound per episode; the loop exits earlier once the env reports done
    BATCH_SIZE = 32

    # Total profit reached at the end of every finished episode
    total_profits = []

    # Initialize the environment and the agent (3 actions, hidden layers of width 100)
    env = Environment(train, START_BALANCE_TL, STATE_SIZE)
    agent = DQNAgent(STATE_SIZE, 3, 100)

    for e in range(EPISODES):
        # Number of buy/sell decisions taken in this episode
        actions_count = 0

        # Cast the first observation to float32 like every later state, so the
        # replay memory never holds a non-numeric array.
        state = np.reshape(np.array(env.reset(), dtype=np.float32), [1, STATE_SIZE])

        for time_t in range(MAX_STEPS):
            action = agent.act(state)

            if action in (1, 2):
                actions_count += 1

            # Advance the environment by one row based on the action.
            next_state, reward, done, profits = env.step(action)
            next_state = np.reshape(
                np.array(next_state, dtype=np.float32), [1, STATE_SIZE]
            )

            # EXPERIMENTAL: use the cumulative profit as the reward signal
            reward = profits

            agent.remember(state, action, reward, next_state, done)
            state = next_state

            if done:
                total_profits.append(profits)
                break

        # Train on the experience gathered so far; never ask for more samples
        # than the memory holds.
        agent.replay(min(BATCH_SIZE, len(agent.memory)))
"def plot_profits(total_profits):\n", " epoch_count = range(1, len(total_profits) + 1)\n", " fig = plt.figure(figsize=(30, 10))\n", " plt.plot(epoch_count, total_profits, \"b-\")\n", " plt.legend(\"Total Profits\")\n", " plt.xlabel(\"Epoch\")\n", " plt.ylabel(\"Total Profits\")\n", " plt.figure(figsize=(50, 10))\n", " plt.show();" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_profits(total_profits)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Now test the agent with real world data." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TarihŞimdiAçılışYüksekDüşükFark %
4206NaT1.62251,62101,67251,60900.06
4205NaT1.62451,61501,63451,60000.12
4204NaT1.61501,62501,67201,5850-0.58
4203NaT1.69001,62001,69501,62004.64
4202NaT1.66801,68701,71101,6450-1.30
4201NaT1.66151,66601,66951,6400-0.39
4200NaT1.68001,65301,71001,60801.11
4199NaT1.66601,67001,68101,6450-0.83
4198NaT1.67801,65901,69701,65500.72
4197NaT1.68501,67401,70101,66000.42
\n", "
" ], "text/plain": [ " Tarih Şimdi Açılış Yüksek Düşük Fark %\n", "4206 NaT 1.6225 1,6210 1,6725 1,6090 0.06\n", "4205 NaT 1.6245 1,6150 1,6345 1,6000 0.12\n", "4204 NaT 1.6150 1,6250 1,6720 1,5850 -0.58\n", "4203 NaT 1.6900 1,6200 1,6950 1,6200 4.64\n", "4202 NaT 1.6680 1,6870 1,7110 1,6450 -1.30\n", "4201 NaT 1.6615 1,6660 1,6695 1,6400 -0.39\n", "4200 NaT 1.6800 1,6530 1,7100 1,6080 1.11\n", "4199 NaT 1.6660 1,6700 1,6810 1,6450 -0.83\n", "4198 NaT 1.6780 1,6590 1,6970 1,6550 0.72\n", "4197 NaT 1.6850 1,6740 1,7010 1,6600 0.42" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test = test.iloc[::-1]\n", "test.head(10)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1/1 [==============================] - 0s 306ms/step\n", "1/1 [==============================] - 0s 122ms/step\n", "1/1 [==============================] - 0s 131ms/step\n", "1/1 [==============================] - 0s 98ms/step\n", "1/1 [==============================] - 0s 60ms/step\n", "1/1 [==============================] - 0s 110ms/step\n", "1/1 [==============================] - 0s 219ms/step\n", "1/1 [==============================] - 0s 155ms/step\n", "1/1 [==============================] - 0s 212ms/step\n", "1/1 [==============================] - 0s 89ms/step\n", "1/1 [==============================] - 0s 161ms/step\n", "1/1 [==============================] - 0s 197ms/step\n", "1/1 [==============================] - 0s 120ms/step\n", "1/1 [==============================] - 0s 137ms/step\n", "1/1 [==============================] - 0s 120ms/step\n", "1/1 [==============================] - 0s 100ms/step\n", "1/1 [==============================] - 0s 83ms/step\n", "1/1 [==============================] - 0s 146ms/step\n", "1/1 [==============================] - 0s 100ms/step\n", "1/1 [==============================] - 0s 150ms/step\n", "1/1 
[==============================] - 0s 160ms/step\n", "1/1 [==============================] - 0s 191ms/step\n", "1/1 [==============================] - 0s 95ms/step\n", "1/1 [==============================] - 0s 140ms/step\n", "1/1 [==============================] - 0s 176ms/step\n", "1/1 [==============================] - 0s 307ms/step\n", "1/1 [==============================] - 0s 284ms/step\n", "1/1 [==============================] - 0s 109ms/step\n", "1/1 [==============================] - 0s 106ms/step\n", "1/1 [==============================] - 0s 93ms/step\n", "1/1 [==============================] - 0s 185ms/step\n", "1/1 [==============================] - 0s 148ms/step\n", "1/1 [==============================] - 0s 411ms/step\n", "1/1 [==============================] - 0s 326ms/step\n", "1/1 [==============================] - 0s 193ms/step\n", "1/1 [==============================] - 0s 331ms/step\n", "1/1 [==============================] - 0s 179ms/step\n", "1/1 [==============================] - 0s 111ms/step\n", "1/1 [==============================] - 0s 154ms/step\n", "1/1 [==============================] - 0s 83ms/step\n", "1/1 [==============================] - 0s 107ms/step\n", "1/1 [==============================] - 0s 220ms/step\n", "1/1 [==============================] - 0s 194ms/step\n", "1/1 [==============================] - 0s 360ms/step\n", "1/1 [==============================] - 0s 224ms/step\n", "1/1 [==============================] - 0s 133ms/step\n", "1/1 [==============================] - 0s 259ms/step\n", "1/1 [==============================] - 0s 326ms/step\n", "1/1 [==============================] - 0s 126ms/step\n", "1/1 [==============================] - 0s 232ms/step\n", "1/1 [==============================] - 0s 196ms/step\n", "1/1 [==============================] - 0s 81ms/step\n", "1/1 [==============================] - 0s 182ms/step\n", "1/1 [==============================] - 0s 168ms/step\n", "1/1 
[==============================] - 0s 239ms/step\n", "1/1 [==============================] - 0s 221ms/step\n", "1/1 [==============================] - 0s 291ms/step\n", "1/1 [==============================] - 0s 181ms/step\n", "1/1 [==============================] - 0s 121ms/step\n", "1/1 [==============================] - 0s 163ms/step\n", "1/1 [==============================] - 0s 203ms/step\n", "1/1 [==============================] - 0s 145ms/step\n", "1/1 [==============================] - 0s 96ms/step\n", "1/1 [==============================] - 0s 150ms/step\n", "1/1 [==============================] - 0s 137ms/step\n", "1/1 [==============================] - 0s 178ms/step\n", "1/1 [==============================] - 0s 109ms/step\n", "1/1 [==============================] - 0s 437ms/step\n", "1/1 [==============================] - 0s 288ms/step\n", "1/1 [==============================] - 0s 230ms/step\n", "1/1 [==============================] - 0s 296ms/step\n", "1/1 [==============================] - 0s 207ms/step\n", "1/1 [==============================] - 0s 219ms/step\n", "1/1 [==============================] - 0s 143ms/step\n", "1/1 [==============================] - 0s 174ms/step\n", "1/1 [==============================] - 0s 182ms/step\n", "1/1 [==============================] - 0s 166ms/step\n", "1/1 [==============================] - 0s 142ms/step\n", "1/1 [==============================] - 0s 156ms/step\n", "1/1 [==============================] - 0s 97ms/step\n", "1/1 [==============================] - 0s 206ms/step\n", "1/1 [==============================] - 0s 139ms/step\n", "1/1 [==============================] - 0s 120ms/step\n", "1/1 [==============================] - 0s 118ms/step\n", "1/1 [==============================] - 0s 139ms/step\n", "1/1 [==============================] - 0s 181ms/step\n", "1/1 [==============================] - 0s 189ms/step\n", "1/1 [==============================] - 0s 147ms/step\n", "1/1 
# Evaluate the trained agent greedily (no exploration) on the test split.
MAX_TEST_STEPS = 5000  # upper bound; the loop exits earlier once the env reports done

env_test = Environment(test, 100, STATE_SIZE)  # 100 TL start, STATE_SIZE-long history

# Number of buy/sell decisions taken during the evaluation
actions_count = 0

# Cast observations to float32 so the network always receives numeric input.
state = np.reshape(np.array(env_test.reset(), dtype=np.float32), [1, STATE_SIZE])

for time_t in range(MAX_TEST_STEPS):
    action = agent.act_greedy(state)

    if action in (1, 2):
        actions_count += 1

    # Advance the environment by one row based on the action.
    next_state, reward, done, profits = env_test.step(action)

    # Evaluation transitions are deliberately NOT pushed into agent.memory:
    # test data must not leak into the replay buffer used for training.
    state = np.reshape(np.array(next_state, dtype=np.float32), [1, STATE_SIZE])

    # done becomes True when the environment reaches the last row
    if done:
        break