42.109. Time Series Forecasting Assignment#

42.109.1. Tasks#

This assignment covers two types of forecasting tasks:

42.109.1.1. Task 1#

Given 10 timesteps of features, predict the 'Volume USDT' of the next timestep (many-to-one).

42.109.1.2. Task 2#

Given 10 timesteps of features, predict the 'Volume USDT' at each of those timesteps (many-to-many).
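To make the two input/target shapes concrete, here is a minimal sketch on a hypothetical single-feature toy series:

# toy illustration of the two window shapes (hypothetical data)
series = list(range(1, 15))          # 14 timesteps of one feature
window = 10

# many-to-one (Task 1): 10 inputs -> the single value at step 11
X1, y1 = series[0:window], series[window]
print(X1, '->', y1)

# many-to-many (Task 2): 10 inputs -> 10 target values, shifted one step ahead
X2, y2 = series[0:window], series[1:window + 1]
print(X2, '->', y2)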

42.109.2. Imports#

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, Conv1D, SimpleRNN
# import plotly.express as px
tf.random.set_seed(42)
np.random.seed(42)

42.109.3. Loading Data#

42.109.3.1. CSV Data Path#

data_path = 'https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/timeseries.csv'

42.109.3.2. Reading CSV file with Pandas#

data = pd.read_csv(data_path)
data.head()

42.109.3.3. Normalizing Data#

from sklearn import preprocessing
features = ['high', 'low', 'open', 'close', 'Volume XRP', 'Volume USDT']

Column overview (the output of data.info()):

 #   Column       Non-Null Count  Dtype
 0   unix         1334 non-null   float64
 1   date         1334 non-null   object
 2   symbol       1334 non-null   object
 3   open         1334 non-null   float64
 4   high         1334 non-null   float64
 5   low          1334 non-null   float64
 6   close        1334 non-null   float64
 7   Volume XRP   1334 non-null   float64
 8   Volume USDT  1334 non-null   float64
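As a quick sanity check on the normalization (a sketch using the StandardScaler imported above), each scaled feature should end up with roughly zero mean and unit variance:

scaler_check = StandardScaler().fit(data[features])
scaled = scaler_check.transform(data[features])
print(scaled.mean(axis=0).round(3))  # ~0 for every feature
print(scaled.std(axis=0).round(3))   # ~1 for every feature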

42.109.4. Task 1#

42.109.4.1. Data Preparation#

Preparing the data for the first task: we will predict the value of 'Volume USDT' on the 11th day using the features from days 1 to 10.

def sequential_window_dataset(series, window_size):
    # add a feature axis so each timestep is a length-1 vector
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    # consecutive, non-overlapping windows of window_size + 1 steps
    ds = ds.window(window_size + 1, shift=window_size, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(window_size + 1))
    # inputs are the first window_size steps, targets the same steps shifted one ahead
    ds = ds.map(lambda window: (window[:-1], window[1:]))
    return ds.batch(1).prefetch(1)
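This helper is not used in the rest of the assignment, but a quick look at what it yields (a sketch on a toy tensor) makes the windowing explicit:

toy = tf.range(8, dtype=tf.float32)
for x, y in sequential_window_dataset(toy, window_size=3):
    # x holds 3 consecutive steps, y the same window shifted one step ahead
    print(x.numpy().squeeze(-1), '->', y.numpy().squeeze(-1))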
def data_preparation(df, lookback, future, scaler):
    date_train = pd.to_datetime(df['date'])
    features = ['high', 'low', 'open', 'close', 'Volume XRP', 'Volume USDT']
    # index of the target column ('Volume USDT') within the feature matrix
    target_col = features.index('Volume USDT')
    df_train = df[features].astype(float)

    # scale with the scaler fitted earlier on the full feature matrix
    df_train_scaled = scaler.transform(df_train)

    X, y = [], []
    for i in range(lookback, len(df_train_scaled) - future + 1):
        # inputs: the previous `lookback` timesteps of all features
        X.append(df_train_scaled[i - lookback:i, 0:df_train.shape[1]])
        # target: the scaled 'Volume USDT' value `future` steps ahead
        y.append(df_train_scaled[i + future - 1:i + future, target_col])

    return np.array(X), np.array(y), df_train, date_train
def predict(model, date_train, X_train, future_steps, ds):
    # forecast dates, starting from the last date in the training data
    dates = pd.date_range(list(date_train)[-1], periods=future_steps, freq='1d').tolist()
    # predict from the last `future_steps` windows of X_train
    predicted = model.predict(X_train[-future_steps:])
    # tile the single predicted channel across all columns of `ds` and flatten;
    # note that the predictions stay in scaled units
    predicted = np.repeat(predicted, ds.shape[1], axis=-1)
    nsamples, nx, ny = predicted.shape
    predicted = predicted.reshape((nsamples, nx * ny))

    return predicted, dates

def output_preparation(forecasting_dates, predictions, date_column='date', predicted_column='Volume USDT'):
    dates = [date.date() for date in forecasting_dates]
    predicted_df = pd.DataFrame(columns=[date_column, predicted_column])
    predicted_df[date_column] = pd.to_datetime(dates)
    # keep one value per forecast date (the first flattened prediction)
    predicted_df[predicted_column] = predictions[:, 0]
    return predicted_df
def results(df, lookback, future, scaler, col, X_train, y_train, df_train, date_train, model):
    predictions, forecasting_dates = predict(model, date_train, X_train, future, df)
    results = output_preparation(forecasting_dates, predictions)   
    print(results.head())
scaler = StandardScaler()
features = ['high', 'low', 'open', 'close', 'Volume XRP', 'Volume USDT']
df_train = data[features].astype(float)

# fit the scaler on all six features
scaler = scaler.fit(df_train)
df_scaled = scaler.transform(df_train)
# a separate scaler for the target alone, handy for inverse-transforming predictions
scaler_y = StandardScaler()
df_y = scaler_y.fit_transform(data[['Volume USDT']].to_numpy())
window_size = 10
future = 1
X_train, y_train, df_train, date_train = data_preparation(data, window_size, future, scaler)
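Before modeling, it is worth checking the shapes. With 1334 rows, a 10-step lookback, and a 1-step horizon, we expect 1324 windows:

print(X_train.shape)  # expected: (1324, 10, 6)
print(y_train.shape)  # expected: (1324, 1)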

42.109.4.2. Modeling#

42.109.4.2.1. ANN#

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
def ann_builder(X, y):
    model = tf.keras.models.Sequential()
    # Dense layers act on the last axis, so they are applied to each timestep independently
    model.add(Dense(12, activation='selu', input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    # stop early when validation loss plateaus and checkpoint the best weights
    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_ann.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
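Every model in this assignment is compiled with the Huber loss, which is quadratic for small errors and linear for large ones, making it less sensitive to volume spikes than plain MSE. A minimal numpy sketch of the idea (assuming the Keras default delta of 1.0):

def huber(error, delta=1.0):
    # quadratic inside [-delta, delta], linear outside
    return np.where(np.abs(error) <= delta,
                    0.5 * error ** 2,
                    delta * (np.abs(error) - 0.5 * delta))

print(huber(np.array([-2.0, -0.5, 0.0, 0.5, 2.0])))  # [1.5 0.125 0. 0.125 1.5]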
model = ann_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_ann.h5"
model_path = get_file("best_model_ann.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.4.2.2. CNN#

def cnn_builder(X, y):
    model = tf.keras.models.Sequential()
    # 'valid' padding (the default) shrinks the time axis from 10 to 10 - 3 + 1 = 8
    model.add(Conv1D(32, 3, activation='relu', input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_cnn.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
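Because of the 'valid' padding, the CNN's output sequence is shorter than its input. A quick shape probe (a sketch using the window size and feature count of this task):

probe = Sequential([
    Conv1D(32, 3, activation='relu', input_shape=(10, 6)),
    Dense(1),
])
print(probe.output_shape)  # expected: (None, 8, 1)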
model = cnn_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_cnn.h5"
model_path = get_file("best_model_cnn_2.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.4.2.3. Simple RNN#

def rnn_builder(X, y):
    model = tf.keras.models.Sequential()
    # a single recurrent unit; return_sequences=True emits its state at every timestep
    model.add(SimpleRNN(1, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_rnn.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
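The return_sequences flag controls whether the recurrent layer returns the whole state sequence or only the final state. A quick comparison on random input (a sketch):

x = tf.random.normal((1, 10, 6))
print(SimpleRNN(1, return_sequences=True)(x).shape)   # (1, 10, 1)
print(SimpleRNN(1, return_sequences=False)(x).shape)  # (1, 1)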
model = rnn_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_rnn.h5"
model_path = get_file("best_model_rnn.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.4.2.4. LSTM#

def lstm_builder(X, y):
    model = tf.keras.models.Sequential()
    # a single LSTM unit returning its state at every timestep
    model.add(LSTM(1, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_lstm.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
model = lstm_builder(X_train,y_train)
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_lstm.h5"
model_path = get_file("best_model_lstm.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.5. Task 2#

We reuse the same pipeline, setting the window size and the future horizon to the same value (20 timesteps in the code below).

window_size = 20
future = 20
X_train, y_train, df_train, date_train = data_preparation(data, window_size, future, scaler)
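Note that data_preparation still returns a single target value per window (the value `future` steps ahead); the per-timestep outputs come from the models themselves. With 1334 rows, a 20-step lookback, and a 20-step horizon, we expect 1295 windows:

print(X_train.shape)  # expected: (1295, 20, 6)
print(y_train.shape)  # expected: (1295, 1)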
def results(df, lookback, future, scaler, col, X_train, y_train, df_train, date_train, model):
    predictions, forecasting_dates = predict(model, date_train, X_train, future, df)
    results = output_preparation(forecasting_dates, predictions)
    print(results.head())
    # plot the forecast against its dates
    plt.plot(results['date'], results['Volume USDT'])
    plt.show()

We reuse the same model architectures as in Task 1.

42.109.5.1. ANN#

def ann_builder(X, y):
    model = tf.keras.models.Sequential()
    model.add(Dense(32, activation='selu', input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_ann_2.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
model = ann_builder(X_train,y_train)
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_ann_2.h5"
model_path = get_file("best_model_ann_2.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.5.2. RNN#

def rnn_builder(X, y):
    model = tf.keras.models.Sequential()
    model.add(SimpleRNN(1, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_rnn_2.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
model = rnn_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_rnn_2.h5"
model_path = get_file("best_model_rnn_2.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.5.3. CNN#

def cnn_builder(X, y):
    model = tf.keras.models.Sequential()
    model.add(Conv1D(32, 3, activation='relu', input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_cnn_2.h5', monitor='val_loss', save_best_only=True)]

    # pass the callbacks here too, so early stopping and checkpointing apply
    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
model = cnn_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_cnn_2.h5"
model_path = get_file("best_model_cnn_2.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.109.5.4. LSTM#

def lstm_builder(X, y):
    model = tf.keras.models.Sequential()
    model.add(LSTM(1, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
    model.add(Dense(y.shape[1]))

    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        loss='huber',
        optimizer=optimizer,
        metrics=['mse']
    )

    callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                 ModelCheckpoint(filepath='best_model_lstm_2.h5', monitor='val_loss', save_best_only=True)]

    model.fit(X, y, epochs=50, verbose=1, validation_split=0.1, callbacks=callbacks, batch_size=16)
    return model
model = lstm_builder(X_train, y_train)
pd.DataFrame(model.history.history).plot()
# if you don't want to train the model yourself, you can load a pretrained one with this cell
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import get_file
model_url = "https://static-1300131294.cos.ap-shanghai.myqcloud.com/data/deep-learning/time-series/model/best_model_lstm_2.h5"
model_path = get_file("best_model_lstm_2.h5", model_url)
model = load_model(model_path)
results(data, window_size, future, scaler, 'Volume USDT', X_train, y_train, df_train, date_train, model)

42.110. Acknowledgments#

Thanks to Taha Bouhsine for creating the open-source course WTS2: Time Series Forecasting with TF. It inspired the majority of the content in this chapter.