# stock-prediction/bot/core.py

#!/usr/bin/python
import time
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as md
import tensorflow as tf
import urllib.request, json

from bot.shared_config import *


def _elapsed_ms(start_time):
    """Return the wall-clock time elapsed since ``start_time`` in milliseconds."""
    return (time.time() - start_time) * 1000


def _fetch_ohlc(url, pair_key, max_rows, timeout=30):
    """Download OHLC candles and return the newest ``max_rows`` of them.

    Args:
        url: Full URL of the public OHLC endpoint, query string included.
        pair_key: Key below ``result`` in the JSON payload that holds the rows.
        max_rows: Maximum number of most recent rows to keep.
        timeout: Socket timeout in seconds, so a stalled connection cannot
            block the program indefinitely.

    Returns:
        A ``(timestamps, prices, volumes)`` tuple of equally long lists.
        Row index 0 is read as the timestamp, index 4 as the price and
        index 6 as the volume (close and volume in Kraken's OHLC row layout).

    Raises:
        RuntimeError: If the payload carries a non-empty ``error`` entry.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = json.loads(response.read().decode())

    # The API reports failures inside the JSON body rather than only via the
    # HTTP status, so surface them instead of failing later with a KeyError.
    errors = payload.get("error")
    if errors:
        raise RuntimeError("Market data API returned an error: {}".format(errors))

    candles = payload["result"][pair_key][-max_rows:]
    timestamps = [int(candle[0]) for candle in candles]
    prices = [float(candle[4]) for candle in candles]
    volumes = [float(candle[6]) for candle in candles]
    return timestamps, prices, volumes


def _build_training_batches(features, targets, num_periods, f_horizon):
    """Cut the series into aligned ``(x, y)`` windows of ``num_periods`` steps.

    Every target sample lies exactly ``f_horizon`` steps after its input
    sample, which is the same offset the test window uses.

    Args:
        features: 2-D array of shape ``(samples, inputs)``.
        targets: 1-D array with one value per sample.
        num_periods: Number of time steps per window.
        f_horizon: Offset, in steps, between an input and its target.

    Returns:
        ``(x_batches, y_batches)`` shaped ``(-1, num_periods, inputs)`` and
        ``(-1, num_periods, 1)``.

    Raises:
        ValueError: If there is not enough data for a single window.
    """
    # Only samples that still have a target f_horizon steps ahead are usable,
    # and the count must be a whole multiple of the window length.
    usable = ((len(features) - f_horizon) // num_periods) * num_periods
    if usable <= 0:
        raise ValueError(
            "Need at least {} samples to build a training window, got {}".format(
                num_periods + f_horizon, len(features)))

    x_batches = features[:usable].reshape(-1, num_periods, features.shape[1])
    y_batches = targets[f_horizon:f_horizon + usable].reshape(-1, num_periods, 1)
    return x_batches, y_batches


def _build_test_window(features, targets, num_periods, f_horizon):
    """Return the final input window and the targets ``f_horizon`` steps later.

    The input window ends ``f_horizon`` samples before the end of the series;
    the target window is the last ``num_periods`` target values.
    """
    tail = features[-(num_periods + f_horizon):]
    test_x = tail[:num_periods].reshape(-1, num_periods, features.shape[1])
    test_y = targets[-num_periods:].reshape(-1, num_periods, 1)
    return test_x, test_y


def _plot_forecast(timestamps, prices, y_test, y_pred, num_periods):
    """Draw actual vs. forecast prices plus their 3-sample moving averages.

    The ``num_periods`` prices immediately preceding the forecast window are
    used as a shared lead-in for both curves, so every plotted value sits on
    its own timestamp.
    """
    lead_in = prices[-2 * num_periods:-num_periods]
    actual_series = pd.Series(np.concatenate([lead_in, np.ravel(y_test)]))
    forecast_series = pd.Series(np.concatenate([lead_in, np.ravel(y_pred)]))

    plt.title("Forecast vs Actual", fontsize=14)

    xfmt = md.DateFormatter('%d.%m %H:%M:%S')
    plt.subplots_adjust(bottom=0.2)
    plt.xticks(rotation=25)
    plt.gca().xaxis.set_major_formatter(xfmt)

    # Take exactly as many trailing timestamps as there are plotted values.
    shown = timestamps[-len(actual_series):]
    datenums = md.date2num([dt.datetime.fromtimestamp(int(ts)) for ts in shown])

    plt.plot(datenums, actual_series, "b--", linewidth=1.0, label="Actual")
    plt.plot(datenums, forecast_series, "r--", linewidth=1.0, label="Forecast")
    plt.plot(datenums, actual_series.rolling(window=3, center=False).mean(),
             "y-", linewidth=2.0, label="Actual MA")
    plt.plot(datenums, forecast_series.rolling(window=3, center=False).mean(),
             "g-", linewidth=2.0, label="Predicted MA")

    plt.legend(loc="upper left")
    plt.xlabel("Time Periods")


def main():
    """Fetch ETH/USD candles, train a small RNN on them and plot its forecast.

    Steps: download 15-minute OHLC rows, build (price, volume) input windows
    with price targets ``f_horizon`` steps ahead, train a single-layer ReLU
    RNN with Adam, predict the last window and show a matplotlib chart.

    The model is built with the TensorFlow 1.x graph API (``tf.placeholder``,
    ``tf.contrib``). All reported timings are cumulative, measured from the
    start of this function. ``dump``, ``yellow`` and ``green`` come from
    ``bot.shared_config`` (presumably console output helpers).

    Raises:
        RuntimeError: If the market data API reports an error.
        ValueError: If too few rows were returned to build one window.
    """
    start_time = time.time()

    dump(yellow("Retrieving market data from API"))

    timestamps, prices, volumes = _fetch_ohlc(
        "https://api.kraken.com/0/public/OHLC?pair=ETHUSD&interval=15",
        "XETHZUSD",
        601,
    )

    dump(green("Retrieved API in {0:.3f} ms".format(_elapsed_ms(start_time))))
    dump(yellow("Initialize Tensorflow"))

    f_horizon = 10  # forecast horizon: how many periods ahead each target lies
    num_periods = 20  # number of time steps per training/test window
    inputs = 2  # features per time step: price and volume
    hidden = 100  # number of units in the recurrent cell
    output = 1  # values predicted per time step

    TS = np.column_stack((prices, volumes))  # shape (samples, inputs)
    TSo = np.array(prices)

    x_batches, y_batches = _build_training_batches(TS, TSo, num_periods, f_horizon)
    X_test, Y_test = _build_test_window(TS, TSo, num_periods, f_horizon)

    tf.reset_default_graph()  # start from a clean default graph

    X = tf.placeholder(tf.float32, [None, num_periods, inputs])
    y = tf.placeholder(tf.float32, [None, num_periods, output])

    basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden, activation=tf.nn.relu)
    rnn_output, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

    learning_rate = 0.001

    # Flatten the time axis so one dense layer maps every step's hidden state
    # to an output value, then restore the (batch, time, output) layout.
    stacked_rnn_output = tf.reshape(rnn_output, [-1, hidden])
    stacked_outputs = tf.layers.dense(stacked_rnn_output, output)
    outputs = tf.reshape(stacked_outputs, [-1, num_periods, output])

    loss = tf.reduce_sum(tf.square(outputs - y))  # sum of squared errors
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    epochs = 1000  # number of full-batch training steps

    with tf.Session() as sess:
        init.run()
        dump(green("Initialized Tensorflow in {0:.3f} ms".format(_elapsed_ms(start_time))))
        dump(yellow("Start Training"))

        train_feed = {X: x_batches, y: y_batches}
        for ep in range(epochs):
            sess.run(training_op, feed_dict=train_feed)
            if ep % 100 == 0:
                # The loss is a sum; divide by the number of targets so the
                # value printed under "MSE" really is a mean squared error.
                sse = loss.eval(feed_dict=train_feed)
                print(ep, "\tMSE:", sse / y_batches.size)

        dump(green("Finished training in {0:.3f} ms".format(_elapsed_ms(start_time))))

        dump(yellow("Start Predicting"))
        y_pred = sess.run(outputs, feed_dict={X: X_test})
        dump(green("Prediction finished in {0:.3f} ms".format(_elapsed_ms(start_time))))

    dump(yellow("Start Plotting and output"))

    _plot_forecast(timestamps, TSo, Y_test, y_pred, num_periods)

    dump(green("Finished complete program in {0:.3f} ms".format(_elapsed_ms(start_time))))
    plt.show()


# Script entry point: announce the run, then hand control to main().
if __name__ == "__main__":
    print("Starting prediction ...")
    main()