mirror of
https://github.com/wassname/rl-portfolio-management.git
synced 2026-06-27 16:46:41 +08:00
1211 lines
40 KiB
Plaintext
1211 lines
40 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Uses tensorforce version 0.2.0 (tensorforce-0.2.0)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:29:58.356761Z",
|
|
"start_time": "2017-10-15T02:29:49.898984Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# plotting\n",
|
|
"%matplotlib inline\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"import seaborn as sns\n",
|
|
"plt.style.use('ggplot')\n",
|
|
"\n",
|
|
"# numeric\n",
|
|
"import numpy as np\n",
|
|
"from numpy import random\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"# util\n",
|
|
"from collections import Counter\n",
|
|
"import pdb\n",
|
|
"import glob\n",
|
|
"import time\n",
|
|
"import tempfile\n",
|
|
"import itertools\n",
|
|
"from tqdm import tqdm_notebook as tqdm\n",
|
|
"import datetime\n",
|
|
"\n",
|
|
"# logging\n",
|
|
"import logging\n",
|
|
"logger = log = logging.getLogger(__name__)\n",
|
|
"# log.setLevel(logging.INFO)\n",
|
|
"logging.basicConfig()\n",
|
|
"log.info('%s logger started.', __name__)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:29:59.755732Z",
|
|
"start_time": "2017-10-15T02:29:58.358390Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import gym\n",
|
|
"from gym import error, spaces, utils\n",
|
|
"from gym.utils import seeding"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:29:59.821678Z",
|
|
"start_time": "2017-10-15T02:29:59.757241Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"os.sys.path.append(os.path.abspath('.'))\n",
|
|
"%reload_ext autoreload\n",
|
|
"%autoreload 2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:34:29.996288Z",
|
|
"start_time": "2017-10-15T02:34:29.918670Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\r",
|
|
" \r",
|
|
"95/|/reward: -0.0000 [-0.0000, -0.0000], portfolio_value: 0.9998 [ 0.9998, 0.9998] expl= 81.00% 0%|| 95/200000 [03:10<105:36:04, 1.90s/it]"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'./outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-34-23.model'"
|
|
]
|
|
},
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# params\n",
|
|
"window_length = 50\n",
|
|
"import datetime\n",
|
|
"ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')\n",
|
|
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_%s.model' % ts\n",
|
|
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-34-23.model'\n",
|
|
"save_path\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:34:30.609819Z",
|
|
"start_time": "2017-10-15T02:34:30.524610Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'logs/tensorforce-PPO-prioritised_20171015_02-34-23'"
|
|
]
|
|
},
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# TensorBoard log directory, named after the checkpoint file (extension dropped).\n",
|
|
"log_dir = os.path.join('logs', os.path.splitext(os.path.basename(save_path))[0])\n",
|
|
"# exist_ok=True keeps the cell re-runnable without hiding real failures\n",
|
|
"# (e.g. permission errors), which a blanket `except OSError: pass` would swallow.\n",
|
|
"os.makedirs(log_dir, exist_ok=True)\n",
|
|
"log_dir"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Environment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:29:59.948831Z",
|
|
"start_time": "2017-10-15T02:29:59.910316Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from src.environments.portfolio import PortfolioEnv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:00.017952Z",
|
|
"start_time": "2017-10-15T02:29:59.949921Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from rl.memory import SequentialMemory, Memory\n",
|
|
"from collections import deque\n",
|
|
"\n",
|
|
"class EnvWrapper(PortfolioEnv):\n",
|
|
" \"\"\"Wraps env to normalise and reshape action.\"\"\"\n",
|
|
" def __init__(self, window_length=50, *args, **kwargs):\n",
|
|
" super().__init__(*args, **kwargs)\n",
|
|
" \n",
|
|
" def step(self, action):\n",
|
|
" # also it puts it in a list\n",
|
|
" if isinstance(action, list):\n",
|
|
" action = action[0]\n",
|
|
" \n",
|
|
" # we have to normalise for some reason softmax wont work\n",
|
|
" if isinstance(action, dict):\n",
|
|
" action = np.abs(list(action.values()))\n",
|
|
" action /= action.sum() \n",
|
|
" \n",
|
|
" return super().step(action) "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:00.993572Z",
|
|
"start_time": "2017-10-15T02:30:00.019275Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
|
|
"[2017-10-15 10:30:00,929] Making new env: CartPole-v0\n",
|
|
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
|
|
"[2017-10-15 10:30:00,987] Making new env: CartPole-v0\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Training environment, built from the 'train' key of the HDF store.\n",
|
|
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
|
|
"env = EnvWrapper(\n",
|
|
"    df=df_train,\n",
|
|
"    steps=300,\n",
|
|
"    scale=True,\n",
|
|
"    augment=0.000,\n",
|
|
"    trading_cost=0, # let's just overfit first\n",
|
|
"    window_length=window_length,\n",
|
|
")\n",
|
|
"# Call the seed() method; assigning `env.seed = 0` would replace the method with an int.\n",
|
|
"env.seed(0)\n",
|
|
"\n",
|
|
"# Test environment, same settings but built from the 'test' key.\n",
|
|
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
|
|
"env_test = EnvWrapper(\n",
|
|
"    df=df_test,\n",
|
|
"    steps=300,\n",
|
|
"    scale=True,\n",
|
|
"    augment=0.00,\n",
|
|
"    trading_cost=0, # let's just overfit first\n",
|
|
"    window_length=window_length,\n",
|
|
")\n",
|
|
"env_test.seed(0)\n",
|
|
"\n",
|
|
"from tensorforce.contrib.openai_gym import OpenAIGym\n",
|
|
"# Instantiate the tensorforce wrapper with a stock gym id, then swap in the portfolio env.\n",
|
|
"environment = OpenAIGym('CartPole-v0')\n",
|
|
"environment.gym = env\n",
|
|
"\n",
|
|
"environment_test = OpenAIGym('CartPole-v0')\n",
|
|
"environment_test.gym = env_test"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:01.088045Z",
|
|
"start_time": "2017-10-15T02:30:00.994806Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.0 False {'reward': 0.0, 'log_return': 0.0, 'portfolio_value': 1.0, 'return': 1.0023510634010704, 'rate_of_return': 0.0, 'cash_bias': 0.093732749593602255, 'cost': 0.0, 'market_value': 1.0023516084586421, 'date': 1429261200.0, 'steps': 2}\n",
|
|
"(5, 50, 3) (5, 50, 3)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# check shapes\n",
|
|
"obs1, reward, done, info=env.step(np.random.random(env.action_space.shape))\n",
|
|
"print(reward, done, info)\n",
|
|
"obs2 = env.reset()\n",
|
|
"print(obs1.shape,obs2.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-07-16T04:41:21.116729Z",
|
|
"start_time": "2017-07-16T12:41:21.086620+08:00"
|
|
}
|
|
},
|
|
"source": [
|
|
"# Model\n",
|
|
"\n",
|
|
"Derived from https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:55.981253Z",
|
|
"start_time": "2017-10-15T02:30:55.907749Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from tensorforce import Configuration\n",
|
|
"from tensorforce.agents import PPOAgent\n",
|
|
"from tensorforce.core.networks import layered_network_builder"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:56.395442Z",
|
|
"start_time": "2017-10-15T02:30:56.144527Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# layer helpers from:\n",
|
|
"# https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90\n",
|
|
"import tensorflow as tf\n",
|
|
"from math import sqrt\n",
|
|
"from tensorforce import util\n",
|
|
"from tensorforce import TensorForceError\n",
|
|
"\n",
|
|
"def linear(x, size, bias=True, l2_regularization=0.0):\n",
|
|
" if util.rank(x) != 2:\n",
|
|
" raise TensorForceError('Invalid input rank for linear layer.')\n",
|
|
" with tf.variable_scope('linear'):\n",
|
|
" weights = tf.Variable(initial_value=tf.random_normal(shape=(x.get_shape()[1].value, size), stddev=sqrt(2.0 / (x.get_shape()[1].value + size))))\n",
|
|
" if l2_regularization > 0.0:\n",
|
|
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=weights))\n",
|
|
" x = tf.matmul(a=x, b=weights)\n",
|
|
" if bias:\n",
|
|
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
|
|
" if l2_regularization > 0.0:\n",
|
|
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
|
|
" x = tf.nn.bias_add(value=x, bias=bias)\n",
|
|
" return x\n",
|
|
"\n",
|
|
"def nonlinearity(x, name='relu'):\n",
|
|
" with tf.variable_scope('nonlinearity'):\n",
|
|
" if name == 'elu':\n",
|
|
" x = tf.nn.elu(features=x)\n",
|
|
" elif name == 'relu':\n",
|
|
" x = tf.nn.relu(features=x)\n",
|
|
" elif name == 'selu':\n",
|
|
" # https://arxiv.org/pdf/1706.02515.pdf\n",
|
|
" alpha = 1.6732632423543772848170429916717\n",
|
|
" scale = 1.0507009873554804934193349852946\n",
|
|
" negative = alpha * tf.nn.elu(features=x)\n",
|
|
" x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)\n",
|
|
" elif name == 'sigmoid':\n",
|
|
" x = tf.sigmoid(x=x)\n",
|
|
" elif name == 'softmax':\n",
|
|
" x = tf.nn.softmax(logits=x)\n",
|
|
" elif name == 'tanh':\n",
|
|
" x = tf.nn.tanh(x=x)\n",
|
|
" else:\n",
|
|
" raise TensorForceError('Invalid nonlinearity.')\n",
|
|
" return x\n",
|
|
"\n",
|
|
"def dense(x, size, bias=True, activation='relu', l2_regularization=0.0):\n",
|
|
" if util.rank(x) != 2:\n",
|
|
" raise TensorForceError('Invalid input rank for dense layer.')\n",
|
|
" with tf.variable_scope('dense'):\n",
|
|
" x = linear(x=x, size=size, bias=bias, l2_regularization=l2_regularization)\n",
|
|
" x = nonlinearity(x=x, name=activation)\n",
|
|
" return x\n",
|
|
"\n",
|
|
"def flatten(x):\n",
|
|
" with tf.variable_scope('flatten'):\n",
|
|
" x = tf.reshape(tensor=x, shape=(-1, util.prod(x.get_shape().as_list()[1:])))\n",
|
|
" return x\n",
|
|
"\n",
|
|
"def conv2d(x, size, window=(3,3), stride=(1,1), bias=False, activation='relu', l2_regularization=0.0, padding='SAME'):\n",
|
|
" if util.rank(x) != 4:\n",
|
|
" raise TensorForceError('Invalid input rank for conv2d layer.')\n",
|
|
" with tf.variable_scope('conv2d'):\n",
|
|
" filters = tf.Variable(initial_value=tf.random_normal(shape=(window[0], window[1], x.get_shape()[3].value, size), stddev=sqrt(2.0 / size)))\n",
|
|
" if l2_regularization > 0.0:\n",
|
|
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))\n",
|
|
" x = tf.nn.conv2d(input=x, filter=filters, strides=(1, stride[0], stride[1], 1), padding=padding)\n",
|
|
" if bias:\n",
|
|
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
|
|
" if l2_regularization > 0.0:\n",
|
|
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
|
|
" x = tf.nn.bias_add(value=x, bias=bias)\n",
|
|
" x = nonlinearity(x=x, name=activation)\n",
|
|
" return x\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:56.471251Z",
|
|
"start_time": "2017-10-15T02:30:56.396534Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# build a network for a given input\n",
|
|
"def network_builder(inputs, summary_level):\n",
|
|
" if len(inputs) != 1:\n",
|
|
" raise TensorForceError('Layered network must have only one input.')\n",
|
|
" x = next(iter(inputs.values()))\n",
|
|
" \n",
|
|
" x = conv2d(x=x, size=2, window=(1,3), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
|
" x = conv2d(x=x, size=20, window=(1,window_length-2), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
|
" x = conv2d(x=x, size=1, window=(1,1), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
|
" x = flatten(x)\n",
|
|
" x = nonlinearity(x,name='softmax')\n",
|
|
" \n",
|
|
" return x\n",
|
|
"network=network_builder"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Agent"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:00.442210Z",
|
|
"start_time": "2017-10-15T02:31:00.356486Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"({'action0': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'},\n",
|
|
" 'action1': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'},\n",
|
|
" 'action2': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'},\n",
|
|
" 'action3': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'},\n",
|
|
" 'action4': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'},\n",
|
|
" 'action5': {'epsilon': 1,\n",
|
|
" 'epsilon_final': 0.005,\n",
|
|
" 'epsilon_timesteps': 100000.0,\n",
|
|
" 'start_after': 0,\n",
|
|
" 'type': 'epsilon_anneal'}},)"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"exploration=dict(\n",
|
|
" type=\"epsilon_anneal\",\n",
|
|
" epsilon=1,\n",
|
|
" epsilon_final= 0.005,\n",
|
|
" epsilon_timesteps= 1e5,\n",
|
|
" start_after=0,\n",
|
|
")\n",
|
|
"{'action' + str(n): exploration for n in range(env.action_space.shape[0])},"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:03.222746Z",
|
|
"start_time": "2017-10-15T02:31:01.171736Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"WARNING:tensorforce.agents.agent:Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n",
|
|
"[2017-10-15 10:31:03,214] Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<tensorforce.agents.ppo_agent.PPOAgent at 0x7fa421752fd0>"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"batch_size=256\n",
|
|
"exploration=dict(\n",
|
|
" type=\"epsilon_anneal\",\n",
|
|
" epsilon=1,\n",
|
|
" epsilon_final= 0.005,\n",
|
|
" epsilon_timesteps= 1e5,\n",
|
|
" start_after=0,\n",
|
|
")\n",
|
|
"config = Configuration( \n",
|
|
" # Each agent requires the following ``Configuration`` parameters:\n",
|
|
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/agents/agent.py#L32\n",
|
|
" network=network,\n",
|
|
" states=dict(shape=tuple(env.observation_space.shape), type='float'),\n",
|
|
" actions={'action' + str(n): dict(continuous=True) for n in range(env.action_space.shape[0])},\n",
|
|
" preprocessing = None,# dict or list containing state preprocessing configuration.\n",
|
|
" exploration = {'action' + str(n): exploration for n in range(env.action_space.shape[0])}, # dict containing action exploration configuration.\n",
|
|
" \n",
|
|
" \n",
|
|
" # The `MemoryAgent` class additionally requires the following parameters:\n",
|
|
" first_update = batch_size*2, # integer indicating the number of steps to pass before the first update.\n",
|
|
" memory_capacity = 300000, # integer of maximum experiences to store. (takes 2s to sample with 100k)\n",
|
|
" memory = 'prioritized_replay', # string indicating memory type ('replay' or 'prioritized_replay').\n",
|
|
" update_frequency = int(batch_size/2), # integer indicating the number of steps between model updates.\n",
|
|
" repeat_update = 2, # integer indicating how often to repeat the model update.\n",
|
|
"\n",
|
|
" # Each model requires the following configuration parameters:\n",
|
|
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/model.py#L33\n",
|
|
" discount = 0.97, # float of discount factor (gamma).\n",
|
|
" learning_rate = 1e-3, # float of learning rate (alpha). (3e-4 in paper 1e-3 (atari) and 3e-4 in baselines)\n",
|
|
" optimizer = 'adam', # string of optimizer to use (e.g. 'adam' in paper).\n",
|
|
" device = None, # string of tensorflow device name.\n",
|
|
"# tf_summary = log_dir, # string directory to write tensorflow summaries. Default None\n",
|
|
"# tf_summary_level = 1, # int indicating which tensorflow summaries to create.\n",
|
|
" tf_summary_interval = 1000, # int number of calls to get_action until writing tensorflow summaries on update.\n",
|
|
" log_level = 'info', # string containing log level (e.g. 'info').\n",
|
|
" distributed = False, # boolean indicating whether to use distributed tensorflow.\n",
|
|
" global_model = False, # global model.\n",
|
|
" session = None, # session to use. \n",
|
|
"\n",
|
|
" # A Policy Gradient Model expects the following additional configuration parameters:\n",
|
|
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/policy_gradient_model.py#L35\n",
|
|
" # I edited my tensorflow install to have a flatten layer to make this work (my branch is in requirements.txt)\n",
|
|
" baseline=dict(\n",
|
|
" type=\"mlp\",\n",
|
|
" sizes=[128, 128],\n",
|
|
" epochs=1,\n",
|
|
" update_batch_size=128,\n",
|
|
" learning_rate=0.01\n",
|
|
" ), # string indicating the baseline value function (currently 'linear' or 'mlp').\n",
|
|
" gae_rewards= True, # boolean indicating whether to use GAE.\n",
|
|
" gae_lambda= 0.97, # float of the Generalized Advantage Estimation lambda.\n",
|
|
" normalize_rewards= False,# boolean indicating whether to normalize the advantage or not.\n",
|
|
" \n",
|
|
" # PPO Params \n",
|
|
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/ppo_model.py\n",
|
|
" entropy_penalty=0.01, # 0 and 0.01 in baselines\n",
|
|
" loss_clipping=0.1, # Trust region clipping\n",
|
|
" epochs=4, # Number of training epochs for SGD, data is repeated this much 4 (atari),10 in baselines, 10 in paper\n",
|
|
" optimizer_batch_size=32, # Batch size for optimiser, should be small (e.g. 64 in paper)\n",
|
|
" random_sampling=True # Sampling strategy for minibatch replay memory\n",
|
|
")\n",
|
|
"\n",
|
|
"# Create a Proximal Policy Optimization (PPO) agent\n",
|
|
"agent = PPOAgent(config=config)\n",
|
|
"agent"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T01:53:53.000937Z",
|
|
"start_time": "2017-10-15T01:53:52.906552Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Train\n",
|
|
"\n",
|
|
"## Callbacks"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:15.507302Z",
|
|
"start_time": "2017-10-15T02:31:15.427324Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def sharpe(returns, freq=30, rfr=0):\n",
|
|
"    \"\"\"Naive Sharpe ratio of a series of per-period returns (eq 28).\n",
|
|
"\n",
|
|
"    Computes sqrt(freq) * mean(returns - rfr) / std(returns - rfr), where\n",
|
|
"    `rfr` is the per-period risk-free rate (the default 0 gives the naive ratio).\n",
|
|
"    \"\"\"\n",
|
|
"    return (np.sqrt(freq) * np.mean(returns-rfr)) / np.std(returns - rfr)\n",
|
|
"\n",
|
|
"def MDD(returns):\n",
|
|
"    \"\"\"Max drawdown of a value series.\n",
|
|
"\n",
|
|
"    Returns the largest peak-to-trough decline as a fraction of the peak, i.e.\n",
|
|
"    the minimum over t of (value_t - running_peak_t) / running_peak_t.\n",
|
|
"    The result is <= 0, and is 0.0 when the series never falls below an earlier peak.\n",
|
|
"    Assumes strictly positive values; raises ValueError on empty input.\n",
|
|
"    \"\"\"\n",
|
|
"    values = np.asarray(returns, dtype=float)\n",
|
|
"    # Highest value seen so far at each step. Measuring against it catches drawdowns\n",
|
|
"    # that happen before the global maximum, not only the drop that follows it.\n",
|
|
"    running_peak = np.maximum.accumulate(values)\n",
|
|
"    # Normalise by the peak (not the trough) so the loss is a fraction of the high.\n",
|
|
"    drawdowns = (values - running_peak) / running_peak\n",
|
|
"    return drawdowns.min()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:15.751990Z",
|
|
"start_time": "2017-10-15T02:31:15.662780Z"
|
|
},
|
|
"code_folding": [],
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Callback function printing episode statistics\n",
|
|
"class EpisodeFinished(object):\n",
|
|
" \"\"\"Logger callback for tensorforce runner\"\"\"\n",
|
|
" \n",
|
|
" def __init__(self, log_intv):\n",
|
|
" self.log_intv = log_intv\n",
|
|
" self.portfolio_values = [] \n",
|
|
" self.mdds=[]\n",
|
|
" self.sharpes=[]\n",
|
|
" \n",
|
|
" def __call__(self, r):\n",
|
|
" if len(r.environment.gym.sim.infos):\n",
|
|
" self.portfolio_values.append(r.environment.gym.sim.infos[-1]['portfolio_value'])\n",
|
|
" \n",
|
|
" df = pd.DataFrame(r.environment.gym.sim.infos)\n",
|
|
" self.mdds.append(MDD(df.rate_of_return+1))\n",
|
|
" self.sharpes.append(sharpe(df.rate_of_return))\n",
|
|
" if r.episode % self.log_intv == 0:\n",
|
|
" print(\n",
|
|
" \"Finished episode {ep} after {ts} timesteps (reward: {reward: 2.4f} [{rewards_min: 2.4f}, {rewards_max: 2.4f}]) portfolio_value: {portfolio_value: 2.4f} [{portfolio_value_min: 2.4f}, {portfolio_value_max: 2.4f}] mdd={mdd: 2.2%} sharpe={sharpe: 2.2f}\".\n",
|
|
" format(\n",
|
|
" ep=r.episode,\n",
|
|
" ts=r.timestep,\n",
|
|
" reward=np.mean(r.episode_rewards[-self.log_intv:]),\n",
|
|
" rewards_min=np.min(r.episode_rewards[-self.log_intv:]),\n",
|
|
" rewards_max=np.max(r.episode_rewards[-self.log_intv:]),\n",
|
|
" portfolio_value=np.mean(self.portfolio_values[-self.log_intv:]),\n",
|
|
" portfolio_value_min=np.min(self.portfolio_values[-self.log_intv:]),\n",
|
|
" portfolio_value_max=np.max(self.portfolio_values[-self.log_intv:]),\n",
|
|
" mdd=np.mean(self.mdds[-self.log_intv:]),\n",
|
|
" sharpe=np.mean(self.sharpes[-self.log_intv:]),\n",
|
|
" )\n",
|
|
" )\n",
|
|
" return True"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:15.934227Z",
|
|
"start_time": "2017-10-15T02:31:15.825377Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"import tensorflow as tf\n",
|
|
"import numpy as np\n",
|
|
"class TensorBoardLogger(object):\n",
|
|
" \"\"\"\n",
|
|
" Log scalar and histograms/distributions to tensorboard.\n",
|
|
" Usage:\n",
|
|
" ```\n",
|
|
" logger = TensorBoardLogger(log_dir = '/tmp/test')\n",
|
|
" for i in range(10):\n",
|
|
" logger.log(\n",
|
|
" logs=dict(\n",
|
|
" float_test=np.random.random(),\n",
|
|
" int_test=np.random.randint(0,4),\n",
|
|
" ),\n",
|
|
" histograms=dict(\n",
|
|
" actions=np.random.randint(0,3,size=np.random.randint(5,20))\n",
|
|
" )\n",
|
|
" )\n",
|
|
" ```\n",
|
|
" Ref: https://github.com/fchollet/keras/blob/master/keras/callbacks.py\n",
|
|
" Url: https://gist.github.com/wassname/b692f8e8686655011618dfbe8d8a9e3f\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, log_dir, session=None, episode=0):\n",
|
|
" self.log_dir = log_dir\n",
|
|
" self.writer = tf.summary.FileWriter(self.log_dir)\n",
|
|
" self.episode = episode\n",
|
|
" print('TensorBoardLogger started. Run `tensorboard --logdir={}` to visualize'.format(os.path.abspath(self.log_dir)))\n",
|
|
"\n",
|
|
" self.histograms = {}\n",
|
|
" self.histogram_inputs = {}\n",
|
|
" self.session = session or tf.get_default_session() or tf.Session()\n",
|
|
"\n",
|
|
" def log(self, logs={}, histograms={}, episode=None):\n",
|
|
" episode = episode or self.episode\n",
|
|
" # scalar logging\n",
|
|
" for name, value in logs.items():\n",
|
|
" summary = tf.Summary()\n",
|
|
" summary_value = summary.value.add()\n",
|
|
" summary_value.simple_value = value\n",
|
|
" summary_value.tag = name\n",
|
|
" self.writer.add_summary(summary, episode)\n",
|
|
"\n",
|
|
" # histograms\n",
|
|
" for name, value in histograms.items():\n",
|
|
" if name not in self.histograms:\n",
|
|
" # make a tensor with no fixed shape\n",
|
|
" self.histogram_inputs[name] = tf.Variable(value,validate_shape=False)\n",
|
|
" self.histograms[name] = tf.summary.histogram(name, self.histogram_inputs[name])\n",
|
|
"\n",
|
|
" input_tensor = self.histogram_inputs[name]\n",
|
|
" summary = self.histograms[name]\n",
|
|
" summary_str = summary.eval(session=self.session, feed_dict={input_tensor.name:value})\n",
|
|
" self.writer.add_summary(summary_str, episode)\n",
|
|
"\n",
|
|
" self.writer.flush()\n",
|
|
" self.episode += 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:31:16.177868Z",
|
|
"start_time": "2017-10-15T02:31:16.058908Z"
|
|
},
|
|
"code_folding": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Callback EpisodeFinishedTQDM\n",
|
|
"\n",
|
|
"from tqdm import tqdm_notebook\n",
|
|
"class EpisodeFinishedTQDM(EpisodeFinished):\n",
|
|
" \"\"\"Logger for tensorforce using tqdm_notebook for jupyter-notebook.\"\"\"\n",
|
|
" \n",
|
|
" def __init__(self, episodes, log_intv, session=None, log_dir=None, episode=0):\n",
|
|
" \"\"\"\n",
|
|
" log_intv - print the mean metrics every log_intv episodes\n",
|
|
" \"\"\"\n",
|
|
" super().__init__(log_intv=log_intv)\n",
|
|
" self.episodes = episodes\n",
|
|
" self.progbar = tqdm_notebook(desc='', \n",
|
|
" total=episodes, \n",
|
|
" leave=True, mininterval=5)\n",
|
|
" \n",
|
|
" # tensorboard\n",
|
|
" if log_dir:\n",
|
|
" self.log_dir = log_dir\n",
|
|
" elif save_path:\n",
|
|
" self.log_dir = '/tmp/'+os.path.basename(save_path)\n",
|
|
" else:\n",
|
|
" self.log_dir = '/tmp/StepsProgressBar'\n",
|
|
" self.tensor_board_logger = TensorBoardLogger(self.log_dir, session=session, episode=episode)\n",
|
|
" \n",
|
|
" def __call__(self, r):\n",
|
|
" super().__call__(r)\n",
|
|
" oai_env = r.environment.gym.unwrapped\n",
|
|
" exploration = r.agent.exploration.get('action0', lambda x,y:0)(r.episode, np.sum(r.episode_lengths))\n",
|
|
" desc = \"reward: {reward: 2.4f} [{rewards_min: 2.4f}, {rewards_max: 2.4f}], portfolio_value: {portfolio_value: 2.4f} [{portfolio_value_min: 2.4f}, {portfolio_value_max: 2.4f}] expl={exploration: 2.2%}\".format(\n",
|
|
" reward=np.mean(r.episode_rewards[-1:]),\n",
|
|
" rewards_min=np.min(r.episode_rewards[-1:]),\n",
|
|
" rewards_max=np.max(r.episode_rewards[-1:]),\n",
|
|
" portfolio_value=np.mean(self.portfolio_values[-1:]),\n",
|
|
" portfolio_value_min=np.min(self.portfolio_values[-1:]),\n",
|
|
" portfolio_value_max=np.max(self.portfolio_values[-1:]),\n",
|
|
" exploration=exploration\n",
|
|
" )\n",
|
|
" self.progbar.desc = desc\n",
|
|
" self.progbar.update(1) # update\n",
|
|
" \n",
|
|
" # log to tensorboard\n",
|
|
" logs=dict(\n",
|
|
" rewards=r.episode_rewards[-1],\n",
|
|
" episode_lengths=r.episode_lengths[-1],\n",
|
|
" episode_time=r.episode_times[-1],\n",
|
|
" portfolio_value=np.mean(self.portfolio_values[-1:]),\n",
|
|
" portfolio_value_min=np.min(self.portfolio_values[-1:]),\n",
|
|
" portfolio_value_max=np.max(self.portfolio_values[-1:]),\n",
|
|
" exploration=exploration\n",
|
|
" )\n",
|
|
" df_info = pd.DataFrame(oai_env.infos)\n",
|
|
" ep_infos = df_info.mean().to_dict()\n",
|
|
" logs.update(ep_infos)\n",
|
|
" self.tensor_board_logger.log(\n",
|
|
" logs=logs,\n",
|
|
" histograms=dict(\n",
|
|
" returns=df_info['return'].values,\n",
|
|
" portfolio_value=df_info.portfolio_value.values,\n",
|
|
" market_value=df_info.market_value.values,\n",
|
|
" ),\n",
|
|
" episode=r.episode\n",
|
|
" )\n",
|
|
" return True"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:19:28.278977Z",
|
|
"start_time": "2017-10-15T02:19:28.132177Z"
|
|
}
|
|
},
|
|
"source": [
|
|
"## Train"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:34:35.038267Z",
|
|
"start_time": "2017-10-15T02:34:34.963840Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from tensorforce.execution import Runner\n",
|
|
"runner = Runner(agent=agent, environment=environment, save_path=save_path, save_episodes=1000)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:34:35.214512Z",
|
|
"start_time": "2017-10-15T02:34:35.156499Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"# Check my PR is included\n",
|
|
"import tensorforce.core.memories\n",
|
|
"assert isinstance(runner.agent.memory,tensorforce.core.memories.PrioritizedReplay)\n",
|
|
"assert isinstance(runner.agent, tensorforce.agents.MemoryAgent)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:34:35.498415Z",
|
|
"start_time": "2017-10-15T02:34:35.437300Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# resume from the most recent checkpoint written under `save_path`, if any\n",
|
|
"saves = glob.glob(save_path + '-*')\n",
|
|
"if saves:\n",
|
|
"    # glob returns matches in arbitrary order, so take the newest file by\n",
|
|
"    # modification time instead of whichever match happens to come first.\n",
|
|
"    newest = max(saves, key=os.path.getmtime)\n",
|
|
"    # drop the file extension to get the checkpoint prefix passed to load_model\n",
|
|
"    last_save = os.path.splitext(newest)[0]\n",
|
|
"    runner.agent.load_model(last_save)\n",
|
|
"    print('loaded', last_save)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"start_time": "2017-10-15T02:34:35.964Z"
|
|
},
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "492cbe81a26f4eed8dfe1e2f3db257d8"
|
|
}
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"TensorBoardLogger started. Run `tensorboard --logdir=/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/rl_keras_finance/portfolio-rl-jiang_2017/logs/tensorforce-PPO-prioritised_20171015_02-34-23` to visualize\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"episodes = int(6e6 / 30)\n",
|
|
"runner.run(\n",
|
|
" episodes=episodes,\n",
|
|
" max_timesteps=200,\n",
|
|
" episode_finished=EpisodeFinishedTQDM(\n",
|
|
" log_intv=100, \n",
|
|
" episodes=episodes,\n",
|
|
" log_dir=log_dir,\n",
|
|
" session=runner.agent.model.session, \n",
|
|
" )\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:35.073461Z",
|
|
"start_time": "2017-10-15T02:29:49.726Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# save\n",
|
|
"agent.save_model(save_path)\n",
|
|
"save_path"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Test"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-07-19T00:09:54.262405Z",
|
|
"start_time": "2017-07-19T08:09:54.226639+08:00"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:35.074688Z",
|
|
"start_time": "2017-10-15T02:29:49.729Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# one big test: a single long episode on the 'test' key of the dataset\n",
|
|
"df_test = pd.read_hdf('./data/poloniex_30m.hf', key='test')\n",
|
|
"steps = 2400  # len(df_test)-window_length-2\n",
|
|
"env_test = EnvWrapper(\n",
|
|
"    df=df_test,\n",
|
|
"    steps=steps,\n",
|
|
"    scale=True,\n",
|
|
"    augment=0.00,\n",
|
|
"    trading_cost=0,  # let's just overfit first\n",
|
|
"    window_length=window_length,\n",
|
|
")\n",
|
|
"# call the seeding method: assigning `env_test.seed = 0` only rebinds the\n",
|
|
"# attribute to an int, which seeds nothing and shadows the method\n",
|
|
"# NOTE(review): assumes EnvWrapper exposes a gym-style seed() - confirm\n",
|
|
"env_test.seed(0)\n",
|
|
"# the OpenAIGym wrapper is created around a placeholder env id and then\n",
|
|
"# pointed at our own environment\n",
|
|
"environment_test = OpenAIGym('CartPole-v0')\n",
|
|
"environment_test.gym = env_test\n",
|
|
"\n",
|
|
"agent.load_model(save_path)\n",
|
|
"runner_test = Runner(agent=agent, environment=environment_test)\n",
|
|
"runner_test.run(\n",
|
|
"    episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
|
|
"\n",
|
|
"# one row per recorded info dict, indexed by its 'index' field\n",
|
|
"df = pd.DataFrame(env_test.infos)\n",
|
|
"df.index = df['index']\n",
|
|
"\n",
|
|
"s = sharpe(df.rate_of_return + 1)\n",
|
|
"mdd = MDD(df.rate_of_return + 1)\n",
|
|
"apv = df.portfolio_value.iloc[-1]\n",
|
|
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(apv))\n",
|
|
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format(s))\n",
|
|
"print('MDD (max drawdown): \\t{: 2.6%}'.format(mdd))\n",
|
|
"print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
|
|
"print('')\n",
|
|
"\n",
|
|
"# show one run vs average market performance\n",
|
|
"plt.title('test MDD={}, Sharpe={}, APV={}'.format(mdd,s,apv))\n",
|
|
"df.portfolio_value.plot()\n",
|
|
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
|
|
"plt.legend()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-07-19T00:48:39.193976Z",
|
|
"start_time": "2017-07-19T08:48:39.154752+08:00"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:35.075752Z",
|
|
"start_time": "2017-10-15T02:29:49.733Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# evaluate the saved agent over 10 runs, one per numpy random seed,\n",
|
|
"# collecting the Sharpe ratio and max drawdown of each run in `data`\n",
|
|
"data = []\n",
|
|
"for i in range(10):\n",
|
|
"    agent.load_model(save_path)\n",
|
|
"    df_test = pd.read_hdf('./data/poloniex_30m.hf', key='test')\n",
|
|
"\n",
|
|
"    # NOTE(review): steps=1800 here, but the run below is limited to\n",
|
|
"    # 2 episodes of 32 timesteps - confirm this is intended\n",
|
|
"    env_test = EnvWrapper(\n",
|
|
"        df=df_test,\n",
|
|
"        steps=1800,\n",
|
|
"        scale=True,\n",
|
|
"        augment=0.00,\n",
|
|
"        trading_cost=0,  # let's just overfit first\n",
|
|
"        window_length=window_length,\n",
|
|
"    )\n",
|
|
"    # call the seeding method: assigning `env_test.seed = 0` only rebinds the\n",
|
|
"    # attribute to an int, which seeds nothing and shadows the method\n",
|
|
"    # NOTE(review): assumes EnvWrapper exposes a gym-style seed() - confirm\n",
|
|
"    env_test.seed(0)\n",
|
|
"\n",
|
|
"    # the OpenAIGym wrapper is created around a placeholder env id and then\n",
|
|
"    # pointed at our own environment\n",
|
|
"    environment_test = OpenAIGym('CartPole-v0')\n",
|
|
"    environment_test.gym = env_test\n",
|
|
"\n",
|
|
"    runner_test = Runner(agent=agent, environment=environment_test)\n",
|
|
"    np.random.seed(i)\n",
|
|
"    runner_test.run(\n",
|
|
"        episodes=2, max_timesteps=32, episode_finished=EpisodeFinished(10))\n",
|
|
"    df = pd.DataFrame(environment_test.gym.infos)\n",
|
|
"#     df.index=df['index']\n",
|
|
"\n",
|
|
"    s = sharpe(df.rate_of_return + 1)\n",
|
|
"    mdd = MDD(df.rate_of_return + 1)\n",
|
|
"    data.append(dict(sharpe=s, mdd=mdd))\n",
|
|
"    print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
|
|
"    print('SR (Sharpe ratio): \\t{: 2.6f}'.format(s))\n",
|
|
"    print('MDD (max drawdown): \\t{: 2.6%}'.format(mdd))\n",
|
|
"    print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
|
|
"    print('')\n",
|
|
"    df.portfolio_value.plot(label=str(i))\n",
|
|
"plt.legend()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:35.076620Z",
|
|
"start_time": "2017-10-15T02:29:49.737Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2017-10-15T02:30:35.077480Z",
|
|
"start_time": "2017-10-15T02:29:49.742Z"
|
|
},
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# one big test over the 'train' key of the dataset\n",
|
|
"# NOTE(review): this cell rebinds `env`, `environment` and `runner`, so\n",
|
|
"# re-running earlier cells afterwards will see these objects - confirm intended\n",
|
|
"df_train = pd.read_hdf('./data/poloniex_30m.hf', key='train')\n",
|
|
"steps = len(df_train) - window_length - 2\n",
|
|
"env = EnvWrapper(\n",
|
|
"    df=df_train,\n",
|
|
"    steps=steps,\n",
|
|
"    scale=True,\n",
|
|
"    augment=0.00,\n",
|
|
"    trading_cost=0,  # let's just overfit first\n",
|
|
"    window_length=window_length,\n",
|
|
")\n",
|
|
"# call the seeding method: assigning `env.seed = 0` only rebinds the\n",
|
|
"# attribute to an int, which seeds nothing and shadows the method\n",
|
|
"# NOTE(review): assumes EnvWrapper exposes a gym-style seed() - confirm\n",
|
|
"env.seed(0)\n",
|
|
"# the OpenAIGym wrapper is created around a placeholder env id and then\n",
|
|
"# pointed at our own environment\n",
|
|
"environment = OpenAIGym('CartPole-v0')\n",
|
|
"environment.gym = env\n",
|
|
"\n",
|
|
"agent.load_model(save_path)\n",
|
|
"runner = Runner(agent=agent, environment=environment)\n",
|
|
"runner.run(\n",
|
|
"    episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
|
|
"\n",
|
|
"# one row per recorded info dict, indexed by its 'index' field\n",
|
|
"df = pd.DataFrame(env.infos)\n",
|
|
"df.index = df['index']\n",
|
|
"\n",
|
|
"s = sharpe(df.rate_of_return + 1)\n",
|
|
"mdd = MDD(df.rate_of_return + 1)\n",
|
|
"# NOTE(review): this appends the train-set metrics to the same `data` list\n",
|
|
"# that an earlier cell fills with per-seed test metrics - confirm intended\n",
|
|
"data.append(dict(sharpe=s, mdd=mdd))\n",
|
|
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
|
|
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format(s))\n",
|
|
"print('MDD (max drawdown): \\t{: 2.6%}'.format(mdd))\n",
|
|
"print('')\n",
|
|
"\n",
|
|
"# show one run vs average market performance\n",
|
|
"plt.title('train')\n",
|
|
"df.portfolio_value.plot()\n",
|
|
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
|
|
"plt.legend()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "jupyter3",
|
|
"language": "python",
|
|
"name": "jupyter3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|