Files
rl-portfolio-management/tensorforce-PPO.ipynb
T
2017-10-15 10:36:44 +08:00

1211 lines
40 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Uses tensorforce tensorforce-0.2.0"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:29:58.356761Z",
"start_time": "2017-10-15T02:29:49.898984Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# plotting\n",
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import seaborn as sns\n",
"plt.style.use('ggplot')\n",
"\n",
"# numeric\n",
"import numpy as np\n",
"from numpy import random\n",
"import pandas as pd\n",
"\n",
"# util\n",
"from collections import Counter\n",
"import pdb\n",
"import glob\n",
"import time\n",
"import tempfile\n",
"import itertools\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import datetime\n",
"\n",
"# logging\n",
"import logging\n",
"logger = log = logging.getLogger(__name__)\n",
"# log.setLevel(logging.INFO)\n",
"logging.basicConfig()\n",
"log.info('%s logger started.', __name__)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:29:59.755732Z",
"start_time": "2017-10-15T02:29:58.358390Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import gym\n",
"from gym import error, spaces, utils\n",
"from gym.utils import seeding"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:29:59.821678Z",
"start_time": "2017-10-15T02:29:59.757241Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"os.sys.path.append(os.path.abspath('.'))\n",
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:34:29.996288Z",
"start_time": "2017-10-15T02:34:29.918670Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
" \r",
"95/|/reward: -0.0000 [-0.0000, -0.0000], portfolio_value: 0.9998 [ 0.9998, 0.9998] expl= 81.00% 0%|| 95/200000 [03:10<105:36:04, 1.90s/it]"
]
},
{
"data": {
"text/plain": [
"'./outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-34-23.model'"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# params\n",
"window_length = 50\n",
"import datetime\n",
"ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')\n",
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_%s.model' % ts\n",
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-34-23.model'\n",
"save_path\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:34:30.609819Z",
"start_time": "2017-10-15T02:34:30.524610Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'logs/tensorforce-PPO-prioritised_20171015_02-34-23'"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log_dir = os.path.join('logs', os.path.splitext(os.path.basename(save_path))[0])\n",
"try:\n",
" os.makedirs(log_dir)\n",
"except OSError:\n",
" pass\n",
"log_dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Enviroment"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:29:59.948831Z",
"start_time": "2017-10-15T02:29:59.910316Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from src.environments.portfolio import PortfolioEnv"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:00.017952Z",
"start_time": "2017-10-15T02:29:59.949921Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from rl.memory import SequentialMemory, Memory\n",
"from collections import deque\n",
"\n",
"class EnvWrapper(PortfolioEnv):\n",
" \"\"\"Wraps env to normalise and reshape action.\"\"\"\n",
" def __init__(self, window_length=50, *args, **kwargs):\n",
" super().__init__(*args, **kwargs)\n",
" \n",
" def step(self, action):\n",
" # also it puts it in a list\n",
" if isinstance(action, list):\n",
" action = action[0]\n",
" \n",
" # we have to normalise for some reason softmax wont work\n",
" if isinstance(action, dict):\n",
" action = np.abs(list(action.values()))\n",
" action /= action.sum() \n",
" \n",
" return super().step(action) "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:00.993572Z",
"start_time": "2017-10-15T02:30:00.019275Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
"[2017-10-15 10:30:00,929] Making new env: CartPole-v0\n",
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
"[2017-10-15 10:30:00,987] Making new env: CartPole-v0\n"
]
}
],
"source": [
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
"env = EnvWrapper(\n",
" df=df_train,\n",
" steps=300, \n",
" scale=True, \n",
" augment=0.000,\n",
" trading_cost=0, # let just overfit first,\n",
" window_length = window_length,\n",
" \n",
")\n",
"env.seed = 0 \n",
"\n",
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
"env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=300, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env_test.seed = 0 \n",
"\n",
"from tensorforce.contrib.openai_gym import OpenAIGym\n",
"environment = OpenAIGym('CartPole-v0')\n",
"environment.gym = env\n",
"\n",
"environment_test = OpenAIGym('CartPole-v0')\n",
"environment_test.gym = env_test"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:01.088045Z",
"start_time": "2017-10-15T02:30:00.994806Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0 False {'reward': 0.0, 'log_return': 0.0, 'portfolio_value': 1.0, 'return': 1.0023510634010704, 'rate_of_return': 0.0, 'cash_bias': 0.093732749593602255, 'cost': 0.0, 'market_value': 1.0023516084586421, 'date': 1429261200.0, 'steps': 2}\n",
"(5, 50, 3) (5, 50, 3)\n"
]
}
],
"source": [
"# check shapes\n",
"obs1, reward, done, info=env.step(np.random.random(env.action_space.shape))\n",
"print(reward, done, info)\n",
"obs2 = env.reset()\n",
"print(obs1.shape,obs2.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-16T04:41:21.116729Z",
"start_time": "2017-07-16T12:41:21.086620+08:00"
}
},
"source": [
"# Model\n",
"\n",
"Derived from https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:55.981253Z",
"start_time": "2017-10-15T02:30:55.907749Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from tensorforce import Configuration\n",
"from tensorforce.agents import PPOAgent\n",
"from tensorforce.core.networks import layered_network_builder"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:56.395442Z",
"start_time": "2017-10-15T02:30:56.144527Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# layer helpers from:\n",
"# https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90\n",
"import tensorflow as tf\n",
"from math import sqrt\n",
"from tensorforce import util\n",
"from tensorforce import TensorForceError\n",
"\n",
"def linear(x, size, bias=True, l2_regularization=0.0):\n",
" if util.rank(x) != 2:\n",
" raise TensorForceError('Invalid input rank for linear layer.')\n",
" with tf.variable_scope('linear'):\n",
" weights = tf.Variable(initial_value=tf.random_normal(shape=(x.get_shape()[1].value, size), stddev=sqrt(2.0 / (x.get_shape()[1].value + size))))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=weights))\n",
" x = tf.matmul(a=x, b=weights)\n",
" if bias:\n",
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
" x = tf.nn.bias_add(value=x, bias=bias)\n",
" return x\n",
"\n",
"def nonlinearity(x, name='relu'):\n",
" with tf.variable_scope('nonlinearity'):\n",
" if name == 'elu':\n",
" x = tf.nn.elu(features=x)\n",
" elif name == 'relu':\n",
" x = tf.nn.relu(features=x)\n",
" elif name == 'selu':\n",
" # https://arxiv.org/pdf/1706.02515.pdf\n",
" alpha = 1.6732632423543772848170429916717\n",
" scale = 1.0507009873554804934193349852946\n",
" negative = alpha * tf.nn.elu(features=x)\n",
" x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)\n",
" elif name == 'sigmoid':\n",
" x = tf.sigmoid(x=x)\n",
" elif name == 'softmax':\n",
" x = tf.nn.softmax(logits=x)\n",
" elif name == 'tanh':\n",
" x = tf.nn.tanh(x=x)\n",
" else:\n",
" raise TensorForceError('Invalid nonlinearity.')\n",
" return x\n",
"\n",
"def dense(x, size, bias=True, activation='relu', l2_regularization=0.0):\n",
" if util.rank(x) != 2:\n",
" raise TensorForceError('Invalid input rank for dense layer.')\n",
" with tf.variable_scope('dense'):\n",
" x = linear(x=x, size=size, bias=bias, l2_regularization=l2_regularization)\n",
" x = nonlinearity(x=x, name=activation)\n",
" return x\n",
"\n",
"def flatten(x):\n",
" with tf.variable_scope('flatten'):\n",
" x = tf.reshape(tensor=x, shape=(-1, util.prod(x.get_shape().as_list()[1:])))\n",
" return x\n",
"\n",
"def conv2d(x, size, window=(3,3), stride=(1,1), bias=False, activation='relu', l2_regularization=0.0, padding='SAME'):\n",
" if util.rank(x) != 4:\n",
" raise TensorForceError('Invalid input rank for conv2d layer.')\n",
" with tf.variable_scope('conv2d'):\n",
" filters = tf.Variable(initial_value=tf.random_normal(shape=(window[0], window[1], x.get_shape()[3].value, size), stddev=sqrt(2.0 / size)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))\n",
" x = tf.nn.conv2d(input=x, filter=filters, strides=(1, stride[0], stride[1], 1), padding=padding)\n",
" if bias:\n",
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
" x = tf.nn.bias_add(value=x, bias=bias)\n",
" x = nonlinearity(x=x, name=activation)\n",
" return x\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:56.471251Z",
"start_time": "2017-10-15T02:30:56.396534Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# build a network for a given input\n",
"def network_builder(inputs, summary_level):\n",
" if len(inputs) != 1:\n",
" raise TensorForceError('Layered network must have only one input.')\n",
" x = next(iter(inputs.values()))\n",
" \n",
" x = conv2d(x=x, size=2, window=(1,3), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = conv2d(x=x, size=20, window=(1,window_length-2), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = conv2d(x=x, size=1, window=(1,1), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = flatten(x)\n",
" x = nonlinearity(x,name='softmax')\n",
" \n",
" return x\n",
"network=network_builder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Agent"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:00.442210Z",
"start_time": "2017-10-15T02:31:00.356486Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"({'action0': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action1': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action2': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action3': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action4': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action5': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'}},)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploration=dict(\n",
" type=\"epsilon_anneal\",\n",
" epsilon=1,\n",
" epsilon_final= 0.005,\n",
" epsilon_timesteps= 1e5,\n",
" start_after=0,\n",
")\n",
"{'action' + str(n): exploration for n in range(env.action_space.shape[0])},"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:03.222746Z",
"start_time": "2017-10-15T02:31:01.171736Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorforce.agents.agent:Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n",
"[2017-10-15 10:31:03,214] Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n"
]
},
{
"data": {
"text/plain": [
"<tensorforce.agents.ppo_agent.PPOAgent at 0x7fa421752fd0>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"batch_size=256\n",
"exploration=dict(\n",
" type=\"epsilon_anneal\",\n",
" epsilon=1,\n",
" epsilon_final= 0.005,\n",
" epsilon_timesteps= 1e5,\n",
" start_after=0,\n",
")\n",
"config = Configuration( \n",
" # Each agent requires the following ``Configuration`` parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/agents/agent.py#L32\n",
" network=network,\n",
" states=dict(shape=tuple(env.observation_space.shape), type='float'),\n",
" actions={'action' + str(n): dict(continuous=True) for n in range(env.action_space.shape[0])},\n",
" preprocessing = None,# dict or list containing state preprocessing configuration.\n",
" exploration = {'action' + str(n): exploration for n in range(env.action_space.shape[0])}, # dict containing action exploration configuration.\n",
" \n",
" \n",
" # The `MemoryAgent` class additionally requires the following parameters:\n",
" first_update = batch_size*2, # integer indicating the number of steps to pass before the first update.\n",
" memory_capacity = 300000, # integer of maximum experiences to store. (takes 2s to sample with 100k)\n",
" memory = 'prioritized_replay', # string indicating memory type ('replay' or 'prioritized_replay').\n",
" update_frequency = int(batch_size/2), # integer indicating the number of steps between model updates.\n",
" repeat_update = 2, # integer indicating how often to repeat the model update.\n",
"\n",
" # Each model requires the following configuration parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/model.py#L33\n",
" discount = 0.97, # float of discount factor (gamma).\n",
" learning_rate = 1e-3, # float of learning rate (alpha). (3e-4 in paper 1e-3 (atari) and 3e-4 in baselines)\n",
" optimizer = 'adam', # string of optimizer to use (e.g. 'adam' in paper).\n",
" device = None, # string of tensorflow device name.\n",
"# tf_summary = log_dir, # string directory to write tensorflow summaries. Default None\n",
"# tf_summary_level = 1, # int indicating which tensorflow summaries to create.\n",
" tf_summary_interval = 1000, # int number of calls to get_action until writing tensorflow summaries on update.\n",
" log_level = 'info', # string containing log level (e.g. 'info').\n",
" distributed = False, # boolean indicating whether to use distributed tensorflow.\n",
" global_model = False, # global model.\n",
" session = None, # session to use. \n",
"\n",
" # A Policy Gradient Model expects the following additional configuration parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/policy_gradient_model.py#L35\n",
" # I edited my tensorflow install to have a flatten layer to make this work (my branch is in requirements.txt)\n",
" baseline=dict(\n",
" type=\"mlp\",\n",
" sizes=[128, 128],\n",
" epochs=1,\n",
" update_batch_size=128,\n",
" learning_rate=0.01\n",
" ), # string indicating the baseline value function (currently 'linear' or 'mlp').\n",
" gae_rewards= True, # boolean indicating whether to use GAE.\n",
" gae_lambda= 0.97, # float of the Generalized Advantage Estimation lambda.\n",
" normalize_rewards= False,# boolean indicating whether to normalize the advantage or not.\n",
" \n",
" # PPO Params \n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/ppo_model.py\n",
" entropy_penalty=0.01, # 0 and 0.01 in baselines\n",
" loss_clipping=0.1, # Trust region clipping\n",
" epochs=4, # Number of training epochs for SGD, data is repeated this much 4 (atari),10 in baselines, 10 in paper\n",
" optimizer_batch_size=32, # Batch size for optimiser, should be small (e.g. 64 in paper)\n",
" random_sampling=True # Sampling strategy for minibatch replay memory\n",
")\n",
"\n",
"# Create a Trust Region Policy Optimization agent\n",
"agent = PPOAgent(config=config)\n",
"agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T01:53:53.000937Z",
"start_time": "2017-10-15T01:53:52.906552Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train\n",
"\n",
"## Callbacks"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:15.507302Z",
"start_time": "2017-10-15T02:31:15.427324Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"def sharpe(returns, freq=30, rfr=0):\n",
" \"\"\"Given a set of returns, calculates naive (rfr=0) sharpe (eq 28) \"\"\"\n",
" return (np.sqrt(freq) * np.mean(returns-rfr)) / np.std(returns - rfr)\n",
"\n",
"def MDD(returns):\n",
" \"\"\"Max drawdown.\"\"\"\n",
" peak = returns.max()\n",
" i = returns.argmax()\n",
" trough = returns[returns.argmax():].min()\n",
" return (trough-peak)/trough "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:15.751990Z",
"start_time": "2017-10-15T02:31:15.662780Z"
},
"code_folding": [],
"collapsed": true
},
"outputs": [],
"source": [
"# Callback function printing episode statistics\n",
"class EpisodeFinished(object):\n",
" \"\"\"Logger callback for tensorforce runner\"\"\"\n",
" \n",
" def __init__(self, log_intv):\n",
" self.log_intv = log_intv\n",
" self.portfolio_values = [] \n",
" self.mdds=[]\n",
" self.sharpes=[]\n",
" \n",
" def __call__(self, r):\n",
" if len(r.environment.gym.sim.infos):\n",
" self.portfolio_values.append(r.environment.gym.sim.infos[-1]['portfolio_value'])\n",
" \n",
" df = pd.DataFrame(r.environment.gym.sim.infos)\n",
" self.mdds.append(MDD(df.rate_of_return+1))\n",
" self.sharpes.append(sharpe(df.rate_of_return))\n",
" if r.episode % self.log_intv == 0:\n",
" print(\n",
" \"Finished episode {ep} after {ts} timesteps (reward: {reward: 2.4f} [{rewards_min: 2.4f}, {rewards_max: 2.4f}]) portfolio_value: {portfolio_value: 2.4f} [{portfolio_value_min: 2.4f}, {portfolio_value_max: 2.4f}] mdd={mdd: 2.2%} sharpe={sharpe: 2.2f}\".\n",
" format(\n",
" ep=r.episode,\n",
" ts=r.timestep,\n",
" reward=np.mean(r.episode_rewards[-self.log_intv:]),\n",
" rewards_min=np.min(r.episode_rewards[-self.log_intv:]),\n",
" rewards_max=np.max(r.episode_rewards[-self.log_intv:]),\n",
" portfolio_value=np.mean(self.portfolio_values[-self.log_intv:]),\n",
" portfolio_value_min=np.min(self.portfolio_values[-self.log_intv:]),\n",
" portfolio_value_max=np.max(self.portfolio_values[-self.log_intv:]),\n",
" mdd=np.mean(self.mdds[-self.log_intv:]),\n",
" sharpe=np.mean(self.sharpes[-self.log_intv:]),\n",
" )\n",
" )\n",
" return True"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:15.934227Z",
"start_time": "2017-10-15T02:31:15.825377Z"
}
},
"outputs": [],
"source": [
"\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"class TensorBoardLogger(object):\n",
" \"\"\"\n",
" Log scalar and histograms/distributions to tensorboard.\n",
" Usage:\n",
" ```\n",
" logger = TensorBoardLogger(log_dir = '/tmp/test')\n",
" for i in range(10):\n",
" logger.log(\n",
" logs=dict(\n",
" float_test=np.random.random(),\n",
" int_test=np.random.randint(0,4),\n",
" ),\n",
" histograms=dict(\n",
" actions=np.random.randint(0,3,size=np.random.randint(5,20))\n",
" )\n",
" )\n",
" ```\n",
" Ref: https://github.com/fchollet/keras/blob/master/keras/callbacks.py\n",
" Url: https://gist.github.com/wassname/b692f8e8686655011618dfbe8d8a9e3f\n",
" \"\"\"\n",
"\n",
" def __init__(self, log_dir, session=None, episode=0):\n",
" self.log_dir = log_dir\n",
" self.writer = tf.summary.FileWriter(self.log_dir)\n",
" self.episode = episode\n",
" print('TensorBoardLogger started. Run `tensorboard --logdir={}` to visualize'.format(os.path.abspath(self.log_dir)))\n",
"\n",
" self.histograms = {}\n",
" self.histogram_inputs = {}\n",
" self.session = session or tf.get_default_session() or tf.Session()\n",
"\n",
" def log(self, logs={}, histograms={}, episode=None):\n",
" episode = episode or self.episode\n",
" # scalar logging\n",
" for name, value in logs.items():\n",
" summary = tf.Summary()\n",
" summary_value = summary.value.add()\n",
" summary_value.simple_value = value\n",
" summary_value.tag = name\n",
" self.writer.add_summary(summary, episode)\n",
"\n",
" # histograms\n",
" for name, value in histograms.items():\n",
" if name not in self.histograms:\n",
" # make a tensor with no fixed shape\n",
" self.histogram_inputs[name] = tf.Variable(value,validate_shape=False)\n",
" self.histograms[name] = tf.summary.histogram(name, self.histogram_inputs[name])\n",
"\n",
" input_tensor = self.histogram_inputs[name]\n",
" summary = self.histograms[name]\n",
" summary_str = summary.eval(session=self.session, feed_dict={input_tensor.name:value})\n",
" self.writer.add_summary(summary_str, episode)\n",
"\n",
" self.writer.flush()\n",
" self.episode += 1"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:31:16.177868Z",
"start_time": "2017-10-15T02:31:16.058908Z"
},
"code_folding": []
},
"outputs": [],
"source": [
"# Callback EpisodeFinishedTQDM\n",
"\n",
"from tqdm import tqdm_notebook\n",
"class EpisodeFinishedTQDM(EpisodeFinished):\n",
" \"\"\"Logger for tensorforce using tqdm_notebook for jupyter-notebook.\"\"\"\n",
" \n",
" def __init__(self, episodes, log_intv, session=None, log_dir=None, episode=0):\n",
" \"\"\"\n",
" log_intv - print the mean metrics every log_intv episodes\n",
" \"\"\"\n",
" super().__init__(log_intv=log_intv)\n",
" self.episodes = episodes\n",
" self.progbar = tqdm_notebook(desc='', \n",
" total=episodes, \n",
" leave=True, mininterval=5)\n",
" \n",
" # tensorboard\n",
" if log_dir:\n",
" self.log_dir = log_dir\n",
" elif save_path:\n",
" self.log_dir = '/tmp/'+os.path.basename(save_path)\n",
" else:\n",
" self.log_dir = '/tmp/StepsProgressBar'\n",
" self.tensor_board_logger = TensorBoardLogger(self.log_dir, session=session, episode=episode)\n",
" \n",
" def __call__(self, r):\n",
" super().__call__(r)\n",
" oai_env = r.environment.gym.unwrapped\n",
" exploration = r.agent.exploration.get('action0', lambda x,y:0)(r.episode, np.sum(r.episode_lengths))\n",
" desc = \"reward: {reward: 2.4f} [{rewards_min: 2.4f}, {rewards_max: 2.4f}], portfolio_value: {portfolio_value: 2.4f} [{portfolio_value_min: 2.4f}, {portfolio_value_max: 2.4f}] expl={exploration: 2.2%}\".format(\n",
" reward=np.mean(r.episode_rewards[-1:]),\n",
" rewards_min=np.min(r.episode_rewards[-1:]),\n",
" rewards_max=np.max(r.episode_rewards[-1:]),\n",
" portfolio_value=np.mean(self.portfolio_values[-1:]),\n",
" portfolio_value_min=np.min(self.portfolio_values[-1:]),\n",
" portfolio_value_max=np.max(self.portfolio_values[-1:]),\n",
" exploration=exploration\n",
" )\n",
" self.progbar.desc = desc\n",
" self.progbar.update(1) # update\n",
" \n",
" # log to tensorboard\n",
" logs=dict(\n",
" rewards=r.episode_rewards[-1],\n",
" episode_lengths=r.episode_lengths[-1],\n",
" episode_time=r.episode_times[-1],\n",
" portfolio_value=np.mean(self.portfolio_values[-1:]),\n",
" portfolio_value_min=np.min(self.portfolio_values[-1:]),\n",
" portfolio_value_max=np.max(self.portfolio_values[-1:]),\n",
" exploration=exploration\n",
" )\n",
" df_info = pd.DataFrame(oai_env.infos)\n",
" ep_infos = df_info.mean().to_dict()\n",
" logs.update(ep_infos)\n",
" self.tensor_board_logger.log(\n",
" logs=logs,\n",
" histograms=dict(\n",
" returns=df_info['return'].values,\n",
" portfolio_value=df_info.portfolio_value.values,\n",
" market_value=df_info.market_value.values,\n",
" ),\n",
" episode=r.episode\n",
" )\n",
" return True"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:19:28.278977Z",
"start_time": "2017-10-15T02:19:28.132177Z"
}
},
"source": [
"## Train"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:34:35.038267Z",
"start_time": "2017-10-15T02:34:34.963840Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from tensorforce.execution import Runner\n",
"runner = Runner(agent=agent, environment=environment, save_path=save_path, save_episodes=1000)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:34:35.214512Z",
"start_time": "2017-10-15T02:34:35.156499Z"
}
},
"outputs": [],
"source": [
"\n",
"# Check my PR is included\n",
"import tensorforce.core.memories\n",
"assert isinstance(runner.agent.memory,tensorforce.core.memories.PrioritizedReplay)\n",
"assert isinstance(runner.agent, tensorforce.agents.MemoryAgent)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:34:35.498415Z",
"start_time": "2017-10-15T02:34:35.437300Z"
}
},
"outputs": [],
"source": [
"# resume\n",
"saves=glob.glob(save_path+'-*')\n",
"if len(saves)>0:\n",
" # load saved\n",
" last_save = os.path.splitext(saves[0])[0]\n",
" runner.agent.load_model(last_save)\n",
" print('loaded', last_save)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:34:35.964Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "492cbe81a26f4eed8dfe1e2f3db257d8"
}
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TensorBoardLogger started. Run `tensorboard --logdir=/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/rl_keras_finance/portfolio-rl-jiang_2017/logs/tensorforce-PPO-prioritised_20171015_02-34-23` to visualize\n"
]
}
],
"source": [
"episodes = int(6e6 / 30)\n",
"runner.run(\n",
" episodes=episodes,\n",
" max_timesteps=200,\n",
" episode_finished=EpisodeFinishedTQDM(\n",
" log_intv=100, \n",
" episodes=episodes,\n",
" log_dir=log_dir,\n",
" session=runner.agent.model.session, \n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:35.073461Z",
"start_time": "2017-10-15T02:29:49.726Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# save\n",
"agent.save_model(save_path)\n",
"save_path"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-19T00:09:54.262405Z",
"start_time": "2017-07-19T08:09:54.226639+08:00"
},
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:35.074688Z",
"start_time": "2017-10-15T02:29:49.729Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# one big test\n",
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
"steps=2400#len(df_test)-window_length-2\n",
"env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=steps, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env_test.seed = 0 \n",
"environment_test = OpenAIGym('CartPole-v0')\n",
"environment_test.gym = env_test\n",
"\n",
"agent.load_model(save_path)\n",
"runner_test = Runner(agent=agent, environment=environment_test)\n",
"runner_test.run(\n",
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
"\n",
"df = pd.DataFrame(env_test.infos)\n",
"df.index=df['index']\n",
"\n",
"s=sharpe(df.rate_of_return+1)\n",
"mdd=MDD(df.rate_of_return+1)\n",
"apv=df.portfolio_value.iloc[-1]\n",
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(apv))\n",
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
"print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
"print('')\n",
"\n",
"# show one run vs average market performance\n",
"plt.title('test MDD={}, Sharpe={}, APV={}'.format(mdd,s,apv))\n",
"df.portfolio_value.plot()\n",
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-19T00:48:39.193976Z",
"start_time": "2017-07-19T08:48:39.154752+08:00"
},
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:35.075752Z",
"start_time": "2017-10-15T02:29:49.733Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"\n",
"\n",
"data=[]\n",
"for i in range(10):\n",
" agent.load_model(save_path)\n",
" df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
" \n",
" env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=1800, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
" )\n",
" env_test.seed = 0 \n",
"\n",
"\n",
" environment_test = OpenAIGym('CartPole-v0')\n",
" environment_test.gym = env_test\n",
"\n",
" runner_test = Runner(agent=agent, environment=environment_test)\n",
" np.random.seed(i)\n",
" runner_test.run(\n",
" episodes=2, max_timesteps=32, episode_finished=EpisodeFinished(10))\n",
" df = pd.DataFrame(environment_test.gym.infos)\n",
"# df.index=df['index']\n",
" \n",
" s=sharpe(df.rate_of_return+1)\n",
" mdd=MDD(df.rate_of_return+1)\n",
" data.append(dict(sharpe=s,mdd=mdd))\n",
" print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
" print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
" print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
" print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
" print('')\n",
" df.portfolio_value.plot(label=str(i))\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:35.076620Z",
"start_time": "2017-10-15T02:29:49.737Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:30:35.077480Z",
"start_time": "2017-10-15T02:29:49.742Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# one big test over train\n",
"# one big test\n",
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
"steps=len(df_train)-window_length-2\n",
"env = EnvWrapper(\n",
" df=df_train,\n",
" steps=steps, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env.seed = 0 \n",
"environment = OpenAIGym('CartPole-v0')\n",
"environment.gym = env\n",
"\n",
"agent.load_model(save_path)\n",
"runner = Runner(agent=agent, environment=environment)\n",
"runner.run(\n",
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
"\n",
"df = pd.DataFrame(env.infos)\n",
"df.index=df['index']\n",
"\n",
"s=sharpe(df.rate_of_return+1)\n",
"mdd=MDD(df.rate_of_return+1)\n",
"data.append(dict(sharpe=s,mdd=mdd))\n",
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
"print('')\n",
"\n",
"# show one run vs average market performance\n",
"plt.title('train')\n",
"df.portfolio_value.plot()\n",
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "jupyter3",
"language": "python",
"name": "jupyter3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}