update notebook and requirements

2026-06-27 16:46:41 +08:00 · 2017-11-11 17:01:08 +08:00
parent a08cd3361e
commit d3c819c06e
6 changed files with 4221 additions and 2490 deletions
@@ -135,20 +135,18 @@ We have partial test coverage of the environment, just run:
 # Files

 - environments/portfolio.py - contains an OpenAI gym environment for portfolio trading
- tensorforce-VPG.ipynb - notebook to try a policy gradient agent
- tensorforce-PPO - notebook to try a PPO agent
- data/poloniex_30m.hdf - hdf file with cryptocurrency 30 minutes prices
+- tensorforce-PPO-IEET.ipynb - notebook to try a PPO (policy gradient) agent

 # Differences in implementation

 The main differences from Jiang et al. 2017 are:

 - The first step in a deep learning project should be to make sure the model can overfit; this provides a sanity check. So I am first trying to achieve good results with no trading costs.
- I have not used portfolio vector memory. For ease of implementation I made the information available by replacing the oldest timestep. Your model can slice it, or a Dense or CNN models can just be given the information.
+- I have not used portfolio vector memory. For ease of implementation I made the information available by using the last weights.
 - Instead of DPG ([deterministic policy gradient](http://jmlr.org/proceedings/papers/v32/silver14.pdf)) I tried DDPG ([deep deterministic policy gradient](http://arxiv.org/pdf/1509.02971v2.pdf)) and VPG (vanilla policy gradient) with generalized advantage estimation and PPO.
 - I tried to replicate the best performing CNN model from the paper and haven't attempted the LSTM or RNN models.
- instead of selecting 12 assets for each window I chose 5 assets that have existed for the longest time
- My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)
+- Instead of selecting 12 assets for each window, I chose the 3 assets that have existed for the longest time.
+- ~~My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)~~ fixed

 # TODO

@@ -1,13 +1,10 @@
-affine==2.0.0.post1
-alembic==0.9.1
-amqp==1.4.9
-anyjson==0.3.3
-appdirs==1.4.3
+affine==2.1.0
+apipkg==1.4
 arrow==0.10.0
-asn1crypto==0.22.0
 atari-py==0.1.1
+autopep8==1.3.2
 azure==1.0.3
-azure-common==1.1.6
+azure-common==1.1.8
 azure-mgmt==0.20.2
 azure-mgmt-common==0.20.0
 azure-mgmt-compute==0.20.1
@@ -19,282 +16,159 @@ azure-nspkg==2.0.0
 azure-servicebus==0.20.1
 azure-servicemanagement-legacy==0.20.2
 azure-storage==0.20.3
-Babel==2.2.0
-backoff==1.4.0
+backoff==1.4.3
 backports.weakref==1.0rc1
-baselines==0.1.3
-bcolz==0.12.1
-bcrypt==2.0.0
-beautifulsoup4==4.5.3
-billiard==3.3.0.23
+-e git+https://github.com/openai/baselines.git@4993286230ac92ead39a66005b7042b56b8598b0#egg=baselines
+bcolz==1.1.2
+beautifulsoup4==4.6.0
+bench==2.8
+BitcoinExchangeFH==0.2.3
 bleach==1.5.0
-blessings==1.6
-blinker==1.4
-boto==2.39.0
-Bottleneck==1.2.1
-bpython==0.16
-bqplot==0.9.0
+boto3==1.4.6
+botocore==1.6.3
 bs4==0.0.1
-bt==0.2.5
-cached-property==1.3.0
-cachetools==2.0.0
-celery==3.1.19
-cffi==1.10.0
-cfscrape==1.8.0
-chardet==2.3.0
-ChatterBot==0.6.0
-chatterbot-corpus==0.0.1
-chatterbot-voice==0.1.3
+certifi==2017.7.27.1
+chardet==3.0.4
 click==6.7
 click-plugins==1.0.3
 cligj==0.4.0
-cloudpickle==0.2.2
-colorama==0.3.9
-contextlib2==0.5.5
-cryptography==1.8.1
-cssmin==0.2.0
-curtsies==0.2.11
+cntk==2.2
+coverage==4.4.1
+cvxopt==1.1.9
 cycler==0.10.0
-cyordereddict==1.0.0
-Cython==0.25.2
-dask==0.15.0
-dataset==0.8.0
-deap==1.0.2
-decorator==4.0.11
-descartes==1.1.0
-dill==0.2.6
-docker==2.3.0
-docker-compose==1.13.0
-docker-pycreds==0.2.1
-dockerpty==0.4.1
-docopt==0.6.2
-dominate==2.3.1
-earthengine-api==0.1.111
-easydict==1.6
-EbookLib==0.16
-ee==0.2
-elasticsearch==5.3.0
-elasticsearch-dsl==5.2.0
-empyrical==0.2.2
-entrypoints==0.2.2
-ephem==3.7.6.0
-eventlet==0.17.4
-e git+git@github.com:wassname/FanFicFare.git@1da7f6ec159bee18b650fa074d2d59d4721e3653#egg=FanFicFare
-fastkml==0.11
-ffn==0.3.2
-Fiona==1.7.5
-fix-yahoo-finance==0.0.7
-Flask==0.12.1
-Flask-AlchemyDumps==0.0.10
-Flask-Assets==0.11
-Flask-Bcrypt==0.7.1
-Flask-Bootstrap==3.3.5.7
-Flask-Celery-Helper==1.1.0
-Flask-DebugToolbar==0.10.0
-Flask-Login==0.3.2
-Flask-Mail==0.9.1
-Flask-Migrate==1.7.0
-Flask-Principal==0.4.0
-Flask-Redis==0.1.0
-Flask-Script==2.0.5
-Flask-Security==1.7.5
-Flask-SQLAlchemy==2.1
-Flask-WTF==0.12
-flower==0.8.4
-Folio==0.4
-future==0.15.2
-futures==3.1.1
-gbdx-auth==0.2.4
-gbdxtools==0.11.7
-GDAL==2.1.0
-geomet==0.1.1
-geopandas==0.2.1
-google-api-python-client==1.6.2
-google-auth==1.0.1
-google-auth-httplib2==0.0.2
-google-cloud-bigquery==0.25.0
-google-cloud-core==0.25.0
-googleapis-common-protos==1.5.2
-googletrans==2.1.2
-goslate==1.5.1
-greenlet==0.4.12
-gunicorn==19.4.5
-gym==0.9.2
+Cython==0.26.1
+decorator==4.1.2
+dill==0.2.7.1
+docutils==0.14
+entrypoints==0.2.3
+execnet==1.4.1
+flake8==3.4.1
+future==0.16.0
+gym==0.9.3
 h5py==2.7.0
-hammock==0.2.4
-html2text==2016.9.19
 html5lib==0.9999999
-httplib2==0.10.3
-idna==2.5
-inflection==0.3.1
-intervaltree==2.1.0
-ipykernel==4.6.0
-ipython==5.3.0
+idna==2.6
+imageio==2.2.0
+ipykernel==4.6.1
+ipython==6.1.0
 ipython-genutils==0.2.0
 ipywidgets==6.0.0
-itsdangerous==0.24
 jedi==0.10.2
-jellyfish==0.5.6
 Jinja2==2.9.6
+jmespath==0.9.3
 joblib==0.11
-jsmin==2.2.1
-jsondatabase==0.1.7
 jsonschema==2.6.0
 jupyter==1.0.0
-jupyter-client==5.0.1
+jupyter-client==5.1.0
 jupyter-console==5.1.0
 jupyter-contrib-core==0.3.1
+jupyter-contrib-nbextensions==0.2.8
 jupyter-core==4.3.0
+jupyter-highlight-selected-word==0.0.11
+jupyter-latex-envs==1.3.8.4
 jupyter-nbextensions-configurator==0.2.5
 Keras==2.0.5
 keras-contrib==1.2.1
-keras-rl==0.3.0
+keras-rl==0.3.1
 keras-tqdm==2.0.1
-kombu==3.0.37
-leven==1.0.4
-Logbook==1.0.0
-lru-dict==1.1.6
-lxml==3.7.3
-Mako==1.0.6
-Markdown==2.6.8
+-e git+git@github.com:wassname/libcryptomarket.git@7351c794ed43b11fdd187cc9c5e85eb4275c554a#egg=libcryptomarket
+Markdown==2.6.9
 MarkupSafe==1.0
-matplotlib==2.0.0
+matplotlib==2.0.2
+mccabe==0.6.1
 mistune==0.7.4
-more-itertools==3.2.0
-MosT==0.12
-mpmath==0.19
-mtranslate==1.6
-multipledispatch==0.4.9
-munch==2.1.1
-nbconvert==5.1.1
+mock==2.0.0
+mpi4py==2.0.0
+mujoco-py==0.5.7
+nbconvert==5.2.1
 nbformat==4.3.0
-ndg-httpsclient==0.4.2
 networkx==1.11
-nltk==3.2.2
-normality==0.4.0
-nose==1.3.7
 notebook==5.0.0
 numexpr==2.6.2
-numpy==1.13.1
-oauth2client==4.0.0
-oauthlib==2.0.2
+numpy==1.13.3
 olefile==0.44
-packaging==16.8
+opencv-python==3.3.0.10
 pandas==0.20.3
-pandas-datareader==0.4.0
+pandas-datareader==0.5.0
 pandas-profiling==1.4.0
 pandocfilters==1.4.1
-passlib==1.7.1
-path.py==10.1
+path.py==10.3.1
 patsy==0.4.1
-pbr==3.0.1
+pbr==3.1.1
+pep8==1.7.0
 pexpect==4.2.1
 pickleshare==0.7.4
-Pillow==4.1.0
-planet==1.0.0
-poloniex==0.4.6
+Pillow==4.3.0
+pluggy==0.5.2
 pprint==0.1
-praw==4.4.0
-prawcore==0.8.0
-progressbar2==3.30.2
-prompt-toolkit==1.0.14
-protobuf==3.3.0
-psycopg2==2.7.1
-ptyprocess==0.5.1
-py==1.4.33
-py-googletrans==1.2
-PyAlgoTrade==0.18
-pyasn1==0.2.3
-pyasn1-modules==0.0.9
-pycparser==2.17
-pycurl==7.43.0
-PyExecJS==1.4.0
-pygeoif==0.6
+progressbar2==3.34.3
+prompt-toolkit==1.0.15
+protobuf==3.4.0
+psutil==5.2.2
+ptyprocess==0.5.2
+py==1.4.34
+pycodestyle==2.3.1
+pyflakes==1.5.0
 pyglet==1.2.4
 Pygments==2.2.0
-PyJWT==1.4.2
-pymongo==3.4.0
+PyMySQL==0.7.11
 PyOpenGL==3.1.0
-pyOpenSSL==17.0.0
 pyparsing==2.2.0
-PyPrind==2.11.1
 pyproj==1.9.5.1
-pyshp==1.2.10
-pysle==1.5.7
-pytest==3.0.7
-python-dateutil==2.6.0
-python-editor==1.0.3
-python-twitter==3.2.1
-python-utils==2.1.0
+pytest==3.2.3
+pytest-cache==1.0
+pytest-cov==2.5.1
+pytest-forked==0.2
+pytest-pep8==1.0.6
+pytest-xdist==1.20.0
+python-coveralls==2.9.1
+python-dateutil==2.6.1
+python-utils==2.2.0
 pytz==2017.2
+PyWavelets==0.5.2
 PyYAML==3.12
 pyzmq==16.0.2
-qgrid==0.3.2
+qPython==1.2.2
 qtconsole==4.3.0
-Quandl==3.2.0
-rasterio==1.0a8
-recurrentshop==0.0.1
-redis==2.10.3
-requests==2.11.1
+rasterio==0.36.0
+requests==2.18.4
 requests-file==1.4.2
 requests-ftp==0.3.1
-requests-futures==0.9.7
-requests-oauthlib==0.8.0
-rsa==3.4.2
-scikit-learn==0.18.1
+s3transfer==0.1.10
+scikit-image==0.13.0
+scikit-learn==0.18.2
 scipy==0.19.1
-seaborn==0.7.1
-selenium==3.4.3
-seq2seq==0.1.0
-Shapely==1.5.17
+seaborn==0.8
+Shapely==1.6.0
 simplegeneric==0.8.1
-simplejson==3.10.0
-six==1.10.0
+six==1.11.0
 sklearn==0.0
 snuggs==1.4.1
-sortedcontainers==1.5.7
-SpeechRecognition==3.6.5
-SQLAlchemy==1.1.9
-SQLAlchemy-FullText-Search==0.2.3
-SQLAlchemy-Utils==0.31.5
 statsmodels==0.8.0
-stevedore==1.21.0
-sympy==1.0
-tables==3.3.0
-tabulate==0.7.7
-tensor2tensor==1.0.5
-tensorflow==1.2.1
-tensorflow-gpu==1.2.0
-e git+git@github.com:reinforceio/tensorforce.git@21323bf4226a93892637551cd8d75b8e13d04eaa#egg=tensorforce
+tables==3.4.2
+tensorboard-logger==0.0.4
+tensorflow==1.3.0
+tensorflow-gpu==1.3.0
+tensorflow-tensorboard==0.1.8
+-e git+https://github.com/wassname/tensorforce.git@266fc21f472fc1ed7edb8ff3b6fd8450b3ef4828#egg=tensorforce
 terminado==0.6
-testpath==0.3
-texttable==0.8.8
+testpath==0.3.1
 Theano==0.9.0
-toolz==0.8.2
-torch==0.1.12.post2
+timer==0.1
+torch==0.2.0.post3
 torchvision==0.1.8
-tornado==4.4.3
-TPOT==0.8.3
-tqdm==4.11.2
+tornado==4.5.1
+tox==2.9.1
+tqdm==4.19.2
 traitlets==4.3.2
-traittypes==0.0.6
-Unipath==1.1
-update-checker==0.16
-uritemplate==3.0.0
-urllib3==1.21.1
+universal-portfolios==0.3.2
+urllib3==1.22
 virtualenv==15.1.0
-virtualenv-clone==0.2.6
-virtualenvwrapper==4.7.2
-visitor==0.1.3
+visdom==0.1.5
 wcwidth==0.1.7
-webassets==0.12.1
 webencodings==0.5.1
-websocket-client==0.40.0
+websocket-client==0.44.0
 Werkzeug==0.12.2
 widgetsnbextension==2.0.0
-WTForms==2.1
-WTForms-ParsleyJS==2.0.1
-xmltodict==0.10.2
-yapf==0.16.1
-zipline==1.1.0
+yapf==0.16.3
 zmq==0.0.0
@@ -1,22 +1,16 @@
 # data
 # rl
-gym==0.9.2
-tensorflow-gpu==1.2.0
-# for mpi4py youo need libopenmpi-dev
-mpi4py
-https://github.com/wassname/baselines/archive/676b6f0b18fe9855579cb271484dcdf118e2588c.zip
-https://github.com/farizrahman4u/keras-contrib/archive/15133077af4d9c379c8219574898ce29cae12e48.zip
+gym==0.9.3
+tensorflow-gpu==1.3.0
 h5py==2.7.0
-keras-rl==0.3.0
-keras-tqdm==2.0.1
-# use my branch with prioritised ppo, untill mergeed
-https://github.com/wassname/tensorforce/archive/merged_6b.zip
+# tensorforce 0.3.1, pinned to a specific commit
+https://github.com/reinforceio/tensorforce/archive/17cf4b84d8fbad9d7c65d1d7bde36faabddd6a36.zip
 # numbers
 tables==3.4.2
-seaborn==0.7.1
+seaborn==0.8
 pandas==0.20.3
-numpy==1.12.1
-matplotlib==2.0.0
+numpy==1.13.3
+matplotlib==2.0.2
 # misc
-tqdm==4.11.2
-seaborn==0.7.1
+tqdm==4.19.2
+seaborn==0.8
@@ -1,975 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Uses tensorforce tensorforce-0.2.0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:29.756080Z",
-     "start_time": "2017-10-15T02:50:28.814041Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# plotting\n",
-    "%matplotlib inline\n",
-    "from matplotlib import pyplot as plt\n",
-    "import seaborn as sns\n",
-    "plt.style.use('ggplot')\n",
-    "\n",
-    "# numeric\n",
-    "import numpy as np\n",
-    "from numpy import random\n",
-    "import pandas as pd\n",
-    "\n",
-    "# util\n",
-    "from collections import Counter\n",
-    "import pdb\n",
-    "import glob\n",
-    "import time\n",
-    "import tempfile\n",
-    "import itertools\n",
-    "from tqdm import tqdm_notebook as tqdm\n",
-    "import datetime\n",
-    "\n",
-    "# logging\n",
-    "import logging\n",
-    "logger = log = logging.getLogger(__name__)\n",
-    "# log.setLevel(logging.INFO)\n",
-    "logging.basicConfig()\n",
-    "log.info('%s logger started.', __name__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:29.899144Z",
-     "start_time": "2017-10-15T02:50:29.759262Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "import gym\n",
-    "from gym import error, spaces, utils\n",
-    "from gym.utils import seeding"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:29.989592Z",
-     "start_time": "2017-10-15T02:50:29.901021Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.sys.path.append(os.path.abspath('.'))\n",
-    "%reload_ext autoreload\n",
-    "%autoreload 2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.089437Z",
-     "start_time": "2017-10-15T02:50:29.991629Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'./outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# params\n",
-    "window_length = 50\n",
-    "import datetime\n",
-    "ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')\n",
-    "save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_%s.model' % ts\n",
-    "save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'\n",
-    "save_path\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.165055Z",
-     "start_time": "2017-10-15T02:50:30.091635Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'logs/tensorforce-PPO-prioritised_20171015_02-50-30'"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "log_dir = os.path.join('logs', os.path.splitext(os.path.basename(save_path))[0])\n",
-    "try:\n",
-    "    os.makedirs(log_dir)\n",
-    "except OSError:\n",
-    "    pass\n",
-    "log_dir"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Enviroment"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.239055Z",
-     "start_time": "2017-10-15T02:50:30.168273Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from rl_portfolio_management.environments.portfolio import PortfolioEnv"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.334097Z",
-     "start_time": "2017-10-15T02:50:30.241263Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from rl.memory import  SequentialMemory, Memory\n",
-    "from collections import deque\n",
-    "\n",
-    "class EnvWrapper(PortfolioEnv):\n",
-    "    \"\"\"Wraps env to normalise and reshape action.\"\"\"\n",
-    "    def __init__(self, window_length=50, *args, **kwargs):\n",
-    "        super().__init__(*args, **kwargs)\n",
-    "        \n",
-    "    def step(self, action):\n",
-    "        # also it puts it in a list\n",
-    "        if isinstance(action, list):\n",
-    "            action = action[0]\n",
-    "        \n",
-    "        # we have to normalise for some reason softmax wont work\n",
-    "        if isinstance(action, dict):\n",
-    "            action = np.abs(list(action.values()))\n",
-    "            action /= action.sum()        \n",
-    "        \n",
-    "        return super().step(action) "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.635247Z",
-     "start_time": "2017-10-15T02:50:30.337761Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:gym.envs.registration:Making new env: CartPole-v0\n",
-      "[2017-10-15 10:50:30,622] Making new env: CartPole-v0\n",
-      "INFO:gym.envs.registration:Making new env: CartPole-v0\n",
-      "[2017-10-15 10:50:30,629] Making new env: CartPole-v0\n"
-     ]
-    }
-   ],
-   "source": [
-    "df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
-    "env = EnvWrapper(\n",
-    "    df=df_train,\n",
-    "    steps=300, \n",
-    "    scale=True, \n",
-    "    augment=0.000,\n",
-    "    trading_cost=0, # let just overfit first,\n",
-    "    window_length = window_length,\n",
-    "    \n",
-    ")\n",
-    "env.seed = 0   \n",
-    "\n",
-    "df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
-    "env_test = EnvWrapper(\n",
-    "    df=df_test,\n",
-    "    steps=300, \n",
-    "    scale=True, \n",
-    "    augment=0.00,\n",
-    "    trading_cost=0, # let just overfit first\n",
-    "    window_length=window_length,\n",
-    ")\n",
-    "env_test.seed = 0  \n",
-    "\n",
-    "from tensorforce.contrib.openai_gym import OpenAIGym\n",
-    "environment = OpenAIGym('CartPole-v0')\n",
-    "environment.gym = env\n",
-    "\n",
-    "environment_test = OpenAIGym('CartPole-v0')\n",
-    "environment_test.gym = env_test"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:30.725276Z",
-     "start_time": "2017-10-15T02:50:30.636432Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.0 False {'reward': 0.0, 'log_return': 0.0, 'portfolio_value': 1.0, 'return': 0.9994203267253029, 'rate_of_return': 0.0, 'weights_mean': 0.16666666666666666, 'weights_std': 0.092620963292867634, 'cost': 0.0, 'market_value': 0.99991966957965184, 'date': 1463925600.0, 'steps': 2}\n",
-      "(5, 50, 3) (5, 50, 3)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# check shapes\n",
-    "obs1, reward, done, info=env.step(np.random.random(env.action_space.shape))\n",
-    "print(reward, done, info)\n",
-    "obs2 = env.reset()\n",
-    "print(obs1.shape,obs2.shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-07-16T04:41:21.116729Z",
-     "start_time": "2017-07-16T12:41:21.086620+08:00"
-    }
-   },
-   "source": [
-    "# Model\n",
-    "\n",
-    "Derived from  https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:53.928811Z",
-     "start_time": "2017-10-15T02:50:30.726693Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from tensorforce import Configuration\n",
-    "from tensorforce.agents import PPOAgent\n",
-    "from tensorforce.core.networks import layered_network_builder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:54.317167Z",
-     "start_time": "2017-10-15T02:50:53.931397Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# layer helpers from:\n",
-    "# https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90\n",
-    "import tensorflow as tf\n",
-    "from math import sqrt\n",
-    "from tensorforce import util\n",
-    "from tensorforce import TensorForceError\n",
-    "\n",
-    "def linear(x, size, bias=True, l2_regularization=0.0):\n",
-    "    if util.rank(x) != 2:\n",
-    "        raise TensorForceError('Invalid input rank for linear layer.')\n",
-    "    with tf.variable_scope('linear'):\n",
-    "        weights = tf.Variable(initial_value=tf.random_normal(shape=(x.get_shape()[1].value, size), stddev=sqrt(2.0 / (x.get_shape()[1].value + size))))\n",
-    "        if l2_regularization > 0.0:\n",
-    "            tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=weights))\n",
-    "        x = tf.matmul(a=x, b=weights)\n",
-    "        if bias:\n",
-    "            bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
-    "            if l2_regularization > 0.0:\n",
-    "                tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
-    "            x = tf.nn.bias_add(value=x, bias=bias)\n",
-    "    return x\n",
-    "\n",
-    "def nonlinearity(x, name='relu'):\n",
-    "    with tf.variable_scope('nonlinearity'):\n",
-    "        if name == 'elu':\n",
-    "            x = tf.nn.elu(features=x)\n",
-    "        elif name == 'relu':\n",
-    "            x = tf.nn.relu(features=x)\n",
-    "        elif name == 'selu':\n",
-    "            # https://arxiv.org/pdf/1706.02515.pdf\n",
-    "            alpha = 1.6732632423543772848170429916717\n",
-    "            scale = 1.0507009873554804934193349852946\n",
-    "            negative = alpha * tf.nn.elu(features=x)\n",
-    "            x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)\n",
-    "        elif name == 'sigmoid':\n",
-    "            x = tf.sigmoid(x=x)\n",
-    "        elif name == 'softmax':\n",
-    "            x = tf.nn.softmax(logits=x)\n",
-    "        elif name == 'tanh':\n",
-    "            x = tf.nn.tanh(x=x)\n",
-    "        else:\n",
-    "            raise TensorForceError('Invalid nonlinearity.')\n",
-    "    return x\n",
-    "\n",
-    "def dense(x, size, bias=True, activation='relu', l2_regularization=0.0):\n",
-    "    if util.rank(x) != 2:\n",
-    "        raise TensorForceError('Invalid input rank for dense layer.')\n",
-    "    with tf.variable_scope('dense'):\n",
-    "        x = linear(x=x, size=size, bias=bias, l2_regularization=l2_regularization)\n",
-    "        x = nonlinearity(x=x, name=activation)\n",
-    "    return x\n",
-    "\n",
-    "def flatten(x):\n",
-    "    with tf.variable_scope('flatten'):\n",
-    "        x = tf.reshape(tensor=x, shape=(-1, util.prod(x.get_shape().as_list()[1:])))\n",
-    "    return x\n",
-    "\n",
-    "def conv2d(x, size, window=(3,3), stride=(1,1), bias=False, activation='relu', l2_regularization=0.0, padding='SAME'):\n",
-    "    if util.rank(x) != 4:\n",
-    "        raise TensorForceError('Invalid input rank for conv2d layer.')\n",
-    "    with tf.variable_scope('conv2d'):\n",
-    "        filters = tf.Variable(initial_value=tf.random_normal(shape=(window[0], window[1], x.get_shape()[3].value, size), stddev=sqrt(2.0 / size)))\n",
-    "        if l2_regularization > 0.0:\n",
-    "            tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))\n",
-    "        x = tf.nn.conv2d(input=x, filter=filters, strides=(1, stride[0], stride[1], 1), padding=padding)\n",
-    "        if bias:\n",
-    "            bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
-    "            if l2_regularization > 0.0:\n",
-    "                tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
-    "            x = tf.nn.bias_add(value=x, bias=bias)\n",
-    "        x = nonlinearity(x=x, name=activation)\n",
-    "    return x\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:54.442239Z",
-     "start_time": "2017-10-15T02:50:54.318527Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# build a network for a given input\n",
-    "def network_builder(inputs, summary_level):\n",
-    "    if len(inputs) != 1:\n",
-    "        raise TensorForceError('Layered network must have only one input.')\n",
-    "    x = next(iter(inputs.values()))\n",
-    "    \n",
-    "    x = conv2d(x=x, size=2, window=(1,3), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
-    "    x = conv2d(x=x, size=20, window=(1,window_length-2), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
-    "    x = conv2d(x=x, size=1, window=(1,1), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
-    "    x = flatten(x)\n",
-    "    x = nonlinearity(x,name='softmax')\n",
-    "    \n",
-    "    return x\n",
-    "network=network_builder"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Agent"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:54.566983Z",
-     "start_time": "2017-10-15T02:50:54.444213Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "({'action0': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'},\n",
-       "  'action1': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'},\n",
-       "  'action2': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'},\n",
-       "  'action3': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'},\n",
-       "  'action4': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'},\n",
-       "  'action5': {'epsilon': 1,\n",
-       "   'epsilon_final': 0.005,\n",
-       "   'epsilon_timesteps': 100000.0,\n",
-       "   'start_after': 0,\n",
-       "   'type': 'epsilon_anneal'}},)"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "exploration=dict(\n",
-    "    type=\"epsilon_anneal\",\n",
-    "    epsilon=1,\n",
-    "    epsilon_final= 0.005,\n",
-    "    epsilon_timesteps= 1e5,\n",
-    "    start_after=0,\n",
-    ")\n",
-    "{'action' + str(n): exploration for n in range(env.action_space.shape[0])},"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:57.570700Z",
-     "start_time": "2017-10-15T02:50:54.568901Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:tensorforce.agents.agent:Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n",
-      "[2017-10-15 10:50:57,553] Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<tensorforce.agents.ppo_agent.PPOAgent at 0x7fc5592f0da0>"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "batch_size=256\n",
-    "exploration=dict(\n",
-    "    type=\"epsilon_anneal\",\n",
-    "    epsilon=1,\n",
-    "    epsilon_final= 0.005,\n",
-    "    epsilon_timesteps= 1e5,\n",
-    "    start_after=0,\n",
-    ")\n",
-    "config = Configuration(   \n",
-    "    # Each agent requires the following ``Configuration`` parameters:\n",
-    "    # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/agents/agent.py#L32\n",
-    "    network=network,\n",
-    "    states=dict(shape=tuple(env.observation_space.shape), type='float'),\n",
-    "    actions={'action' + str(n): dict(continuous=True) for n in range(env.action_space.shape[0])},\n",
-    "    preprocessing = None,# dict or list containing state preprocessing configuration.\n",
-    "    exploration = {'action' + str(n): exploration for n in range(env.action_space.shape[0])}, # dict containing action exploration configuration.\n",
-    "      \n",
-    "    \n",
-    "    # The `MemoryAgent` class additionally requires the following parameters:\n",
-    "    first_update = batch_size*2, # integer indicating the number of steps to pass before the first update.\n",
-    "    memory_capacity = 300000, # integer of maximum experiences to store. (takes 2s to sample with 100k)\n",
-    "    memory = 'prioritized_replay', # string indicating memory type ('replay' or 'prioritized_replay').\n",
-    "    update_frequency = int(batch_size/2), # integer indicating the number of steps between model updates.\n",
-    "    repeat_update = 2, # integer indicating how often to repeat the model update.\n",
-    "\n",
-    "    # Each model requires the following configuration parameters:\n",
-    "    # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/model.py#L33\n",
-    "    discount = 0.97, # float of discount factor (gamma).\n",
-    "    learning_rate = 1e-3, # float of learning rate (alpha). (3e-4 in paper 1e-3 (atari) and 3e-4 in baselines)\n",
-    "    optimizer = 'adam', # string of optimizer to use (e.g. 'adam' in paper).\n",
-    "    device = None, # string of tensorflow device name.\n",
-    "#     tf_summary = log_dir, # string directory to write tensorflow summaries. Default None\n",
-    "#     tf_summary_level = 1, # int indicating which tensorflow summaries to create.\n",
-    "    tf_summary_interval = 1000, # int number of calls to get_action until writing tensorflow summaries on update.\n",
-    "    log_level = 'info', # string containing log level (e.g. 'info').\n",
-    "    distributed = False, # boolean indicating whether to use distributed tensorflow.\n",
-    "    global_model = False, # global model.\n",
-    "    session = None, # session to use. \n",
-    "\n",
-    "    # A Policy Gradient Model expects the following additional configuration parameters:\n",
-    "    # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/policy_gradient_model.py#L35\n",
-    "    # I edited my tensorflow install to have a flatten layer to make this work (my branch is in requirements.txt)\n",
-    "    baseline=dict(\n",
-    "        type=\"mlp\",\n",
-    "        sizes=[128, 128],\n",
-    "        epochs=1,\n",
-    "        update_batch_size=128,\n",
-    "        learning_rate=0.01\n",
-    "    ), # string indicating the baseline value function (currently 'linear' or 'mlp').\n",
-    "    gae_rewards= True, # boolean indicating whether to use GAE.\n",
-    "    gae_lambda= 0.97, # float of the Generalized Advantage Estimation lambda.\n",
-    "    normalize_rewards= False,# boolean indicating whether to normalize the advantage or not.\n",
-    "    \n",
-    "    # PPO Params \n",
-    "    # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/ppo_model.py\n",
-    "    entropy_penalty=0.01, # 0 and 0.01 in baselines\n",
-    "    loss_clipping=0.1,  # Trust region clipping\n",
-    "    epochs=4,  # Number of training epochs for SGD, data is repeated this much 4 (atari),10 in baselines, 10 in paper\n",
-    "    optimizer_batch_size=32,  # Batch size for optimiser, should be small (e.g. 64 in paper)\n",
-    "    random_sampling=True  # Sampling strategy for minibatch replay memory\n",
-    ")\n",
-    "\n",
-    "# Create a Trust Region Policy Optimization agent\n",
-    "agent = PPOAgent(config=config)\n",
-    "agent"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Train\n",
-    "\n",
-    "## Callbacks"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:57.755421Z",
-     "start_time": "2017-10-15T02:50:57.573140Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from rl_portfolio_management.callbacks.tensorforce import EpisodeFinishedTQDM\n",
-    "from rl_portfolio_management.util import MDD, sharpe"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:19:28.278977Z",
-     "start_time": "2017-10-15T02:19:28.132177Z"
-    }
-   },
-   "source": [
-    "## Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:57.895354Z",
-     "start_time": "2017-10-15T02:50:57.761809Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from tensorforce.execution import Runner\n",
-    "runner = Runner(agent=agent, environment=environment, save_path=save_path, save_episodes=1000)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-10-15T02:50:57.998473Z",
-     "start_time": "2017-10-15T02:50:57.897093Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# Check my PR is included, https://github.com/wassname/tensorforce/tree/merged_6b\n",
-    "import tensorforce.core.memories\n",
-    "assert isinstance(runner.agent.memory,tensorforce.core.memories.PrioritizedReplay)\n",
-    "assert isinstance(runner.agent, tensorforce.agents.MemoryAgent)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.630Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# resume\n",
-    "saves=glob.glob(save_path+'-*')\n",
-    "if len(saves)>0:\n",
-    "    # load saved\n",
-    "    last_save = os.path.splitext(saves[0])[0]\n",
-    "    runner.agent.load_model(last_save)\n",
-    "    print('loaded', last_save)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.632Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b018b4d97fbd4f1daaf1b5e0e8e9bd09"
-      }
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TensorBoardLogger started. Run `tensorboard --logdir=/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/rl_keras_finance/portfolio-rl-jiang_2017/logs/tensorforce-PPO-prioritised_20171015_02-50-30` to visualize\n"
-     ]
-    }
-   ],
-   "source": [
-    "episodes = int(6e6 / 30)\n",
-    "runner.run(\n",
-    "    episodes=episodes,\n",
-    "    max_timesteps=200,\n",
-    "    episode_finished=EpisodeFinishedTQDM(\n",
-    "        log_intv=100, \n",
-    "        episodes=episodes,\n",
-    "        log_dir=log_dir,\n",
-    "        session=runner.agent.model.session, \n",
-    "    )\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.633Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# save\n",
-    "agent.save_model(save_path)\n",
-    "save_path"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Test"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-07-19T00:09:54.262405Z",
-     "start_time": "2017-07-19T08:09:54.226639+08:00"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.635Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# one big test\n",
-    "df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
-    "steps=2400#len(df_test)-window_length-2\n",
-    "env_test = EnvWrapper(\n",
-    "    df=df_test,\n",
-    "    steps=steps, \n",
-    "    scale=True, \n",
-    "    augment=0.00,\n",
-    "    trading_cost=0, # let's just overfit first\n",
-    "    window_length=window_length,\n",
-    ")\n",
-    "env_test.seed = 0  \n",
-    "environment_test = OpenAIGym('CartPole-v0')\n",
-    "environment_test.gym = env_test\n",
-    "\n",
-    "agent.load_model(save_path)\n",
-    "runner_test = Runner(agent=agent, environment=environment_test)\n",
-    "runner_test.run(\n",
-    "episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
-    "\n",
-    "df = pd.DataFrame(env_test.infos)\n",
-    "df.index=df['index']\n",
-    "\n",
-    "s=sharpe(df.rate_of_return+1)\n",
-    "mdd=MDD(df.rate_of_return+1)\n",
-    "apv=df.portfolio_value.iloc[-1]\n",
-    "print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(apv))\n",
-    "print('SR (Sharpe ratio):                 \\t{: 2.6f}'.format( s))\n",
-    "print('MDD (max drawdown):                \\t{: 2.6%}'.format( mdd))\n",
-    "print('MMR (mean market returns):         \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
-    "print('')\n",
-    "\n",
-    "# show one run vs average market performance\n",
-    "plt.title('test MDD={}, Sharpe={}, APV={}'.format(mdd,s,apv))\n",
-    "df.portfolio_value.plot()\n",
-    "df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
-    "plt.legend()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2017-07-19T00:48:39.193976Z",
-     "start_time": "2017-07-19T08:48:39.154752+08:00"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.637Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "\n",
-    "\n",
-    "data=[]\n",
-    "for i in range(10):\n",
-    "    agent.load_model(save_path)\n",
-    "    df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
-    "    \n",
-    "    env_test = EnvWrapper(\n",
-    "        df=df_test,\n",
-    "        steps=1800, \n",
-    "        scale=True, \n",
-    "        augment=0.00,\n",
-    "        trading_cost=0, # let's just overfit first\n",
-    "        window_length=window_length,\n",
-    "    )\n",
-    "    env_test.seed = 0  \n",
-    "\n",
-    "\n",
-    "    environment_test = OpenAIGym('CartPole-v0')\n",
-    "    environment_test.gym = env_test\n",
-    "\n",
-    "    runner_test = Runner(agent=agent, environment=environment_test)\n",
-    "    np.random.seed(i)\n",
-    "    runner_test.run(\n",
-    "    episodes=2, max_timesteps=32, episode_finished=EpisodeFinished(10))\n",
-    "    df = pd.DataFrame(environment_test.gym.infos)\n",
-    "#     df.index=df['index']\n",
-    "    \n",
-    "    s=sharpe(df.rate_of_return+1)\n",
-    "    mdd=MDD(df.rate_of_return+1)\n",
-    "    data.append(dict(sharpe=s,mdd=mdd))\n",
-    "    print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
-    "    print('SR (Sharpe ratio):                 \\t{: 2.6f}'.format( s))\n",
-    "    print('MDD (max drawdown):                \\t{: 2.6%}'.format( mdd))\n",
-    "    print('MMR (mean market returns):         \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
-    "    print('')\n",
-    "    df.portfolio_value.plot(label=str(i))\n",
-    "plt.legend()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.638Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2017-10-15T02:50:28.640Z"
-    },
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# one big test over train\n",
-    "# one big test\n",
-    "df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
-    "steps=len(df_train)-window_length-2\n",
-    "env = EnvWrapper(\n",
-    "    df=df_train,\n",
-    "    steps=steps, \n",
-    "    scale=True, \n",
-    "    augment=0.00,\n",
-    "    trading_cost=0, # let's just overfit first\n",
-    "    window_length=window_length,\n",
-    ")\n",
-    "env.seed = 0  \n",
-    "environment = OpenAIGym('CartPole-v0')\n",
-    "environment.gym = env\n",
-    "\n",
-    "agent.load_model(save_path)\n",
-    "runner = Runner(agent=agent, environment=environment)\n",
-    "runner.run(\n",
-    "episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
-    "\n",
-    "df = pd.DataFrame(env.infos)\n",
-    "df.index=df['index']\n",
-    "\n",
-    "s=sharpe(df.rate_of_return+1)\n",
-    "mdd=MDD(df.rate_of_return+1)\n",
-    "data.append(dict(sharpe=s,mdd=mdd))\n",
-    "print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
-    "print('SR (Sharpe ratio):                 \\t{: 2.6f}'.format( s))\n",
-    "print('MDD (max drawdown):                \\t{: 2.6%}'.format( mdd))\n",
-    "print('')\n",
-    "\n",
-    "# show one run vs average market performance\n",
-    "plt.title('train')\n",
-    "df.portfolio_value.plot()\n",
-    "df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
-    "plt.legend()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "jupyter3",
-   "language": "python",
-   "name": "jupyter3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}