update notebook and requirements

This commit is contained in:
wassname
2017-11-11 17:01:08 +08:00
parent a08cd3361e
commit d3c819c06e
6 changed files with 4221 additions and 2490 deletions
+4 -6
View File
@@ -135,20 +135,18 @@ We have partial test coverage of the environment, just run:
# Files
- enviroments/portfolio.py - contains an openai environment for porfolio trading
- tensorforce-VPG.ipynb - notebook to try a policy gradient agent
- tensorforce-PPO - notebook to try a PPO agent
- data/poloniex_30m.hdf - hdf file with cryptocurrency 30 minutes prices
- tensorforce-PPO-IEET.ipynb - notebook to try a policy gradient agent
# Differences in implementation
The main differences from Jiang et. al. 2017 are:
- The first step in a deep learning project should be to make sure the model can overfit, this provides a sanity check. So I am first trying to acheive good results with no trading costs.
- I have not used portfolio vector memory. For ease of implementation I made the information available by replacing the oldest timestep. Your model can slice it, or a Dense or CNN models can just be given the information.
- I have not used portfolio vector memory. For ease of implementation I made the information available by using the last weights.
- Instead of DPG ([deterministic policy gradient](http://jmlr.org/proceedings/papers/v32/silver14.pdf)) I tried and DDPG ([deep deterministic policy gradient]( http://arxiv.org/pdf/1509.02971v2.pdf)) and VPG (vanilla policy gradient) with generalized advantage estimation and PPO.
- I tried to replicate the best performing CNN model from the paper and haven't attempted the LSTM or RNN models.
- instead of selecting 12 assets for each window I chose 5 assets that have existed for the longest time
- My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)
- instead of selecting 12 assets for each window I chose 3 assets that have existed for the longest time
- ~~My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)~~ fixed
# TODO
+97 -223
View File
@@ -1,13 +1,10 @@
affine==2.0.0.post1
alembic==0.9.1
amqp==1.4.9
anyjson==0.3.3
appdirs==1.4.3
affine==2.1.0
apipkg==1.4
arrow==0.10.0
asn1crypto==0.22.0
atari-py==0.1.1
autopep8==1.3.2
azure==1.0.3
azure-common==1.1.6
azure-common==1.1.8
azure-mgmt==0.20.2
azure-mgmt-common==0.20.0
azure-mgmt-compute==0.20.1
@@ -19,282 +16,159 @@ azure-nspkg==2.0.0
azure-servicebus==0.20.1
azure-servicemanagement-legacy==0.20.2
azure-storage==0.20.3
Babel==2.2.0
backoff==1.4.0
backoff==1.4.3
backports.weakref==1.0rc1
baselines==0.1.3
bcolz==0.12.1
bcrypt==2.0.0
beautifulsoup4==4.5.3
billiard==3.3.0.23
-e git+https://github.com/openai/baselines.git@4993286230ac92ead39a66005b7042b56b8598b0#egg=baselines
bcolz==1.1.2
beautifulsoup4==4.6.0
bench==2.8
BitcoinExchangeFH==0.2.3
bleach==1.5.0
blessings==1.6
blinker==1.4
boto==2.39.0
Bottleneck==1.2.1
bpython==0.16
bqplot==0.9.0
boto3==1.4.6
botocore==1.6.3
bs4==0.0.1
bt==0.2.5
cached-property==1.3.0
cachetools==2.0.0
celery==3.1.19
cffi==1.10.0
cfscrape==1.8.0
chardet==2.3.0
ChatterBot==0.6.0
chatterbot-corpus==0.0.1
chatterbot-voice==0.1.3
certifi==2017.7.27.1
chardet==3.0.4
click==6.7
click-plugins==1.0.3
cligj==0.4.0
cloudpickle==0.2.2
colorama==0.3.9
contextlib2==0.5.5
cryptography==1.8.1
cssmin==0.2.0
curtsies==0.2.11
cntk==2.2
coverage==4.4.1
cvxopt==1.1.9
cycler==0.10.0
cyordereddict==1.0.0
Cython==0.25.2
dask==0.15.0
dataset==0.8.0
deap==1.0.2
decorator==4.0.11
descartes==1.1.0
dill==0.2.6
docker==2.3.0
docker-compose==1.13.0
docker-pycreds==0.2.1
dockerpty==0.4.1
docopt==0.6.2
dominate==2.3.1
earthengine-api==0.1.111
easydict==1.6
EbookLib==0.16
ee==0.2
elasticsearch==5.3.0
elasticsearch-dsl==5.2.0
empyrical==0.2.2
entrypoints==0.2.2
ephem==3.7.6.0
eventlet==0.17.4
-e git+git@github.com:wassname/FanFicFare.git@1da7f6ec159bee18b650fa074d2d59d4721e3653#egg=FanFicFare
fastkml==0.11
ffn==0.3.2
Fiona==1.7.5
fix-yahoo-finance==0.0.7
Flask==0.12.1
Flask-AlchemyDumps==0.0.10
Flask-Assets==0.11
Flask-Bcrypt==0.7.1
Flask-Bootstrap==3.3.5.7
Flask-Celery-Helper==1.1.0
Flask-DebugToolbar==0.10.0
Flask-Login==0.3.2
Flask-Mail==0.9.1
Flask-Migrate==1.7.0
Flask-Principal==0.4.0
Flask-Redis==0.1.0
Flask-Script==2.0.5
Flask-Security==1.7.5
Flask-SQLAlchemy==2.1
Flask-WTF==0.12
flower==0.8.4
Folio==0.4
future==0.15.2
futures==3.1.1
gbdx-auth==0.2.4
gbdxtools==0.11.7
GDAL==2.1.0
geomet==0.1.1
geopandas==0.2.1
google-api-python-client==1.6.2
google-auth==1.0.1
google-auth-httplib2==0.0.2
google-cloud-bigquery==0.25.0
google-cloud-core==0.25.0
googleapis-common-protos==1.5.2
googletrans==2.1.2
goslate==1.5.1
greenlet==0.4.12
gunicorn==19.4.5
gym==0.9.2
Cython==0.26.1
decorator==4.1.2
dill==0.2.7.1
docutils==0.14
entrypoints==0.2.3
execnet==1.4.1
flake8==3.4.1
future==0.16.0
gym==0.9.3
h5py==2.7.0
hammock==0.2.4
html2text==2016.9.19
html5lib==0.9999999
httplib2==0.10.3
idna==2.5
inflection==0.3.1
intervaltree==2.1.0
ipykernel==4.6.0
ipython==5.3.0
idna==2.6
imageio==2.2.0
ipykernel==4.6.1
ipython==6.1.0
ipython-genutils==0.2.0
ipywidgets==6.0.0
itsdangerous==0.24
jedi==0.10.2
jellyfish==0.5.6
Jinja2==2.9.6
jmespath==0.9.3
joblib==0.11
jsmin==2.2.1
jsondatabase==0.1.7
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.0.1
jupyter-client==5.1.0
jupyter-console==5.1.0
jupyter-contrib-core==0.3.1
jupyter-contrib-nbextensions==0.2.8
jupyter-core==4.3.0
jupyter-highlight-selected-word==0.0.11
jupyter-latex-envs==1.3.8.4
jupyter-nbextensions-configurator==0.2.5
Keras==2.0.5
keras-contrib==1.2.1
keras-rl==0.3.0
keras-rl==0.3.1
keras-tqdm==2.0.1
kombu==3.0.37
leven==1.0.4
Logbook==1.0.0
lru-dict==1.1.6
lxml==3.7.3
Mako==1.0.6
Markdown==2.6.8
-e git+git@github.com:wassname/libcryptomarket.git@7351c794ed43b11fdd187cc9c5e85eb4275c554a#egg=libcryptomarket
Markdown==2.6.9
MarkupSafe==1.0
matplotlib==2.0.0
matplotlib==2.0.2
mccabe==0.6.1
mistune==0.7.4
more-itertools==3.2.0
MosT==0.12
mpmath==0.19
mtranslate==1.6
multipledispatch==0.4.9
munch==2.1.1
nbconvert==5.1.1
mock==2.0.0
mpi4py==2.0.0
mujoco-py==0.5.7
nbconvert==5.2.1
nbformat==4.3.0
ndg-httpsclient==0.4.2
networkx==1.11
nltk==3.2.2
normality==0.4.0
nose==1.3.7
notebook==5.0.0
numexpr==2.6.2
numpy==1.13.1
oauth2client==4.0.0
oauthlib==2.0.2
numpy==1.13.3
olefile==0.44
packaging==16.8
opencv-python==3.3.0.10
pandas==0.20.3
pandas-datareader==0.4.0
pandas-datareader==0.5.0
pandas-profiling==1.4.0
pandocfilters==1.4.1
passlib==1.7.1
path.py==10.1
path.py==10.3.1
patsy==0.4.1
pbr==3.0.1
pbr==3.1.1
pep8==1.7.0
pexpect==4.2.1
pickleshare==0.7.4
Pillow==4.1.0
planet==1.0.0
poloniex==0.4.6
Pillow==4.3.0
pluggy==0.5.2
pprint==0.1
praw==4.4.0
prawcore==0.8.0
progressbar2==3.30.2
prompt-toolkit==1.0.14
protobuf==3.3.0
psycopg2==2.7.1
ptyprocess==0.5.1
py==1.4.33
py-googletrans==1.2
PyAlgoTrade==0.18
pyasn1==0.2.3
pyasn1-modules==0.0.9
pycparser==2.17
pycurl==7.43.0
PyExecJS==1.4.0
pygeoif==0.6
progressbar2==3.34.3
prompt-toolkit==1.0.15
protobuf==3.4.0
psutil==5.2.2
ptyprocess==0.5.2
py==1.4.34
pycodestyle==2.3.1
pyflakes==1.5.0
pyglet==1.2.4
Pygments==2.2.0
PyJWT==1.4.2
pymongo==3.4.0
PyMySQL==0.7.11
PyOpenGL==3.1.0
pyOpenSSL==17.0.0
pyparsing==2.2.0
PyPrind==2.11.1
pyproj==1.9.5.1
pyshp==1.2.10
pysle==1.5.7
pytest==3.0.7
python-dateutil==2.6.0
python-editor==1.0.3
python-twitter==3.2.1
python-utils==2.1.0
pytest==3.2.3
pytest-cache==1.0
pytest-cov==2.5.1
pytest-forked==0.2
pytest-pep8==1.0.6
pytest-xdist==1.20.0
python-coveralls==2.9.1
python-dateutil==2.6.1
python-utils==2.2.0
pytz==2017.2
PyWavelets==0.5.2
PyYAML==3.12
pyzmq==16.0.2
qgrid==0.3.2
qPython==1.2.2
qtconsole==4.3.0
Quandl==3.2.0
rasterio==1.0a8
recurrentshop==0.0.1
redis==2.10.3
requests==2.11.1
rasterio==0.36.0
requests==2.18.4
requests-file==1.4.2
requests-ftp==0.3.1
requests-futures==0.9.7
requests-oauthlib==0.8.0
rsa==3.4.2
scikit-learn==0.18.1
s3transfer==0.1.10
scikit-image==0.13.0
scikit-learn==0.18.2
scipy==0.19.1
seaborn==0.7.1
selenium==3.4.3
seq2seq==0.1.0
Shapely==1.5.17
seaborn==0.8
Shapely==1.6.0
simplegeneric==0.8.1
simplejson==3.10.0
six==1.10.0
six==1.11.0
sklearn==0.0
snuggs==1.4.1
sortedcontainers==1.5.7
SpeechRecognition==3.6.5
SQLAlchemy==1.1.9
SQLAlchemy-FullText-Search==0.2.3
SQLAlchemy-Utils==0.31.5
statsmodels==0.8.0
stevedore==1.21.0
sympy==1.0
tables==3.3.0
tabulate==0.7.7
tensor2tensor==1.0.5
tensorflow==1.2.1
tensorflow-gpu==1.2.0
-e git+git@github.com:reinforceio/tensorforce.git@21323bf4226a93892637551cd8d75b8e13d04eaa#egg=tensorforce
tables==3.4.2
tensorboard-logger==0.0.4
tensorflow==1.3.0
tensorflow-gpu==1.3.0
tensorflow-tensorboard==0.1.8
-e git+https://github.com/wassname/tensorforce.git@266fc21f472fc1ed7edb8ff3b6fd8450b3ef4828#egg=tensorforce
terminado==0.6
testpath==0.3
texttable==0.8.8
testpath==0.3.1
Theano==0.9.0
toolz==0.8.2
torch==0.1.12.post2
timer==0.1
torch==0.2.0.post3
torchvision==0.1.8
tornado==4.4.3
TPOT==0.8.3
tqdm==4.11.2
tornado==4.5.1
tox==2.9.1
tqdm==4.19.2
traitlets==4.3.2
traittypes==0.0.6
Unipath==1.1
update-checker==0.16
uritemplate==3.0.0
urllib3==1.21.1
universal-portfolios==0.3.2
urllib3==1.22
virtualenv==15.1.0
virtualenv-clone==0.2.6
virtualenvwrapper==4.7.2
visitor==0.1.3
visdom==0.1.5
wcwidth==0.1.7
webassets==0.12.1
webencodings==0.5.1
websocket-client==0.40.0
websocket-client==0.44.0
Werkzeug==0.12.2
widgetsnbextension==2.0.0
WTForms==2.1
WTForms-ParsleyJS==2.0.1
xmltodict==0.10.2
yapf==0.16.1
zipline==1.1.0
yapf==0.16.3
zmq==0.0.0
+9 -15
View File
@@ -1,22 +1,16 @@
# data
# rl
gym==0.9.2
tensorflow-gpu==1.2.0
# for mpi4py youo need libopenmpi-dev
mpi4py
https://github.com/wassname/baselines/archive/676b6f0b18fe9855579cb271484dcdf118e2588c.zip
https://github.com/farizrahman4u/keras-contrib/archive/15133077af4d9c379c8219574898ce29cae12e48.zip
gym==0.9.3
tensorflow-gpu==1.3.0
h5py==2.7.0
keras-rl==0.3.0
keras-tqdm==2.0.1
# use my branch with prioritised ppo, untill mergeed
https://github.com/wassname/tensorforce/archive/merged_6b.zip
# tensorforce 0.3.1 a specific commit
https://github.com/reinforceio/tensorforce/archive/17cf4b84d8fbad9d7c65d1d7bde36faabddd6a36.zip
# numbers
tables==3.4.2
seaborn==0.7.1
seaborn==0.8
pandas==0.20.3
numpy==1.12.1
matplotlib==2.0.0
numpy==1.13.3
matplotlib==2.0.2
# misc
tqdm==4.11.2
seaborn==0.7.1
tqdm==4.19.2
seaborn==0.8
File diff suppressed because one or more lines are too long
-975
View File
@@ -1,975 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Uses tensorforce tensorforce-0.2.0"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:29.756080Z",
"start_time": "2017-10-15T02:50:28.814041Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# plotting\n",
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import seaborn as sns\n",
"plt.style.use('ggplot')\n",
"\n",
"# numeric\n",
"import numpy as np\n",
"from numpy import random\n",
"import pandas as pd\n",
"\n",
"# util\n",
"from collections import Counter\n",
"import pdb\n",
"import glob\n",
"import time\n",
"import tempfile\n",
"import itertools\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import datetime\n",
"\n",
"# logging\n",
"import logging\n",
"logger = log = logging.getLogger(__name__)\n",
"# log.setLevel(logging.INFO)\n",
"logging.basicConfig()\n",
"log.info('%s logger started.', __name__)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:29.899144Z",
"start_time": "2017-10-15T02:50:29.759262Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import gym\n",
"from gym import error, spaces, utils\n",
"from gym.utils import seeding"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:29.989592Z",
"start_time": "2017-10-15T02:50:29.901021Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"os.sys.path.append(os.path.abspath('.'))\n",
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.089437Z",
"start_time": "2017-10-15T02:50:29.991629Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'./outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# params\n",
"window_length = 50\n",
"import datetime\n",
"ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')\n",
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_%s.model' % ts\n",
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'\n",
"save_path\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.165055Z",
"start_time": "2017-10-15T02:50:30.091635Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'logs/tensorforce-PPO-prioritised_20171015_02-50-30'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log_dir = os.path.join('logs', os.path.splitext(os.path.basename(save_path))[0])\n",
"try:\n",
" os.makedirs(log_dir)\n",
"except OSError:\n",
" pass\n",
"log_dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Enviroment"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.239055Z",
"start_time": "2017-10-15T02:50:30.168273Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from rl_portfolio_management.environments.portfolio import PortfolioEnv"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.334097Z",
"start_time": "2017-10-15T02:50:30.241263Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from rl.memory import SequentialMemory, Memory\n",
"from collections import deque\n",
"\n",
"class EnvWrapper(PortfolioEnv):\n",
" \"\"\"Wraps env to normalise and reshape action.\"\"\"\n",
" def __init__(self, window_length=50, *args, **kwargs):\n",
" super().__init__(*args, **kwargs)\n",
" \n",
" def step(self, action):\n",
" # also it puts it in a list\n",
" if isinstance(action, list):\n",
" action = action[0]\n",
" \n",
" # we have to normalise for some reason softmax wont work\n",
" if isinstance(action, dict):\n",
" action = np.abs(list(action.values()))\n",
" action /= action.sum() \n",
" \n",
" return super().step(action) "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.635247Z",
"start_time": "2017-10-15T02:50:30.337761Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
"[2017-10-15 10:50:30,622] Making new env: CartPole-v0\n",
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
"[2017-10-15 10:50:30,629] Making new env: CartPole-v0\n"
]
}
],
"source": [
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
"env = EnvWrapper(\n",
" df=df_train,\n",
" steps=300, \n",
" scale=True, \n",
" augment=0.000,\n",
" trading_cost=0, # let just overfit first,\n",
" window_length = window_length,\n",
" \n",
")\n",
"env.seed = 0 \n",
"\n",
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
"env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=300, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env_test.seed = 0 \n",
"\n",
"from tensorforce.contrib.openai_gym import OpenAIGym\n",
"environment = OpenAIGym('CartPole-v0')\n",
"environment.gym = env\n",
"\n",
"environment_test = OpenAIGym('CartPole-v0')\n",
"environment_test.gym = env_test"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:30.725276Z",
"start_time": "2017-10-15T02:50:30.636432Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0 False {'reward': 0.0, 'log_return': 0.0, 'portfolio_value': 1.0, 'return': 0.9994203267253029, 'rate_of_return': 0.0, 'weights_mean': 0.16666666666666666, 'weights_std': 0.092620963292867634, 'cost': 0.0, 'market_value': 0.99991966957965184, 'date': 1463925600.0, 'steps': 2}\n",
"(5, 50, 3) (5, 50, 3)\n"
]
}
],
"source": [
"# check shapes\n",
"obs1, reward, done, info=env.step(np.random.random(env.action_space.shape))\n",
"print(reward, done, info)\n",
"obs2 = env.reset()\n",
"print(obs1.shape,obs2.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-16T04:41:21.116729Z",
"start_time": "2017-07-16T12:41:21.086620+08:00"
}
},
"source": [
"# Model\n",
"\n",
"Derived from https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:53.928811Z",
"start_time": "2017-10-15T02:50:30.726693Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from tensorforce import Configuration\n",
"from tensorforce.agents import PPOAgent\n",
"from tensorforce.core.networks import layered_network_builder"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:54.317167Z",
"start_time": "2017-10-15T02:50:53.931397Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# layer helpers from:\n",
"# https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90\n",
"import tensorflow as tf\n",
"from math import sqrt\n",
"from tensorforce import util\n",
"from tensorforce import TensorForceError\n",
"\n",
"def linear(x, size, bias=True, l2_regularization=0.0):\n",
" if util.rank(x) != 2:\n",
" raise TensorForceError('Invalid input rank for linear layer.')\n",
" with tf.variable_scope('linear'):\n",
" weights = tf.Variable(initial_value=tf.random_normal(shape=(x.get_shape()[1].value, size), stddev=sqrt(2.0 / (x.get_shape()[1].value + size))))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=weights))\n",
" x = tf.matmul(a=x, b=weights)\n",
" if bias:\n",
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
" x = tf.nn.bias_add(value=x, bias=bias)\n",
" return x\n",
"\n",
"def nonlinearity(x, name='relu'):\n",
" with tf.variable_scope('nonlinearity'):\n",
" if name == 'elu':\n",
" x = tf.nn.elu(features=x)\n",
" elif name == 'relu':\n",
" x = tf.nn.relu(features=x)\n",
" elif name == 'selu':\n",
" # https://arxiv.org/pdf/1706.02515.pdf\n",
" alpha = 1.6732632423543772848170429916717\n",
" scale = 1.0507009873554804934193349852946\n",
" negative = alpha * tf.nn.elu(features=x)\n",
" x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)\n",
" elif name == 'sigmoid':\n",
" x = tf.sigmoid(x=x)\n",
" elif name == 'softmax':\n",
" x = tf.nn.softmax(logits=x)\n",
" elif name == 'tanh':\n",
" x = tf.nn.tanh(x=x)\n",
" else:\n",
" raise TensorForceError('Invalid nonlinearity.')\n",
" return x\n",
"\n",
"def dense(x, size, bias=True, activation='relu', l2_regularization=0.0):\n",
" if util.rank(x) != 2:\n",
" raise TensorForceError('Invalid input rank for dense layer.')\n",
" with tf.variable_scope('dense'):\n",
" x = linear(x=x, size=size, bias=bias, l2_regularization=l2_regularization)\n",
" x = nonlinearity(x=x, name=activation)\n",
" return x\n",
"\n",
"def flatten(x):\n",
" with tf.variable_scope('flatten'):\n",
" x = tf.reshape(tensor=x, shape=(-1, util.prod(x.get_shape().as_list()[1:])))\n",
" return x\n",
"\n",
"def conv2d(x, size, window=(3,3), stride=(1,1), bias=False, activation='relu', l2_regularization=0.0, padding='SAME'):\n",
" if util.rank(x) != 4:\n",
" raise TensorForceError('Invalid input rank for conv2d layer.')\n",
" with tf.variable_scope('conv2d'):\n",
" filters = tf.Variable(initial_value=tf.random_normal(shape=(window[0], window[1], x.get_shape()[3].value, size), stddev=sqrt(2.0 / size)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))\n",
" x = tf.nn.conv2d(input=x, filter=filters, strides=(1, stride[0], stride[1], 1), padding=padding)\n",
" if bias:\n",
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
" if l2_regularization > 0.0:\n",
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
" x = tf.nn.bias_add(value=x, bias=bias)\n",
" x = nonlinearity(x=x, name=activation)\n",
" return x\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:54.442239Z",
"start_time": "2017-10-15T02:50:54.318527Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# build a network for a given input\n",
"def network_builder(inputs, summary_level):\n",
" if len(inputs) != 1:\n",
" raise TensorForceError('Layered network must have only one input.')\n",
" x = next(iter(inputs.values()))\n",
" \n",
" x = conv2d(x=x, size=2, window=(1,3), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = conv2d(x=x, size=20, window=(1,window_length-2), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = conv2d(x=x, size=1, window=(1,1), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
" x = flatten(x)\n",
" x = nonlinearity(x,name='softmax')\n",
" \n",
" return x\n",
"network=network_builder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Agent"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:54.566983Z",
"start_time": "2017-10-15T02:50:54.444213Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"({'action0': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action1': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action2': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action3': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action4': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'},\n",
" 'action5': {'epsilon': 1,\n",
" 'epsilon_final': 0.005,\n",
" 'epsilon_timesteps': 100000.0,\n",
" 'start_after': 0,\n",
" 'type': 'epsilon_anneal'}},)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploration=dict(\n",
" type=\"epsilon_anneal\",\n",
" epsilon=1,\n",
" epsilon_final= 0.005,\n",
" epsilon_timesteps= 1e5,\n",
" start_after=0,\n",
")\n",
"{'action' + str(n): exploration for n in range(env.action_space.shape[0])},"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:57.570700Z",
"start_time": "2017-10-15T02:50:54.568901Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorforce.agents.agent:Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n",
"[2017-10-15 10:50:57,553] Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n"
]
},
{
"data": {
"text/plain": [
"<tensorforce.agents.ppo_agent.PPOAgent at 0x7fc5592f0da0>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"batch_size=256\n",
"exploration=dict(\n",
" type=\"epsilon_anneal\",\n",
" epsilon=1,\n",
" epsilon_final= 0.005,\n",
" epsilon_timesteps= 1e5,\n",
" start_after=0,\n",
")\n",
"config = Configuration( \n",
" # Each agent requires the following ``Configuration`` parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/agents/agent.py#L32\n",
" network=network,\n",
" states=dict(shape=tuple(env.observation_space.shape), type='float'),\n",
" actions={'action' + str(n): dict(continuous=True) for n in range(env.action_space.shape[0])},\n",
" preprocessing = None,# dict or list containing state preprocessing configuration.\n",
" exploration = {'action' + str(n): exploration for n in range(env.action_space.shape[0])}, # dict containing action exploration configuration.\n",
" \n",
" \n",
" # The `MemoryAgent` class additionally requires the following parameters:\n",
" first_update = batch_size*2, # integer indicating the number of steps to pass before the first update.\n",
" memory_capacity = 300000, # integer of maximum experiences to store. (takes 2s to sample with 100k)\n",
" memory = 'prioritized_replay', # string indicating memory type ('replay' or 'prioritized_replay').\n",
" update_frequency = int(batch_size/2), # integer indicating the number of steps between model updates.\n",
" repeat_update = 2, # integer indicating how often to repeat the model update.\n",
"\n",
" # Each model requires the following configuration parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/model.py#L33\n",
" discount = 0.97, # float of discount factor (gamma).\n",
" learning_rate = 1e-3, # float of learning rate (alpha). (3e-4 in paper 1e-3 (atari) and 3e-4 in baselines)\n",
" optimizer = 'adam', # string of optimizer to use (e.g. 'adam' in paper).\n",
" device = None, # string of tensorflow device name.\n",
"# tf_summary = log_dir, # string directory to write tensorflow summaries. Default None\n",
"# tf_summary_level = 1, # int indicating which tensorflow summaries to create.\n",
" tf_summary_interval = 1000, # int number of calls to get_action until writing tensorflow summaries on update.\n",
" log_level = 'info', # string containing log level (e.g. 'info').\n",
" distributed = False, # boolean indicating whether to use distributed tensorflow.\n",
" global_model = False, # global model.\n",
" session = None, # session to use. \n",
"\n",
" # A Policy Gradient Model expects the following additional configuration parameters:\n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/policy_gradient_model.py#L35\n",
" # I edited my tensorflow install to have a flatten layer to make this work (my branch is in requirements.txt)\n",
" baseline=dict(\n",
" type=\"mlp\",\n",
" sizes=[128, 128],\n",
" epochs=1,\n",
" update_batch_size=128,\n",
" learning_rate=0.01\n",
" ), # string indicating the baseline value function (currently 'linear' or 'mlp').\n",
" gae_rewards= True, # boolean indicating whether to use GAE.\n",
" gae_lambda= 0.97, # float of the Generalized Advantage Estimation lambda.\n",
" normalize_rewards= False,# boolean indicating whether to normalize the advantage or not.\n",
" \n",
" # PPO Params \n",
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/ppo_model.py\n",
" entropy_penalty=0.01, # 0 and 0.01 in baselines\n",
" loss_clipping=0.1, # Trust region clipping\n",
" epochs=4, # Number of training epochs for SGD, data is repeated this much 4 (atari),10 in baselines, 10 in paper\n",
" optimizer_batch_size=32, # Batch size for optimiser, should be small (e.g. 64 in paper)\n",
" random_sampling=True # Sampling strategy for minibatch replay memory\n",
")\n",
"\n",
"# Create a Trust Region Policy Optimization agent\n",
"agent = PPOAgent(config=config)\n",
"agent"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train\n",
"\n",
"## Callbacks"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:57.755421Z",
"start_time": "2017-10-15T02:50:57.573140Z"
}
},
"outputs": [],
"source": [
"from rl_portfolio_management.callbacks.tensorforce import EpisodeFinishedTQDM\n",
"from rl_portfolio_management.util import MDD, sharpe"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:19:28.278977Z",
"start_time": "2017-10-15T02:19:28.132177Z"
}
},
"source": [
"## Train"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:57.895354Z",
"start_time": "2017-10-15T02:50:57.761809Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"from tensorforce.execution import Runner\n",
"runner = Runner(agent=agent, environment=environment, save_path=save_path, save_episodes=1000)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2017-10-15T02:50:57.998473Z",
"start_time": "2017-10-15T02:50:57.897093Z"
}
},
"outputs": [],
"source": [
"# Check my PR is included, https://github.com/wassname/tensorforce/tree/merged_6b\n",
"import tensorforce.core.memories\n",
"assert isinstance(runner.agent.memory,tensorforce.core.memories.PrioritizedReplay)\n",
"assert isinstance(runner.agent, tensorforce.agents.MemoryAgent)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.630Z"
}
},
"outputs": [],
"source": [
"# resume\n",
"saves=glob.glob(save_path+'-*')\n",
"if len(saves)>0:\n",
" # load saved\n",
" last_save = os.path.splitext(saves[0])[0]\n",
" runner.agent.load_model(last_save)\n",
" print('loaded', last_save)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.632Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b018b4d97fbd4f1daaf1b5e0e8e9bd09"
}
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TensorBoardLogger started. Run `tensorboard --logdir=/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/rl_keras_finance/portfolio-rl-jiang_2017/logs/tensorforce-PPO-prioritised_20171015_02-50-30` to visualize\n"
]
}
],
"source": [
"episodes = int(6e6 / 30)\n",
"runner.run(\n",
" episodes=episodes,\n",
" max_timesteps=200,\n",
" episode_finished=EpisodeFinishedTQDM(\n",
" log_intv=100, \n",
" episodes=episodes,\n",
" log_dir=log_dir,\n",
" session=runner.agent.model.session, \n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.633Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# save\n",
"agent.save_model(save_path)\n",
"save_path"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-19T00:09:54.262405Z",
"start_time": "2017-07-19T08:09:54.226639+08:00"
},
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.635Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# one big test\n",
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
"steps=2400#len(df_test)-window_length-2\n",
"env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=steps, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env_test.seed = 0 \n",
"environment_test = OpenAIGym('CartPole-v0')\n",
"environment_test.gym = env_test\n",
"\n",
"agent.load_model(save_path)\n",
"runner_test = Runner(agent=agent, environment=environment_test)\n",
"runner_test.run(\n",
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
"\n",
"df = pd.DataFrame(env_test.infos)\n",
"df.index=df['index']\n",
"\n",
"s=sharpe(df.rate_of_return+1)\n",
"mdd=MDD(df.rate_of_return+1)\n",
"apv=df.portfolio_value.iloc[-1]\n",
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(apv))\n",
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
"print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
"print('')\n",
"\n",
"# show one run vs average market performance\n",
"plt.title('test MDD={}, Sharpe={}, APV={}'.format(mdd,s,apv))\n",
"df.portfolio_value.plot()\n",
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-07-19T00:48:39.193976Z",
"start_time": "2017-07-19T08:48:39.154752+08:00"
},
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.637Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"\n",
"\n",
"data=[]\n",
"for i in range(10):\n",
" agent.load_model(save_path)\n",
" df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
" \n",
" env_test = EnvWrapper(\n",
" df=df_test,\n",
" steps=1800, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
" )\n",
" env_test.seed = 0 \n",
"\n",
"\n",
" environment_test = OpenAIGym('CartPole-v0')\n",
" environment_test.gym = env_test\n",
"\n",
" runner_test = Runner(agent=agent, environment=environment_test)\n",
" np.random.seed(i)\n",
" runner_test.run(\n",
" episodes=2, max_timesteps=32, episode_finished=EpisodeFinished(10))\n",
" df = pd.DataFrame(environment_test.gym.infos)\n",
"# df.index=df['index']\n",
" \n",
" s=sharpe(df.rate_of_return+1)\n",
" mdd=MDD(df.rate_of_return+1)\n",
" data.append(dict(sharpe=s,mdd=mdd))\n",
" print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
" print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
" print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
" print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
" print('')\n",
" df.portfolio_value.plot(label=str(i))\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.638Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2017-10-15T02:50:28.640Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# one big test over train\n",
"# one big test\n",
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
"steps=len(df_train)-window_length-2\n",
"env = EnvWrapper(\n",
" df=df_train,\n",
" steps=steps, \n",
" scale=True, \n",
" augment=0.00,\n",
" trading_cost=0, # let just overfit first\n",
" window_length=window_length,\n",
")\n",
"env.seed = 0 \n",
"environment = OpenAIGym('CartPole-v0')\n",
"environment.gym = env\n",
"\n",
"agent.load_model(save_path)\n",
"runner = Runner(agent=agent, environment=environment)\n",
"runner.run(\n",
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
"\n",
"df = pd.DataFrame(env.infos)\n",
"df.index=df['index']\n",
"\n",
"s=sharpe(df.rate_of_return+1)\n",
"mdd=MDD(df.rate_of_return+1)\n",
"data.append(dict(sharpe=s,mdd=mdd))\n",
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
"print('')\n",
"\n",
"# show one run vs average market performance\n",
"plt.title('train')\n",
"df.portfolio_value.plot()\n",
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "jupyter3",
"language": "python",
"name": "jupyter3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File diff suppressed because one or more lines are too long