mirror of
https://github.com/wassname/rl-portfolio-management.git
synced 2026-06-27 16:46:41 +08:00
update notebook and requirements
This commit is contained in:
@@ -135,20 +135,18 @@ We have partial test coverage of the environment, just run:
|
||||
# Files
|
||||
|
||||
- enviroments/portfolio.py - contains an OpenAI Gym environment for portfolio trading
|
||||
- tensorforce-VPG.ipynb - notebook to try a policy gradient agent
|
||||
- tensorforce-PPO - notebook to try a PPO agent
|
||||
- data/poloniex_30m.hdf - hdf file with cryptocurrency 30 minutes prices
|
||||
- tensorforce-PPO-IEET.ipynb - notebook to try a policy gradient agent
|
||||
|
||||
# Differences in implementation
|
||||
|
||||
The main differences from Jiang et al. 2017 are:
|
||||
|
||||
- The first step in a deep learning project should be to make sure the model can overfit; this provides a sanity check. So I am first trying to achieve good results with no trading costs.
|
||||
- I have not used portfolio vector memory. For ease of implementation I made the information available by replacing the oldest timestep. Your model can slice it, or Dense or CNN models can just be given the information.
|
||||
- I have not used portfolio vector memory. For ease of implementation I made the information available by using the last weights.
|
||||
- Instead of DPG ([deterministic policy gradient](http://jmlr.org/proceedings/papers/v32/silver14.pdf)) I tried DDPG ([deep deterministic policy gradient](http://arxiv.org/pdf/1509.02971v2.pdf)) and VPG (vanilla policy gradient) with generalized advantage estimation and PPO.
|
||||
- I tried to replicate the best performing CNN model from the paper and haven't attempted the LSTM or RNN models.
|
||||
- instead of selecting 12 assets for each window I chose 5 assets that have existed for the longest time
|
||||
- My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)
|
||||
- instead of selecting 12 assets for each window I chose 3 assets that have existed for the longest time
|
||||
- ~~My topology had an extra layer [see issue 3](https://github.com/wassname/rl-portfolio-management/issues/3)~~ fixed
|
||||
|
||||
# TODO
|
||||
|
||||
|
||||
+97
-223
@@ -1,13 +1,10 @@
|
||||
affine==2.0.0.post1
|
||||
alembic==0.9.1
|
||||
amqp==1.4.9
|
||||
anyjson==0.3.3
|
||||
appdirs==1.4.3
|
||||
affine==2.1.0
|
||||
apipkg==1.4
|
||||
arrow==0.10.0
|
||||
asn1crypto==0.22.0
|
||||
atari-py==0.1.1
|
||||
autopep8==1.3.2
|
||||
azure==1.0.3
|
||||
azure-common==1.1.6
|
||||
azure-common==1.1.8
|
||||
azure-mgmt==0.20.2
|
||||
azure-mgmt-common==0.20.0
|
||||
azure-mgmt-compute==0.20.1
|
||||
@@ -19,282 +16,159 @@ azure-nspkg==2.0.0
|
||||
azure-servicebus==0.20.1
|
||||
azure-servicemanagement-legacy==0.20.2
|
||||
azure-storage==0.20.3
|
||||
Babel==2.2.0
|
||||
backoff==1.4.0
|
||||
backoff==1.4.3
|
||||
backports.weakref==1.0rc1
|
||||
baselines==0.1.3
|
||||
bcolz==0.12.1
|
||||
bcrypt==2.0.0
|
||||
beautifulsoup4==4.5.3
|
||||
billiard==3.3.0.23
|
||||
-e git+https://github.com/openai/baselines.git@4993286230ac92ead39a66005b7042b56b8598b0#egg=baselines
|
||||
bcolz==1.1.2
|
||||
beautifulsoup4==4.6.0
|
||||
bench==2.8
|
||||
BitcoinExchangeFH==0.2.3
|
||||
bleach==1.5.0
|
||||
blessings==1.6
|
||||
blinker==1.4
|
||||
boto==2.39.0
|
||||
Bottleneck==1.2.1
|
||||
bpython==0.16
|
||||
bqplot==0.9.0
|
||||
boto3==1.4.6
|
||||
botocore==1.6.3
|
||||
bs4==0.0.1
|
||||
bt==0.2.5
|
||||
cached-property==1.3.0
|
||||
cachetools==2.0.0
|
||||
celery==3.1.19
|
||||
cffi==1.10.0
|
||||
cfscrape==1.8.0
|
||||
chardet==2.3.0
|
||||
ChatterBot==0.6.0
|
||||
chatterbot-corpus==0.0.1
|
||||
chatterbot-voice==0.1.3
|
||||
certifi==2017.7.27.1
|
||||
chardet==3.0.4
|
||||
click==6.7
|
||||
click-plugins==1.0.3
|
||||
cligj==0.4.0
|
||||
cloudpickle==0.2.2
|
||||
colorama==0.3.9
|
||||
contextlib2==0.5.5
|
||||
cryptography==1.8.1
|
||||
cssmin==0.2.0
|
||||
curtsies==0.2.11
|
||||
cntk==2.2
|
||||
coverage==4.4.1
|
||||
cvxopt==1.1.9
|
||||
cycler==0.10.0
|
||||
cyordereddict==1.0.0
|
||||
Cython==0.25.2
|
||||
dask==0.15.0
|
||||
dataset==0.8.0
|
||||
deap==1.0.2
|
||||
decorator==4.0.11
|
||||
descartes==1.1.0
|
||||
dill==0.2.6
|
||||
docker==2.3.0
|
||||
docker-compose==1.13.0
|
||||
docker-pycreds==0.2.1
|
||||
dockerpty==0.4.1
|
||||
docopt==0.6.2
|
||||
dominate==2.3.1
|
||||
earthengine-api==0.1.111
|
||||
easydict==1.6
|
||||
EbookLib==0.16
|
||||
ee==0.2
|
||||
elasticsearch==5.3.0
|
||||
elasticsearch-dsl==5.2.0
|
||||
empyrical==0.2.2
|
||||
entrypoints==0.2.2
|
||||
ephem==3.7.6.0
|
||||
eventlet==0.17.4
|
||||
-e git+git@github.com:wassname/FanFicFare.git@1da7f6ec159bee18b650fa074d2d59d4721e3653#egg=FanFicFare
|
||||
fastkml==0.11
|
||||
ffn==0.3.2
|
||||
Fiona==1.7.5
|
||||
fix-yahoo-finance==0.0.7
|
||||
Flask==0.12.1
|
||||
Flask-AlchemyDumps==0.0.10
|
||||
Flask-Assets==0.11
|
||||
Flask-Bcrypt==0.7.1
|
||||
Flask-Bootstrap==3.3.5.7
|
||||
Flask-Celery-Helper==1.1.0
|
||||
Flask-DebugToolbar==0.10.0
|
||||
Flask-Login==0.3.2
|
||||
Flask-Mail==0.9.1
|
||||
Flask-Migrate==1.7.0
|
||||
Flask-Principal==0.4.0
|
||||
Flask-Redis==0.1.0
|
||||
Flask-Script==2.0.5
|
||||
Flask-Security==1.7.5
|
||||
Flask-SQLAlchemy==2.1
|
||||
Flask-WTF==0.12
|
||||
flower==0.8.4
|
||||
Folio==0.4
|
||||
future==0.15.2
|
||||
futures==3.1.1
|
||||
gbdx-auth==0.2.4
|
||||
gbdxtools==0.11.7
|
||||
GDAL==2.1.0
|
||||
geomet==0.1.1
|
||||
geopandas==0.2.1
|
||||
google-api-python-client==1.6.2
|
||||
google-auth==1.0.1
|
||||
google-auth-httplib2==0.0.2
|
||||
google-cloud-bigquery==0.25.0
|
||||
google-cloud-core==0.25.0
|
||||
googleapis-common-protos==1.5.2
|
||||
googletrans==2.1.2
|
||||
goslate==1.5.1
|
||||
greenlet==0.4.12
|
||||
gunicorn==19.4.5
|
||||
gym==0.9.2
|
||||
Cython==0.26.1
|
||||
decorator==4.1.2
|
||||
dill==0.2.7.1
|
||||
docutils==0.14
|
||||
entrypoints==0.2.3
|
||||
execnet==1.4.1
|
||||
flake8==3.4.1
|
||||
future==0.16.0
|
||||
gym==0.9.3
|
||||
h5py==2.7.0
|
||||
hammock==0.2.4
|
||||
html2text==2016.9.19
|
||||
html5lib==0.9999999
|
||||
httplib2==0.10.3
|
||||
idna==2.5
|
||||
inflection==0.3.1
|
||||
intervaltree==2.1.0
|
||||
ipykernel==4.6.0
|
||||
ipython==5.3.0
|
||||
idna==2.6
|
||||
imageio==2.2.0
|
||||
ipykernel==4.6.1
|
||||
ipython==6.1.0
|
||||
ipython-genutils==0.2.0
|
||||
ipywidgets==6.0.0
|
||||
itsdangerous==0.24
|
||||
jedi==0.10.2
|
||||
jellyfish==0.5.6
|
||||
Jinja2==2.9.6
|
||||
jmespath==0.9.3
|
||||
joblib==0.11
|
||||
jsmin==2.2.1
|
||||
jsondatabase==0.1.7
|
||||
jsonschema==2.6.0
|
||||
jupyter==1.0.0
|
||||
jupyter-client==5.0.1
|
||||
jupyter-client==5.1.0
|
||||
jupyter-console==5.1.0
|
||||
jupyter-contrib-core==0.3.1
|
||||
jupyter-contrib-nbextensions==0.2.8
|
||||
jupyter-core==4.3.0
|
||||
jupyter-highlight-selected-word==0.0.11
|
||||
jupyter-latex-envs==1.3.8.4
|
||||
jupyter-nbextensions-configurator==0.2.5
|
||||
Keras==2.0.5
|
||||
keras-contrib==1.2.1
|
||||
keras-rl==0.3.0
|
||||
keras-rl==0.3.1
|
||||
keras-tqdm==2.0.1
|
||||
kombu==3.0.37
|
||||
leven==1.0.4
|
||||
Logbook==1.0.0
|
||||
lru-dict==1.1.6
|
||||
lxml==3.7.3
|
||||
Mako==1.0.6
|
||||
Markdown==2.6.8
|
||||
-e git+git@github.com:wassname/libcryptomarket.git@7351c794ed43b11fdd187cc9c5e85eb4275c554a#egg=libcryptomarket
|
||||
Markdown==2.6.9
|
||||
MarkupSafe==1.0
|
||||
matplotlib==2.0.0
|
||||
matplotlib==2.0.2
|
||||
mccabe==0.6.1
|
||||
mistune==0.7.4
|
||||
more-itertools==3.2.0
|
||||
MosT==0.12
|
||||
mpmath==0.19
|
||||
mtranslate==1.6
|
||||
multipledispatch==0.4.9
|
||||
munch==2.1.1
|
||||
nbconvert==5.1.1
|
||||
mock==2.0.0
|
||||
mpi4py==2.0.0
|
||||
mujoco-py==0.5.7
|
||||
nbconvert==5.2.1
|
||||
nbformat==4.3.0
|
||||
ndg-httpsclient==0.4.2
|
||||
networkx==1.11
|
||||
nltk==3.2.2
|
||||
normality==0.4.0
|
||||
nose==1.3.7
|
||||
notebook==5.0.0
|
||||
numexpr==2.6.2
|
||||
numpy==1.13.1
|
||||
oauth2client==4.0.0
|
||||
oauthlib==2.0.2
|
||||
numpy==1.13.3
|
||||
olefile==0.44
|
||||
packaging==16.8
|
||||
opencv-python==3.3.0.10
|
||||
pandas==0.20.3
|
||||
pandas-datareader==0.4.0
|
||||
pandas-datareader==0.5.0
|
||||
pandas-profiling==1.4.0
|
||||
pandocfilters==1.4.1
|
||||
passlib==1.7.1
|
||||
path.py==10.1
|
||||
path.py==10.3.1
|
||||
patsy==0.4.1
|
||||
pbr==3.0.1
|
||||
pbr==3.1.1
|
||||
pep8==1.7.0
|
||||
pexpect==4.2.1
|
||||
pickleshare==0.7.4
|
||||
Pillow==4.1.0
|
||||
planet==1.0.0
|
||||
poloniex==0.4.6
|
||||
Pillow==4.3.0
|
||||
pluggy==0.5.2
|
||||
pprint==0.1
|
||||
praw==4.4.0
|
||||
prawcore==0.8.0
|
||||
progressbar2==3.30.2
|
||||
prompt-toolkit==1.0.14
|
||||
protobuf==3.3.0
|
||||
psycopg2==2.7.1
|
||||
ptyprocess==0.5.1
|
||||
py==1.4.33
|
||||
py-googletrans==1.2
|
||||
PyAlgoTrade==0.18
|
||||
pyasn1==0.2.3
|
||||
pyasn1-modules==0.0.9
|
||||
pycparser==2.17
|
||||
pycurl==7.43.0
|
||||
PyExecJS==1.4.0
|
||||
pygeoif==0.6
|
||||
progressbar2==3.34.3
|
||||
prompt-toolkit==1.0.15
|
||||
protobuf==3.4.0
|
||||
psutil==5.2.2
|
||||
ptyprocess==0.5.2
|
||||
py==1.4.34
|
||||
pycodestyle==2.3.1
|
||||
pyflakes==1.5.0
|
||||
pyglet==1.2.4
|
||||
Pygments==2.2.0
|
||||
PyJWT==1.4.2
|
||||
pymongo==3.4.0
|
||||
PyMySQL==0.7.11
|
||||
PyOpenGL==3.1.0
|
||||
pyOpenSSL==17.0.0
|
||||
pyparsing==2.2.0
|
||||
PyPrind==2.11.1
|
||||
pyproj==1.9.5.1
|
||||
pyshp==1.2.10
|
||||
pysle==1.5.7
|
||||
pytest==3.0.7
|
||||
python-dateutil==2.6.0
|
||||
python-editor==1.0.3
|
||||
python-twitter==3.2.1
|
||||
python-utils==2.1.0
|
||||
pytest==3.2.3
|
||||
pytest-cache==1.0
|
||||
pytest-cov==2.5.1
|
||||
pytest-forked==0.2
|
||||
pytest-pep8==1.0.6
|
||||
pytest-xdist==1.20.0
|
||||
python-coveralls==2.9.1
|
||||
python-dateutil==2.6.1
|
||||
python-utils==2.2.0
|
||||
pytz==2017.2
|
||||
PyWavelets==0.5.2
|
||||
PyYAML==3.12
|
||||
pyzmq==16.0.2
|
||||
qgrid==0.3.2
|
||||
qPython==1.2.2
|
||||
qtconsole==4.3.0
|
||||
Quandl==3.2.0
|
||||
rasterio==1.0a8
|
||||
recurrentshop==0.0.1
|
||||
redis==2.10.3
|
||||
requests==2.11.1
|
||||
rasterio==0.36.0
|
||||
requests==2.18.4
|
||||
requests-file==1.4.2
|
||||
requests-ftp==0.3.1
|
||||
requests-futures==0.9.7
|
||||
requests-oauthlib==0.8.0
|
||||
rsa==3.4.2
|
||||
scikit-learn==0.18.1
|
||||
s3transfer==0.1.10
|
||||
scikit-image==0.13.0
|
||||
scikit-learn==0.18.2
|
||||
scipy==0.19.1
|
||||
seaborn==0.7.1
|
||||
selenium==3.4.3
|
||||
seq2seq==0.1.0
|
||||
Shapely==1.5.17
|
||||
seaborn==0.8
|
||||
Shapely==1.6.0
|
||||
simplegeneric==0.8.1
|
||||
simplejson==3.10.0
|
||||
six==1.10.0
|
||||
six==1.11.0
|
||||
sklearn==0.0
|
||||
snuggs==1.4.1
|
||||
sortedcontainers==1.5.7
|
||||
SpeechRecognition==3.6.5
|
||||
SQLAlchemy==1.1.9
|
||||
SQLAlchemy-FullText-Search==0.2.3
|
||||
SQLAlchemy-Utils==0.31.5
|
||||
statsmodels==0.8.0
|
||||
stevedore==1.21.0
|
||||
sympy==1.0
|
||||
tables==3.3.0
|
||||
tabulate==0.7.7
|
||||
tensor2tensor==1.0.5
|
||||
tensorflow==1.2.1
|
||||
tensorflow-gpu==1.2.0
|
||||
-e git+git@github.com:reinforceio/tensorforce.git@21323bf4226a93892637551cd8d75b8e13d04eaa#egg=tensorforce
|
||||
tables==3.4.2
|
||||
tensorboard-logger==0.0.4
|
||||
tensorflow==1.3.0
|
||||
tensorflow-gpu==1.3.0
|
||||
tensorflow-tensorboard==0.1.8
|
||||
-e git+https://github.com/wassname/tensorforce.git@266fc21f472fc1ed7edb8ff3b6fd8450b3ef4828#egg=tensorforce
|
||||
terminado==0.6
|
||||
testpath==0.3
|
||||
texttable==0.8.8
|
||||
testpath==0.3.1
|
||||
Theano==0.9.0
|
||||
toolz==0.8.2
|
||||
torch==0.1.12.post2
|
||||
timer==0.1
|
||||
torch==0.2.0.post3
|
||||
torchvision==0.1.8
|
||||
tornado==4.4.3
|
||||
TPOT==0.8.3
|
||||
tqdm==4.11.2
|
||||
tornado==4.5.1
|
||||
tox==2.9.1
|
||||
tqdm==4.19.2
|
||||
traitlets==4.3.2
|
||||
traittypes==0.0.6
|
||||
Unipath==1.1
|
||||
update-checker==0.16
|
||||
uritemplate==3.0.0
|
||||
urllib3==1.21.1
|
||||
universal-portfolios==0.3.2
|
||||
urllib3==1.22
|
||||
virtualenv==15.1.0
|
||||
virtualenv-clone==0.2.6
|
||||
virtualenvwrapper==4.7.2
|
||||
visitor==0.1.3
|
||||
visdom==0.1.5
|
||||
wcwidth==0.1.7
|
||||
webassets==0.12.1
|
||||
webencodings==0.5.1
|
||||
websocket-client==0.40.0
|
||||
websocket-client==0.44.0
|
||||
Werkzeug==0.12.2
|
||||
widgetsnbextension==2.0.0
|
||||
WTForms==2.1
|
||||
WTForms-ParsleyJS==2.0.1
|
||||
xmltodict==0.10.2
|
||||
yapf==0.16.1
|
||||
zipline==1.1.0
|
||||
yapf==0.16.3
|
||||
zmq==0.0.0
|
||||
|
||||
@@ -1,22 +1,16 @@
|
||||
# data
|
||||
# rl
|
||||
gym==0.9.2
|
||||
tensorflow-gpu==1.2.0
|
||||
# for mpi4py you need libopenmpi-dev
|
||||
mpi4py
|
||||
https://github.com/wassname/baselines/archive/676b6f0b18fe9855579cb271484dcdf118e2588c.zip
|
||||
https://github.com/farizrahman4u/keras-contrib/archive/15133077af4d9c379c8219574898ce29cae12e48.zip
|
||||
gym==0.9.3
|
||||
tensorflow-gpu==1.3.0
|
||||
h5py==2.7.0
|
||||
keras-rl==0.3.0
|
||||
keras-tqdm==2.0.1
|
||||
# use my branch with prioritised ppo, until merged
|
||||
https://github.com/wassname/tensorforce/archive/merged_6b.zip
|
||||
# tensorforce 0.3.1 a specific commit
|
||||
https://github.com/reinforceio/tensorforce/archive/17cf4b84d8fbad9d7c65d1d7bde36faabddd6a36.zip
|
||||
# numbers
|
||||
tables==3.4.2
|
||||
seaborn==0.7.1
|
||||
seaborn==0.8
|
||||
pandas==0.20.3
|
||||
numpy==1.12.1
|
||||
matplotlib==2.0.0
|
||||
numpy==1.13.3
|
||||
matplotlib==2.0.2
|
||||
# misc
|
||||
tqdm==4.11.2
|
||||
seaborn==0.7.1
|
||||
tqdm==4.19.2
|
||||
seaborn==0.8
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,975 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Uses tensorforce tensorforce-0.2.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:29.756080Z",
|
||||
"start_time": "2017-10-15T02:50:28.814041Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# plotting\n",
|
||||
"%matplotlib inline\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"plt.style.use('ggplot')\n",
|
||||
"\n",
|
||||
"# numeric\n",
|
||||
"import numpy as np\n",
|
||||
"from numpy import random\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# util\n",
|
||||
"from collections import Counter\n",
|
||||
"import pdb\n",
|
||||
"import glob\n",
|
||||
"import time\n",
|
||||
"import tempfile\n",
|
||||
"import itertools\n",
|
||||
"from tqdm import tqdm_notebook as tqdm\n",
|
||||
"import datetime\n",
|
||||
"\n",
|
||||
"# logging\n",
|
||||
"import logging\n",
|
||||
"logger = log = logging.getLogger(__name__)\n",
|
||||
"# log.setLevel(logging.INFO)\n",
|
||||
"logging.basicConfig()\n",
|
||||
"log.info('%s logger started.', __name__)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:29.899144Z",
|
||||
"start_time": "2017-10-15T02:50:29.759262Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gym\n",
|
||||
"from gym import error, spaces, utils\n",
|
||||
"from gym.utils import seeding"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:29.989592Z",
|
||||
"start_time": "2017-10-15T02:50:29.901021Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.sys.path.append(os.path.abspath('.'))\n",
|
||||
"%reload_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.089437Z",
|
||||
"start_time": "2017-10-15T02:50:29.991629Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'./outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# params\n",
|
||||
"window_length = 50\n",
|
||||
"import datetime\n",
|
||||
"ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')\n",
|
||||
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_%s.model' % ts\n",
|
||||
"save_path = './outputs/tensorforce-PPO-prioritised/tensorforce-PPO-prioritised_20171015_02-50-30.model'\n",
|
||||
"save_path\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.165055Z",
|
||||
"start_time": "2017-10-15T02:50:30.091635Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'logs/tensorforce-PPO-prioritised_20171015_02-50-30'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"log_dir = os.path.join('logs', os.path.splitext(os.path.basename(save_path))[0])\n",
|
||||
"try:\n",
|
||||
" os.makedirs(log_dir)\n",
|
||||
"except OSError:\n",
|
||||
" pass\n",
|
||||
"log_dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Enviroment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.239055Z",
|
||||
"start_time": "2017-10-15T02:50:30.168273Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from rl_portfolio_management.environments.portfolio import PortfolioEnv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.334097Z",
|
||||
"start_time": "2017-10-15T02:50:30.241263Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from rl.memory import SequentialMemory, Memory\n",
|
||||
"from collections import deque\n",
|
||||
"\n",
|
||||
"class EnvWrapper(PortfolioEnv):\n",
|
||||
" \"\"\"Wraps env to normalise and reshape action.\"\"\"\n",
|
||||
" def __init__(self, window_length=50, *args, **kwargs):\n",
|
||||
" super().__init__(*args, **kwargs)\n",
|
||||
" \n",
|
||||
" def step(self, action):\n",
|
||||
" # also it puts it in a list\n",
|
||||
" if isinstance(action, list):\n",
|
||||
" action = action[0]\n",
|
||||
" \n",
|
||||
" # we have to normalise for some reason softmax wont work\n",
|
||||
" if isinstance(action, dict):\n",
|
||||
" action = np.abs(list(action.values()))\n",
|
||||
" action /= action.sum() \n",
|
||||
" \n",
|
||||
" return super().step(action) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.635247Z",
|
||||
"start_time": "2017-10-15T02:50:30.337761Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
|
||||
"[2017-10-15 10:50:30,622] Making new env: CartPole-v0\n",
|
||||
"INFO:gym.envs.registration:Making new env: CartPole-v0\n",
|
||||
"[2017-10-15 10:50:30,629] Making new env: CartPole-v0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
|
||||
"env = EnvWrapper(\n",
|
||||
" df=df_train,\n",
|
||||
" steps=300, \n",
|
||||
" scale=True, \n",
|
||||
" augment=0.000,\n",
|
||||
" trading_cost=0, # let just overfit first,\n",
|
||||
" window_length = window_length,\n",
|
||||
" \n",
|
||||
")\n",
|
||||
"env.seed = 0 \n",
|
||||
"\n",
|
||||
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
|
||||
"env_test = EnvWrapper(\n",
|
||||
" df=df_test,\n",
|
||||
" steps=300, \n",
|
||||
" scale=True, \n",
|
||||
" augment=0.00,\n",
|
||||
" trading_cost=0, # let just overfit first\n",
|
||||
" window_length=window_length,\n",
|
||||
")\n",
|
||||
"env_test.seed = 0 \n",
|
||||
"\n",
|
||||
"from tensorforce.contrib.openai_gym import OpenAIGym\n",
|
||||
"environment = OpenAIGym('CartPole-v0')\n",
|
||||
"environment.gym = env\n",
|
||||
"\n",
|
||||
"environment_test = OpenAIGym('CartPole-v0')\n",
|
||||
"environment_test.gym = env_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:30.725276Z",
|
||||
"start_time": "2017-10-15T02:50:30.636432Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0 False {'reward': 0.0, 'log_return': 0.0, 'portfolio_value': 1.0, 'return': 0.9994203267253029, 'rate_of_return': 0.0, 'weights_mean': 0.16666666666666666, 'weights_std': 0.092620963292867634, 'cost': 0.0, 'market_value': 0.99991966957965184, 'date': 1463925600.0, 'steps': 2}\n",
|
||||
"(5, 50, 3) (5, 50, 3)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# check shapes\n",
|
||||
"obs1, reward, done, info=env.step(np.random.random(env.action_space.shape))\n",
|
||||
"print(reward, done, info)\n",
|
||||
"obs2 = env.reset()\n",
|
||||
"print(obs1.shape,obs2.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-07-16T04:41:21.116729Z",
|
||||
"start_time": "2017-07-16T12:41:21.086620+08:00"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Model\n",
|
||||
"\n",
|
||||
"Derived from https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:53.928811Z",
|
||||
"start_time": "2017-10-15T02:50:30.726693Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from tensorforce import Configuration\n",
|
||||
"from tensorforce.agents import PPOAgent\n",
|
||||
"from tensorforce.core.networks import layered_network_builder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:54.317167Z",
|
||||
"start_time": "2017-10-15T02:50:53.931397Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# layer helpers from:\n",
|
||||
"# https://github.com/reinforceio/tensorforce/blob/0d07fadec03f76537a2431e17c51cd759d53b5e9/tensorforce/core/networks/layers.py#L90\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from math import sqrt\n",
|
||||
"from tensorforce import util\n",
|
||||
"from tensorforce import TensorForceError\n",
|
||||
"\n",
|
||||
"def linear(x, size, bias=True, l2_regularization=0.0):\n",
|
||||
" if util.rank(x) != 2:\n",
|
||||
" raise TensorForceError('Invalid input rank for linear layer.')\n",
|
||||
" with tf.variable_scope('linear'):\n",
|
||||
" weights = tf.Variable(initial_value=tf.random_normal(shape=(x.get_shape()[1].value, size), stddev=sqrt(2.0 / (x.get_shape()[1].value + size))))\n",
|
||||
" if l2_regularization > 0.0:\n",
|
||||
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=weights))\n",
|
||||
" x = tf.matmul(a=x, b=weights)\n",
|
||||
" if bias:\n",
|
||||
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
|
||||
" if l2_regularization > 0.0:\n",
|
||||
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
|
||||
" x = tf.nn.bias_add(value=x, bias=bias)\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def nonlinearity(x, name='relu'):\n",
|
||||
" with tf.variable_scope('nonlinearity'):\n",
|
||||
" if name == 'elu':\n",
|
||||
" x = tf.nn.elu(features=x)\n",
|
||||
" elif name == 'relu':\n",
|
||||
" x = tf.nn.relu(features=x)\n",
|
||||
" elif name == 'selu':\n",
|
||||
" # https://arxiv.org/pdf/1706.02515.pdf\n",
|
||||
" alpha = 1.6732632423543772848170429916717\n",
|
||||
" scale = 1.0507009873554804934193349852946\n",
|
||||
" negative = alpha * tf.nn.elu(features=x)\n",
|
||||
" x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)\n",
|
||||
" elif name == 'sigmoid':\n",
|
||||
" x = tf.sigmoid(x=x)\n",
|
||||
" elif name == 'softmax':\n",
|
||||
" x = tf.nn.softmax(logits=x)\n",
|
||||
" elif name == 'tanh':\n",
|
||||
" x = tf.nn.tanh(x=x)\n",
|
||||
" else:\n",
|
||||
" raise TensorForceError('Invalid nonlinearity.')\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def dense(x, size, bias=True, activation='relu', l2_regularization=0.0):\n",
|
||||
" if util.rank(x) != 2:\n",
|
||||
" raise TensorForceError('Invalid input rank for dense layer.')\n",
|
||||
" with tf.variable_scope('dense'):\n",
|
||||
" x = linear(x=x, size=size, bias=bias, l2_regularization=l2_regularization)\n",
|
||||
" x = nonlinearity(x=x, name=activation)\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def flatten(x):\n",
|
||||
" with tf.variable_scope('flatten'):\n",
|
||||
" x = tf.reshape(tensor=x, shape=(-1, util.prod(x.get_shape().as_list()[1:])))\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def conv2d(x, size, window=(3,3), stride=(1,1), bias=False, activation='relu', l2_regularization=0.0, padding='SAME'):\n",
|
||||
" if util.rank(x) != 4:\n",
|
||||
" raise TensorForceError('Invalid input rank for conv2d layer.')\n",
|
||||
" with tf.variable_scope('conv2d'):\n",
|
||||
" filters = tf.Variable(initial_value=tf.random_normal(shape=(window[0], window[1], x.get_shape()[3].value, size), stddev=sqrt(2.0 / size)))\n",
|
||||
" if l2_regularization > 0.0:\n",
|
||||
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))\n",
|
||||
" x = tf.nn.conv2d(input=x, filter=filters, strides=(1, stride[0], stride[1], 1), padding=padding)\n",
|
||||
" if bias:\n",
|
||||
" bias = tf.Variable(initial_value=tf.zeros(shape=(size,)))\n",
|
||||
" if l2_regularization > 0.0:\n",
|
||||
" tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=bias))\n",
|
||||
" x = tf.nn.bias_add(value=x, bias=bias)\n",
|
||||
" x = nonlinearity(x=x, name=activation)\n",
|
||||
" return x\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:54.442239Z",
|
||||
"start_time": "2017-10-15T02:50:54.318527Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# build a network for a given input\n",
|
||||
"def network_builder(inputs, summary_level):\n",
|
||||
" if len(inputs) != 1:\n",
|
||||
" raise TensorForceError('Layered network must have only one input.')\n",
|
||||
" x = next(iter(inputs.values()))\n",
|
||||
" \n",
|
||||
" x = conv2d(x=x, size=2, window=(1,3), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
||||
" x = conv2d(x=x, size=20, window=(1,window_length-2), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
||||
" x = conv2d(x=x, size=1, window=(1,1), bias=True, activation='relu', l2_regularization=1e-8, padding='VALID')\n",
|
||||
" x = flatten(x)\n",
|
||||
" x = nonlinearity(x,name='softmax')\n",
|
||||
" \n",
|
||||
" return x\n",
|
||||
"network=network_builder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:54.566983Z",
|
||||
"start_time": "2017-10-15T02:50:54.444213Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"({'action0': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'},\n",
|
||||
" 'action1': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'},\n",
|
||||
" 'action2': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'},\n",
|
||||
" 'action3': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'},\n",
|
||||
" 'action4': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'},\n",
|
||||
" 'action5': {'epsilon': 1,\n",
|
||||
" 'epsilon_final': 0.005,\n",
|
||||
" 'epsilon_timesteps': 100000.0,\n",
|
||||
" 'start_after': 0,\n",
|
||||
" 'type': 'epsilon_anneal'}},)"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"exploration=dict(\n",
|
||||
" type=\"epsilon_anneal\",\n",
|
||||
" epsilon=1,\n",
|
||||
" epsilon_final= 0.005,\n",
|
||||
" epsilon_timesteps= 1e5,\n",
|
||||
" start_after=0,\n",
|
||||
")\n",
|
||||
"{'action' + str(n): exploration for n in range(env.action_space.shape[0])},"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:57.570700Z",
|
||||
"start_time": "2017-10-15T02:50:54.568901Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:tensorforce.agents.agent:Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n",
|
||||
"[2017-10-15 10:50:57,553] Configuration values not accessed: first_update, memory_capacity, memory, update_frequency, repeat_update\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<tensorforce.agents.ppo_agent.PPOAgent at 0x7fc5592f0da0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"batch_size=256\n",
|
||||
"exploration=dict(\n",
|
||||
" type=\"epsilon_anneal\",\n",
|
||||
" epsilon=1,\n",
|
||||
" epsilon_final= 0.005,\n",
|
||||
" epsilon_timesteps= 1e5,\n",
|
||||
" start_after=0,\n",
|
||||
")\n",
|
||||
"config = Configuration( \n",
|
||||
" # Each agent requires the following ``Configuration`` parameters:\n",
|
||||
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/agents/agent.py#L32\n",
|
||||
" network=network,\n",
|
||||
" states=dict(shape=tuple(env.observation_space.shape), type='float'),\n",
|
||||
" actions={'action' + str(n): dict(continuous=True) for n in range(env.action_space.shape[0])},\n",
|
||||
" preprocessing = None,# dict or list containing state preprocessing configuration.\n",
|
||||
" exploration = {'action' + str(n): exploration for n in range(env.action_space.shape[0])}, # dict containing action exploration configuration.\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # The `MemoryAgent` class additionally requires the following parameters:\n",
|
||||
" first_update = batch_size*2, # integer indicating the number of steps to pass before the first update.\n",
|
||||
" memory_capacity = 300000, # integer of maximum experiences to store. (takes 2s to sample with 100k)\n",
|
||||
" memory = 'prioritized_replay', # string indicating memory type ('replay' or 'prioritized_replay').\n",
|
||||
" update_frequency = int(batch_size/2), # integer indicating the number of steps between model updates.\n",
|
||||
" repeat_update = 2, # integer indicating how often to repeat the model update.\n",
|
||||
"\n",
|
||||
" # Each model requires the following configuration parameters:\n",
|
||||
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/model.py#L33\n",
|
||||
" discount = 0.97, # float of discount factor (gamma).\n",
|
||||
" learning_rate = 1e-3, # float of learning rate (alpha). (3e-4 in paper 1e-3 (atari) and 3e-4 in baselines)\n",
|
||||
" optimizer = 'adam', # string of optimizer to use (e.g. 'adam' in paper).\n",
|
||||
" device = None, # string of tensorflow device name.\n",
|
||||
"# tf_summary = log_dir, # string directory to write tensorflow summaries. Default None\n",
|
||||
"# tf_summary_level = 1, # int indicating which tensorflow summaries to create.\n",
|
||||
" tf_summary_interval = 1000, # int number of calls to get_action until writing tensorflow summaries on update.\n",
|
||||
" log_level = 'info', # string containing log level (e.g. 'info').\n",
|
||||
" distributed = False, # boolean indicating whether to use distributed tensorflow.\n",
|
||||
" global_model = False, # global model.\n",
|
||||
" session = None, # session to use. \n",
|
||||
"\n",
|
||||
" # A Policy Gradient Model expects the following additional configuration parameters:\n",
|
||||
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/policy_gradient_model.py#L35\n",
|
||||
" # I edited my tensorforce install to have a flatten layer to make this work (my branch is in requirements.txt)\n",
|
||||
" baseline=dict(\n",
|
||||
" type=\"mlp\",\n",
|
||||
" sizes=[128, 128],\n",
|
||||
" epochs=1,\n",
|
||||
" update_batch_size=128,\n",
|
||||
" learning_rate=0.01\n",
|
||||
" ), # string indicating the baseline value function (currently 'linear' or 'mlp').\n",
|
||||
" gae_rewards= True, # boolean indicating whether to use GAE.\n",
|
||||
" gae_lambda= 0.97, # float of the Generalized Advantage Estimation lambda.\n",
|
||||
" normalize_rewards= False,# boolean indicating whether to normalize the advantage or not.\n",
|
||||
" \n",
|
||||
" # PPO Params \n",
|
||||
" # https://github.com/reinforceio/tensorforce/blob/master/tensorforce/models/ppo_model.py\n",
|
||||
" entropy_penalty=0.01, # 0 and 0.01 in baselines\n",
|
||||
" loss_clipping=0.1, # Trust region clipping\n",
|
||||
" epochs=4, # Number of training epochs for SGD, data is repeated this much 4 (atari),10 in baselines, 10 in paper\n",
|
||||
" optimizer_batch_size=32, # Batch size for optimiser, should be small (e.g. 64 in paper)\n",
|
||||
" random_sampling=True # Sampling strategy for minibatch replay memory\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create a Proximal Policy Optimization (PPO) agent\n",
|
||||
"agent = PPOAgent(config=config)\n",
|
||||
"agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train\n",
|
||||
"\n",
|
||||
"## Callbacks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:57.755421Z",
|
||||
"start_time": "2017-10-15T02:50:57.573140Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from rl_portfolio_management.callbacks.tensorforce import EpisodeFinishedTQDM\n",
|
||||
"from rl_portfolio_management.util import MDD, sharpe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:19:28.278977Z",
|
||||
"start_time": "2017-10-15T02:19:28.132177Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Train"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:57.895354Z",
|
||||
"start_time": "2017-10-15T02:50:57.761809Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from tensorforce.execution import Runner\n",
|
||||
"runner = Runner(agent=agent, environment=environment, save_path=save_path, save_episodes=1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-10-15T02:50:57.998473Z",
|
||||
"start_time": "2017-10-15T02:50:57.897093Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check my PR is included, https://github.com/wassname/tensorforce/tree/merged_6b\n",
|
||||
"import tensorforce.core.memories\n",
|
||||
"assert isinstance(runner.agent.memory,tensorforce.core.memories.PrioritizedReplay)\n",
|
||||
"assert isinstance(runner.agent, tensorforce.agents.MemoryAgent)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.630Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# resume\n",
|
||||
"saves=glob.glob(save_path+'-*')\n",
|
||||
"if len(saves)>0:\n",
|
||||
" # load saved\n",
|
||||
" last_save = os.path.splitext(saves[0])[0]\n",
|
||||
" runner.agent.load_model(last_save)\n",
|
||||
" print('loaded', last_save)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.632Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b018b4d97fbd4f1daaf1b5e0e8e9bd09"
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TensorBoardLogger started. Run `tensorboard --logdir=/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/rl_keras_finance/portfolio-rl-jiang_2017/logs/tensorforce-PPO-prioritised_20171015_02-50-30` to visualize\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"episodes = int(6e6 / 30)\n",
|
||||
"runner.run(\n",
|
||||
" episodes=episodes,\n",
|
||||
" max_timesteps=200,\n",
|
||||
" episode_finished=EpisodeFinishedTQDM(\n",
|
||||
" log_intv=100, \n",
|
||||
" episodes=episodes,\n",
|
||||
" log_dir=log_dir,\n",
|
||||
" session=runner.agent.model.session, \n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.633Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# save\n",
|
||||
"agent.save_model(save_path)\n",
|
||||
"save_path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-07-19T00:09:54.262405Z",
|
||||
"start_time": "2017-07-19T08:09:54.226639+08:00"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.635Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# one big test\n",
|
||||
"df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
|
||||
"steps=2400#len(df_test)-window_length-2\n",
|
||||
"env_test = EnvWrapper(\n",
|
||||
" df=df_test,\n",
|
||||
" steps=steps, \n",
|
||||
" scale=True, \n",
|
||||
" augment=0.00,\n",
|
||||
" trading_cost=0, # let's just overfit first\n",
|
||||
" window_length=window_length,\n",
|
||||
")\n",
|
||||
"env_test.seed = 0 \n",
|
||||
"environment_test = OpenAIGym('CartPole-v0')\n",
|
||||
"environment_test.gym = env_test\n",
|
||||
"\n",
|
||||
"agent.load_model(save_path)\n",
|
||||
"runner_test = Runner(agent=agent, environment=environment_test)\n",
|
||||
"runner_test.run(\n",
|
||||
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(env_test.infos)\n",
|
||||
"df.index=df['index']\n",
|
||||
"\n",
|
||||
"s=sharpe(df.rate_of_return+1)\n",
|
||||
"mdd=MDD(df.rate_of_return+1)\n",
|
||||
"apv=df.portfolio_value.iloc[-1]\n",
|
||||
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(apv))\n",
|
||||
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
|
||||
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
|
||||
"print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
|
||||
"print('')\n",
|
||||
"\n",
|
||||
"# show one run vs average market performance\n",
|
||||
"plt.title('test MDD={}, Sharpe={}, APV={}'.format(mdd,s,apv))\n",
|
||||
"df.portfolio_value.plot()\n",
|
||||
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
|
||||
"plt.legend()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2017-07-19T00:48:39.193976Z",
|
||||
"start_time": "2017-07-19T08:48:39.154752+08:00"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.637Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"data=[]\n",
|
||||
"for i in range(10):\n",
|
||||
" agent.load_model(save_path)\n",
|
||||
" df_test = pd.read_hdf('./data/poloniex_30m.hf',key='test')\n",
|
||||
" \n",
|
||||
" env_test = EnvWrapper(\n",
|
||||
" df=df_test,\n",
|
||||
" steps=1800, \n",
|
||||
" scale=True, \n",
|
||||
" augment=0.00,\n",
|
||||
" trading_cost=0, # let's just overfit first\n",
|
||||
" window_length=window_length,\n",
|
||||
" )\n",
|
||||
" env_test.seed = 0 \n",
|
||||
"\n",
|
||||
"\n",
|
||||
" environment_test = OpenAIGym('CartPole-v0')\n",
|
||||
" environment_test.gym = env_test\n",
|
||||
"\n",
|
||||
" runner_test = Runner(agent=agent, environment=environment_test)\n",
|
||||
" np.random.seed(i)\n",
|
||||
" runner_test.run(\n",
|
||||
" episodes=2, max_timesteps=32, episode_finished=EpisodeFinished(10))\n",
|
||||
" df = pd.DataFrame(environment_test.gym.infos)\n",
|
||||
"# df.index=df['index']\n",
|
||||
" \n",
|
||||
" s=sharpe(df.rate_of_return+1)\n",
|
||||
" mdd=MDD(df.rate_of_return+1)\n",
|
||||
" data.append(dict(sharpe=s,mdd=mdd))\n",
|
||||
" print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
|
||||
" print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
|
||||
" print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
|
||||
" print('MMR (mean market returns): \\t{: 2.6f}'.format(df.mean_market_returns.cumprod().iloc[-1]))\n",
|
||||
" print('')\n",
|
||||
" df.portfolio_value.plot(label=str(i))\n",
|
||||
"plt.legend()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.638Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2017-10-15T02:50:28.640Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# one big test over train\n",
|
||||
"# one big test\n",
|
||||
"df_train = pd.read_hdf('./data/poloniex_30m.hf',key='train')\n",
|
||||
"steps=len(df_train)-window_length-2\n",
|
||||
"env = EnvWrapper(\n",
|
||||
" df=df_train,\n",
|
||||
" steps=steps, \n",
|
||||
" scale=True, \n",
|
||||
" augment=0.00,\n",
|
||||
" trading_cost=0, # let's just overfit first\n",
|
||||
" window_length=window_length,\n",
|
||||
")\n",
|
||||
"env.seed = 0 \n",
|
||||
"environment = OpenAIGym('CartPole-v0')\n",
|
||||
"environment.gym = env\n",
|
||||
"\n",
|
||||
"agent.load_model(save_path)\n",
|
||||
"runner = Runner(agent=agent, environment=environment)\n",
|
||||
"runner.run(\n",
|
||||
"episodes=1, max_timesteps=steps, episode_finished=EpisodeFinished(10))\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(env.infos)\n",
|
||||
"df.index=df['index']\n",
|
||||
"\n",
|
||||
"s=sharpe(df.rate_of_return+1)\n",
|
||||
"mdd=MDD(df.rate_of_return+1)\n",
|
||||
"data.append(dict(sharpe=s,mdd=mdd))\n",
|
||||
"print('APV (Accumulated portfolio value): \\t{: 2.6f}'.format(df.portfolio_value.iloc[-1]))\n",
|
||||
"print('SR (Sharpe ratio): \\t{: 2.6f}'.format( s))\n",
|
||||
"print('MDD (max drawdown): \\t{: 2.6%}'.format( mdd))\n",
|
||||
"print('')\n",
|
||||
"\n",
|
||||
"# show one run vs average market performance\n",
|
||||
"plt.title('train')\n",
|
||||
"df.portfolio_value.plot()\n",
|
||||
"df.mean_market_returns.cumprod().plot(label='mean market performance')\n",
|
||||
"plt.legend()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "jupyter3",
|
||||
"language": "python",
|
||||
"name": "jupyter3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user