init

2026-06-27 18:06:31 +08:00 · 2017-07-15 08:46:18 +08:00
commit 87983600a4
26 changed files with 482155 additions and 0 deletions
@@ -0,0 +1,145 @@
+secrets/secrets.json
+outputs
+.cache
+.trash
+models
+
+# Created by https://www.gitignore.io/api/linux,python,windows
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.gitignore.io/api/linux,python,windows
@@ -0,0 +1,50 @@
+source: http://teachmehowto.trade/PoloData/#section-ohlcv
+post: https://www.reddit.com/r/BitcoinMarkets/comments/694q0a/historical_pricing_data_for_poloniex_btceth_pairs
+
+----
+
+Hey guys -
+
+By popular demand, I have made historical up-to-date OHLCV (open-high-low-close-volume) data for the most-popular cryptocurrency pairs traded on Poloniex available for download.
+
+**Link: http://teachmehowto.trade/PoloData/**
+
+The format looks like:
+
+          date       high        low       open      close   volume quoteVolume weightedAverage
+    1493848800 0.05337888 0.05304304 0.05334496 0.05309167 203.0503    3819.472      0.05316189
+    1493850600 0.05319993 0.05271523 0.05309168 0.05299500 319.6935    6033.497      0.05298644
+    1493852400 0.05372393 0.05297500 0.05299500 0.05357000 572.1481   10695.544      0.05349406
+    1493854200 0.05366399 0.05267554 0.05357000 0.05311562 567.2399   10675.820      0.05313314
+    1493856000 0.05313172 0.05270000 0.05311563 0.05288821 367.8146    6943.378      0.05297344
+    1493857800 0.05310932 0.05286593 0.05300000 0.05303735 159.6984    3011.262      0.05303369
+
+Where `date` is the unix timestamp in UTC.
+
+-------------------------
+
+**Frequently Asked Questions**
+
+> How often is the raw data updated?
+
+The .csv files are updated every hour (for now).
+
+> Why is the data only every 30-minutes?
+
+Based on my experience, 30-minute OHLCV data is the minimum periodicity for any sort of meaningful analysis. Can you build models/strategies/algos to trade 5-minute (or less) intervals? Certainly, but this won't be the best way to get at that data. Additionally, aggregating from 30-minute to 1H/2H/4H/6H/12H/24H intervals is trivial.
+
+> What if I *really* want less-than-30-minute time-series data? Can I get that somehow?
+
+Yes, you can. I built a package for `R`, which wraps a few of the public methods for Poloniex's API. You can use that to get 5- or 15-minute data. Check it out here: **https://github.com/Rb-Engineering/PoloniexR**
+
+> What about order book data?
+
+I'm currently finalizing the back-end for that and it will be available soon! If you can't wait, and you're comfortable working in `R` (or want to learn), check out the `getOrderBook()` function in my `PoloniexR` package above.
+
+-------------------------
+
+If you guys have any questions/comments/concerns, feel free to comment below or PM me.
+
+One last friendly reminder... please **DO NOT ABUSE THIS!!** (e.g. by repeatedly downloading the same file needlessly) This is my gift to the community so all I ask is that you be mindful of the fact that I built this and am hosting it... for free. :)
+
+Happy trading!!
@@ -0,0 +1 @@
+Replicating [Jiang 2017](https://arxiv.org/abs/1706.10059)
@@ -0,0 +1,300 @@
+affine==2.0.0.post1
+alembic==0.9.1
+amqp==1.4.9
+anyjson==0.3.3
+appdirs==1.4.3
+arrow==0.10.0
+asn1crypto==0.22.0
+atari-py==0.1.1
+azure==1.0.3
+azure-common==1.1.6
+azure-mgmt==0.20.2
+azure-mgmt-common==0.20.0
+azure-mgmt-compute==0.20.1
+azure-mgmt-network==0.20.1
+azure-mgmt-nspkg==2.0.0
+azure-mgmt-resource==0.20.1
+azure-mgmt-storage==0.20.0
+azure-nspkg==2.0.0
+azure-servicebus==0.20.1
+azure-servicemanagement-legacy==0.20.2
+azure-storage==0.20.3
+Babel==2.2.0
+backoff==1.4.0
+backports.weakref==1.0rc1
+baselines==0.1.3
+bcolz==0.12.1
+bcrypt==2.0.0
+beautifulsoup4==4.5.3
+billiard==3.3.0.23
+bleach==1.5.0
+blessings==1.6
+blinker==1.4
+boto==2.39.0
+Bottleneck==1.2.1
+bpython==0.16
+bqplot==0.9.0
+bs4==0.0.1
+bt==0.2.5
+cached-property==1.3.0
+cachetools==2.0.0
+celery==3.1.19
+cffi==1.10.0
+cfscrape==1.8.0
+chardet==2.3.0
+ChatterBot==0.6.0
+chatterbot-corpus==0.0.1
+chatterbot-voice==0.1.3
+click==6.7
+click-plugins==1.0.3
+cligj==0.4.0
+cloudpickle==0.2.2
+colorama==0.3.9
+contextlib2==0.5.5
+cryptography==1.8.1
+cssmin==0.2.0
+curtsies==0.2.11
+cycler==0.10.0
+cyordereddict==1.0.0
+Cython==0.25.2
+dask==0.15.0
+dataset==0.8.0
+deap==1.0.2
+decorator==4.0.11
+descartes==1.1.0
+dill==0.2.6
+docker==2.3.0
+docker-compose==1.13.0
+docker-pycreds==0.2.1
+dockerpty==0.4.1
+docopt==0.6.2
+dominate==2.3.1
+earthengine-api==0.1.111
+easydict==1.6
+EbookLib==0.16
+ee==0.2
+elasticsearch==5.3.0
+elasticsearch-dsl==5.2.0
+empyrical==0.2.2
+entrypoints==0.2.2
+ephem==3.7.6.0
+eventlet==0.17.4
+-e git+git@github.com:wassname/FanFicFare.git@1da7f6ec159bee18b650fa074d2d59d4721e3653#egg=FanFicFare
+fastkml==0.11
+ffn==0.3.2
+Fiona==1.7.5
+fix-yahoo-finance==0.0.7
+Flask==0.12.1
+Flask-AlchemyDumps==0.0.10
+Flask-Assets==0.11
+Flask-Bcrypt==0.7.1
+Flask-Bootstrap==3.3.5.7
+Flask-Celery-Helper==1.1.0
+Flask-DebugToolbar==0.10.0
+Flask-Login==0.3.2
+Flask-Mail==0.9.1
+Flask-Migrate==1.7.0
+Flask-Principal==0.4.0
+Flask-Redis==0.1.0
+Flask-Script==2.0.5
+Flask-Security==1.7.5
+Flask-SQLAlchemy==2.1
+Flask-WTF==0.12
+flower==0.8.4
+Folio==0.4
+future==0.15.2
+futures==3.1.1
+gbdx-auth==0.2.4
+gbdxtools==0.11.7
+GDAL==2.1.0
+geomet==0.1.1
+geopandas==0.2.1
+google-api-python-client==1.6.2
+google-auth==1.0.1
+google-auth-httplib2==0.0.2
+google-cloud-bigquery==0.25.0
+google-cloud-core==0.25.0
+googleapis-common-protos==1.5.2
+googletrans==2.1.2
+goslate==1.5.1
+greenlet==0.4.12
+gunicorn==19.4.5
+gym==0.9.2
+h5py==2.7.0
+hammock==0.2.4
+html2text==2016.9.19
+html5lib==0.9999999
+httplib2==0.10.3
+idna==2.5
+inflection==0.3.1
+intervaltree==2.1.0
+ipykernel==4.6.0
+ipython==5.3.0
+ipython-genutils==0.2.0
+ipywidgets==6.0.0
+itsdangerous==0.24
+jedi==0.10.2
+jellyfish==0.5.6
+Jinja2==2.9.6
+joblib==0.11
+jsmin==2.2.1
+jsondatabase==0.1.7
+jsonschema==2.6.0
+jupyter==1.0.0
+jupyter-client==5.0.1
+jupyter-console==5.1.0
+jupyter-contrib-core==0.3.1
+jupyter-core==4.3.0
+jupyter-nbextensions-configurator==0.2.5
+Keras==2.0.5
+keras-contrib==1.2.1
+keras-rl==0.3.0
+keras-tqdm==2.0.1
+kombu==3.0.37
+leven==1.0.4
+Logbook==1.0.0
+lru-dict==1.1.6
+lxml==3.7.3
+Mako==1.0.6
+Markdown==2.6.8
+MarkupSafe==1.0
+matplotlib==2.0.0
+mistune==0.7.4
+more-itertools==3.2.0
+MosT==0.12
+mpmath==0.19
+mtranslate==1.6
+multipledispatch==0.4.9
+munch==2.1.1
+nbconvert==5.1.1
+nbformat==4.3.0
+ndg-httpsclient==0.4.2
+networkx==1.11
+nltk==3.2.2
+normality==0.4.0
+nose==1.3.7
+notebook==5.0.0
+numexpr==2.6.2
+numpy==1.13.1
+oauth2client==4.0.0
+oauthlib==2.0.2
+olefile==0.44
+packaging==16.8
+pandas==0.20.3
+pandas-datareader==0.4.0
+pandas-profiling==1.4.0
+pandocfilters==1.4.1
+passlib==1.7.1
+path.py==10.1
+patsy==0.4.1
+pbr==3.0.1
+pexpect==4.2.1
+pickleshare==0.7.4
+Pillow==4.1.0
+planet==1.0.0
+poloniex==0.4.6
+pprint==0.1
+praw==4.4.0
+prawcore==0.8.0
+progressbar2==3.30.2
+prompt-toolkit==1.0.14
+protobuf==3.3.0
+psycopg2==2.7.1
+ptyprocess==0.5.1
+py==1.4.33
+py-googletrans==1.2
+PyAlgoTrade==0.18
+pyasn1==0.2.3
+pyasn1-modules==0.0.9
+pycparser==2.17
+pycurl==7.43.0
+PyExecJS==1.4.0
+pygeoif==0.6
+pyglet==1.2.4
+Pygments==2.2.0
+PyJWT==1.4.2
+pymongo==3.4.0
+PyOpenGL==3.1.0
+pyOpenSSL==17.0.0
+pyparsing==2.2.0
+PyPrind==2.11.1
+pyproj==1.9.5.1
+pyshp==1.2.10
+pysle==1.5.7
+pytest==3.0.7
+python-dateutil==2.6.0
+python-editor==1.0.3
+python-twitter==3.2.1
+python-utils==2.1.0
+pytz==2017.2
+PyYAML==3.12
+pyzmq==16.0.2
+qgrid==0.3.2
+qtconsole==4.3.0
+Quandl==3.2.0
+rasterio==1.0a8
+recurrentshop==0.0.1
+redis==2.10.3
+requests==2.11.1
+requests-file==1.4.2
+requests-ftp==0.3.1
+requests-futures==0.9.7
+requests-oauthlib==0.8.0
+rsa==3.4.2
+scikit-learn==0.18.1
+scipy==0.19.1
+seaborn==0.7.1
+selenium==3.4.3
+seq2seq==0.1.0
+Shapely==1.5.17
+simplegeneric==0.8.1
+simplejson==3.10.0
+six==1.10.0
+sklearn==0.0
+snuggs==1.4.1
+sortedcontainers==1.5.7
+SpeechRecognition==3.6.5
+SQLAlchemy==1.1.9
+SQLAlchemy-FullText-Search==0.2.3
+SQLAlchemy-Utils==0.31.5
+statsmodels==0.8.0
+stevedore==1.21.0
+sympy==1.0
+tables==3.3.0
+tabulate==0.7.7
+tensor2tensor==1.0.5
+tensorflow==1.2.1
+tensorflow-gpu==1.2.0
+-e git+git@github.com:reinforceio/tensorforce.git@21323bf4226a93892637551cd8d75b8e13d04eaa#egg=tensorforce
+terminado==0.6
+testpath==0.3
+texttable==0.8.8
+Theano==0.9.0
+toolz==0.8.2
+torch==0.1.12.post2
+torchvision==0.1.8
+tornado==4.4.3
+TPOT==0.8.3
+tqdm==4.11.2
+traitlets==4.3.2
+traittypes==0.0.6
+Unipath==1.1
+update-checker==0.16
+uritemplate==3.0.0
+urllib3==1.21.1
+virtualenv==15.1.0
+virtualenv-clone==0.2.6
+virtualenvwrapper==4.7.2
+visitor==0.1.3
+wcwidth==0.1.7
+webassets==0.12.1
+webencodings==0.5.1
+websocket-client==0.40.0
+Werkzeug==0.12.2
+widgetsnbextension==2.0.0
+WTForms==2.1
+WTForms-ParsleyJS==2.0.1
+xmltodict==0.10.2
+yapf==0.16.1
+zipline==1.1.0
+zmq==0.0.0
@@ -0,0 +1,14 @@
+# data
+# rl
+gym==0.9.2
+tensorflow-gpu==1.2.0
+baselines==0.1.3
+keras-contrib==1.2.1
+keras-rl==0.3.0
+keras-tqdm==2.0.1
+git+git@github.com:reinforceio/tensorforce.git@21323bf4226a93892637551cd8d75b8e13d04eaa#egg=tensorforce
+# numbers
+seaborn==0.7.1
+pandas==0.18.1
+numpy==1.12.1
+matplotlib==2.0.0
@@ -0,0 +1,210 @@
+import numpy as np
+
+import gym
+import gym.spaces
+
+# small constant added to numerators and denominators below to avoid division by zero
+eps = 1e-7
+
+
+def random_shift(x, fraction):
+    min_x, max_x = np.min(x), np.max(x)
+    m = np.random.uniform(-fraction, fraction, size=x.shape) + 1
+    c = np.random.uniform(-fraction, fraction, size=x.shape) * x.std()
+    return np.clip(x * m + c, min_x, max_x)
+
+
+def normalise(x):
+    x = (x - x.min()) / (x.std + eps)
+    return x
+
+
+def scale_to_start(x):
+    """Scale episode so that it starts at one."""
+    x = (x + eps) / (x[0] + eps)
+    return x
+
+
+class DataSrc(object):
+    """Acts as data provider for each new episode."""
+
+    def __init__(self, df, steps=252, scale=True, augument=0.00):
+        """
+        DataSrc.
+
+        df - csv for data frame index of timestamps
+             and multi-index columns levels=[['LTCBTC'],...],['close',...]]
+        steps - total steps in episode
+        scale - scale the data for each episode
+        augument - fraction to augument the data by
+        """
+        self.steps = steps + 1
+        self.augument = augument
+        self.scale = scale
+
+
+        # add return/y1 as last col
+        pairs = df.columns.levels[0]
+        for pair in pairs:
+            x = df[pair].close
+            df[pair, "return"] = (x + eps*2) / (x.shift() + eps)
+        df = df[1:]
+
+        # data processing
+        if scale:
+            df = (df - df.mean(0) + eps) / (df.max(0) - df.min(0) + eps)
+
+        # get rid of NaN's
+        df = df.fillna(method="pad")
+        df.replace(np.nan, 0, inplace=True)
+
+        self._data = df.copy()
+        self.asset_names = self._data.columns.levels[0].tolist()
+
+        self.reset()
+
+    def _step(self):
+        tdx = self.data.index[self.step]
+        obs = self.data.xs(
+            tdx, axis=0).unstack().as_matrix()  # shape = (prices, assets)
+
+        self.step += 1
+        done = self.step >= self.steps
+        return obs, done
+
+    def reset(self):
+        self.step = 0
+
+        # get data for this episode
+        self.idx = np.random.randint(
+            low=0, high=len(self._data.index) - self.steps)
+        data = self._data[self.idx:self.idx + self.steps].copy()
+
+        # scale each run to the begining of the episode so they look the same
+        if self.scale:
+            data = data.apply(lambda x: scale_to_start(x))
+
+        # augument data to prevent overfitting
+        data = data.apply(lambda x: random_shift(x, self.augument))
+
+        self.data = data
+
+
+class PortfolioSim(object):
+    """
+    Portfolio management sim.
+
+    Params:
+    - cost e.g. 0.0025 is max in Poliniex
+
+    Based of [Jiang 2017](https://arxiv.org/abs/1706.10059)
+    """
+
+    def __init__(self, asset_names=[], trading_cost_bps=0.0025):
+        self.cost = trading_cost_bps
+        self.asset_names = asset_names
+        self.reset()
+
+    def _step(self, w1, y1):
+        """
+        Step.
+
+        w1 - new action of portfolio weights - e.g. [0.1,0.9, 0.0]
+        y1 - price relative vector also called return
+            e.g. [1.0, 0.9, 1.1]
+        Numbered equations are from https://arxiv.org/abs/1706.10059
+        """
+        # y1 = v1 / v0 # (equation 1) price relative vector / return
+        w0 = self.w0
+        p0 = self.p0
+
+        dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)  # (eq7) weights evolve into
+
+        mu1 = self.cost * (
+            np.abs(dw1 - w1)).sum()  # (eq16) cost to change portfolio
+
+        p1 = p0 * (1 - mu1) * np.dot(y1, w0)  # (eq11) final portfolio value
+        p1 = np.clip(p1, 0, np.inf)
+        # print(dict(mu1=mu1,p1=p1,dw1=dw1,y1=y1))
+
+        rho1 = p1 / p0 - 1  # rate of returns
+        r1 = np.log((p1 + eps) / (p0 + eps))  # log rate of return
+
+        # rememeber for next step
+        self.w0 = w1
+        self.p0 = p1
+
+        # if we run out of money, we're done
+        done = p1 == 0
+
+        info = {
+            "log_reward": r1,
+            "portfolio_value": p1,
+            "returns": y1,
+            "rate_of_return": rho1,
+            "weights": w1,
+            "cost": mu1,
+        }
+        self.infos.append(info)
+        return r1, self.infos, done  # reward
+
+    def reset(self):
+        self.infos = []
+        self.w0 = np.array([1.0] + [0.0] * (len(self.asset_names) - 1))
+        self.p0 = 1.0
+
+
+class PortfolioEnv(gym.Env):
+    """
+    An environment for financial portfolio management.
+
+    Financial portfolio management is the process of constant redistribution of a fund into different
+    financial products.
+
+    Based on [Jiang 2017](https://arxiv.org/abs/1706.10059)
+    """
+
+    def __init__(self,
+                 df,
+                 steps=256,
+                 scale=True,
+                 augument=0.00,
+                 trading_cost_bps=0.0025):
+        self.src = DataSrc(df=df, steps=steps, scale=scale, augument=augument)
+
+        self.sim = PortfolioSim(
+            asset_names=self.src.asset_names,
+            trading_cost_bps=trading_cost_bps)
+
+        # openai gym attributes
+        self.action_space = gym.spaces.Box(
+            0, 1, shape=len(self.src.asset_names))
+
+        gym.spaces.Box(
+            np.array([
+                self.src._data[pair].min().values
+                for pair in self.src.asset_names
+            ]),
+            np.array([
+                self.src._data[pair].max().values
+                for pair in self.src.asset_names
+            ]), )
+
+        self._reset()
+
+    def _step(self, action):
+        np.testing.assert_almost_equal(
+            np.sum(action), 1.0, 4, err_msg='action should be sum to 1')
+        observation, done1 = self.src._step()
+
+        y1 = observation[:, -1]  # relative price vector (return)
+        reward, info, done2 = self.sim._step(action, y1)
+
+        return observation, reward, done1 + done2, info
+
+    def _reset(self):
+        self.sim.reset()
+        self.src.reset()
+        return self.step(self.sim.w0)
+
+    def _render(self, mode='human', close=False):
+        pass
@@ -0,0 +1,29 @@
+import pandas as pd
+import numpy as np
+from src.environments.portfolio import PortfolioEnv
+
+
+def test_portfolio_env():
+    df = pd.read_pickle('./data/poliniex_30m_train.pickle')
+    asset_names = df.columns.levels[0]
+    # action
+    w = np.random.random((len(asset_names)))
+    w /= w.sum()
+
+    env = PortfolioEnv(df=df)
+    env.reset()
+    obs, reward, done, info = env.step(w)
+
+
+def test_portfolio_env_hold():
+    df = pd.read_pickle('./data/poliniex_30m_train.pickle')
+    asset_names = df.columns.levels[0]
+
+    env = PortfolioEnv(df=pd.read_pickle('./data/poliniex_30m_train.pickle'))
+    env.reset()
+    for _ in range(5):
+        w = np.array([1.0] + [0] * (len(asset_names) - 1))
+        obs, reward, done, info = env.step(w)
+
+    df = pd.DataFrame(info)
+    assert df.portfolio_value.iloc[-1] > 0.9999
				`@@ -0,0 +1 @@`
				`Replicating [Jiang 2017](https://arxiv.org/abs/1706.10059)`