Files
catalyst/tests/data/bundles/test_yahoo.py
T
Joe Jevnik 59c8e371a2 ENH: Updates the cli, data bundles and extensions.
Adds the data bundle concept which makes it easy for users to register
loading functions to build out minute and daily data along with an
assets db and adjustments db. By default we have provided a `quandl`
bundle which pulls from the public domain WIKI dataset. Users may
register new bundles by decorating an ingest function with
`zipline.data.bundles.register(<name>)`. This also provides a
`yahoo_equities` function for creating an ingestion function that will
load a static set of assets from yahoo.

The CLI is now structured as a couple of subcommands and is now invoked
as `python -m zipline`. The old behavior of `run_algo.py` has
been moved to the `run` subcommand. This is almost entirely the same
except that it now takes the name of the data bundle to use, defaulting
to `quandl`.

The next subcommand is `ingest` which takes the name of
a data bundle to ingest. This will run the loading machinery and write
the data to a specified location that `run` can find.

There is also a `clean` subcommand which deletes the data that was
written with `ingest`.

Extensions have also been added to zipline. This is an experimental
feature where users can provide an extra set of Python files to run at
the start of the process. These can be used to configure aspects of
zipline. Right now the only thing that is supported in an extension file
is the registration of a new data bundle.
2016-05-03 18:38:24 -04:00

202 lines
6.4 KiB
Python

import numpy as np
import pandas as pd
from six.moves.urllib.parse import urlparse, parse_qs
from toolz import flip, identity
from toolz.curried import merge_with, operator as op
from zipline.data.bundles.core import _make_bundle_core
from zipline.data.bundles import yahoo_equities, load
from zipline.lib.adjustment import Float64Multiply
from zipline.testing import test_resource_path, tmp_dir, read_compressed
from zipline.testing.fixtures import WithResponses, ZiplineTestCase
from zipline.testing.predicates import assert_equal
from zipline.utils.tradingcalendar import trading_days
class YahooBundleTestCase(WithResponses, ZiplineTestCase):
    """Round-trip test for a bundle built with ``yahoo_equities``.

    The test registers callbacks on ``self.responses`` for the two Yahoo
    URLs, serves canned files from the ``yahoo_samples`` test resources,
    ingests the bundle into a temporary ``ZIPLINE_ROOT`` and then loads it
    back to compare assets, daily bars and adjustments against the values
    produced by ``_expected_data``.
    """
    symbols = 'AAPL', 'IBM', 'MSFT'
    # NOTE: ``_expected_data`` relies on 'volume' being the last column.
    columns = 'open', 'high', 'low', 'close', 'volume'
    asset_start = pd.Timestamp('2014-01-02', tz='utc')
    asset_end = pd.Timestamp('2014-12-31', tz='utc')
    # Trading days restricted to the [asset_start, asset_end] window.
    calendar = trading_days[
        (trading_days >= asset_start) &
        (trading_days <= asset_end)
    ]

    @classmethod
    def init_class_fixtures(cls):
        """Attach the members of a fresh bundle core to the class.

        ``_make_bundle_core()`` is unpacked into ``bundles``, ``register``,
        ``unregister`` and ``ingest``; each is wrapped in ``staticmethod``
        so that accessing it through an instance does not bind ``self``.
        """
        super(YahooBundleTestCase, cls).init_class_fixtures()
        (cls.bundles,
         cls.register,
         cls.unregister,
         cls.ingest) = map(staticmethod, _make_bundle_core())

    def _expected_data(self):
        """Build the pricing arrays and adjustments the bundle should hold.

        Returns
        -------
        pricing : list of np.ndarray
            One ``(252, len(sids))`` float64 array per entry in
            ``self.columns``. The value at row ``i`` for a sid is
            ``i + 1 + sid * 10000 + modifier[column] * 1000``.
        adjustments : list of dict
            One mapping of row index to a list of ``Float64Multiply`` per
            entry in ``self.columns``. Every non-volume column shares the
            same (dividend + split) mapping; the last column (volume) only
            carries splits.
        """
        sids = 0, 1, 2
        # Per-column offset (in thousands) that distinguishes the columns.
        modifier = {
            'low': 0,
            'open': 1,
            'close': 2,
            'high': 3,
            'volume': 0,
        }
        pricing = [
            # A list (not a generator) is passed on purpose: np.hstack
            # requires a sequence, and non-sequence iterables have been
            # deprecated since NumPy 1.16.
            np.hstack([
                np.arange(252, dtype='float64')[:, np.newaxis] +
                1 +
                sid * 10000 +
                modifier[column] * 1000
                for sid in sorted(sids)
            ])
            for column in self.columns
        ]

        # There are two dividends and 1 split for each company.

        def dividend_adjustment(sid, which):
            """Return the expected adjustment for one dividend of ``sid``.

            The dividends occur at indices 252 // 4 and 3 * 252 // 4 with
            cash amounts of (sid + 1) / 10 and (sid + 2) / 10 respectively.
            ``which`` is ``'first'`` for the former; any other value
            selects the latter index, and the larger cash amount is used
            when ``which == 'second'``.
            """
            if which == 'first':
                idx = 252 // 4
            else:
                idx = 3 * 252 // 4

            return {
                idx: [Float64Multiply(
                    first_row=0,
                    last_row=idx,
                    first_col=sid,
                    last_col=sid,
                    value=float(
                        1 -
                        # 10.0 keeps this a true division on Python 2 even
                        # when ``from __future__ import division`` is not
                        # in effect; on Python 3 the result is unchanged.
                        ((sid + 1 + (which == 'second')) / 10.0) /
                        (idx - 1 + sid * 10000 + 2000)
                    ),
                )],
            }

        def split_adjustment(sid, volume):
            """Return the expected split adjustment for ``sid``.

            The splits occur at index 252 // 2. The multiplier is
            ``sid + 2`` for the volume column and ``1 / (sid + 2)`` for
            the price columns.
            """
            idx = 252 // 2
            return {
                idx: [Float64Multiply(
                    first_row=0,
                    last_row=idx,
                    first_col=sid,
                    last_col=sid,
                    value=(identity if volume else op.truediv(1))(sid + 2),
                )],
            }

        # Merge dicts by concatenating the adjustment lists that share a
        # row index: ``flip(sum, [])`` is ``lambda lists: sum(lists, [])``.
        merge_adjustments = merge_with(flip(sum, []))

        adjustments = [
            # ohlc
            merge_adjustments(
                *tuple(dividend_adjustment(sid, 'first') for sid in sids) +
                tuple(dividend_adjustment(sid, 'second') for sid in sids) +
                tuple(split_adjustment(sid, volume=False) for sid in sids)
            )
        ] * (len(self.columns) - 1) + [
            # volume
            merge_adjustments(
                split_adjustment(sid, volume=True) for sid in sids
            ),
        ]

        return pricing, adjustments

    def test_bundle(self):
        """Ingest the yahoo bundle from canned responses and verify it."""

        def get_symbol_from_url(url):
            """Extract the single ``s`` query parameter from ``url``."""
            params = parse_qs(urlparse(url).query)
            # The one-element unpacking fails unless exactly one value
            # was supplied for ``s``.
            symbol, = params['s']
            return symbol

        def pricing_callback(request):
            """Serve ``<symbol>.csv.gz`` as a gzip-encoded CSV response."""
            headers = {
                'content-encoding': 'gzip',
                'content-type': 'text/csv',
            }
            path = test_resource_path(
                'yahoo_samples',
                get_symbol_from_url(request.url) + '.csv.gz',
            )
            with open(path, 'rb') as f:
                return (
                    200,
                    headers,
                    f.read(),
                )

        # One registration per symbol.
        for _ in range(3):
            self.responses.add_callback(
                self.responses.GET,
                'http://ichart.finance.yahoo.com/table.csv',
                pricing_callback,
            )

        def adjustments_callback(request):
            """Serve the contents of ``<symbol>.adjustments.gz``."""
            path = test_resource_path(
                'yahoo_samples',
                get_symbol_from_url(request.url) + '.adjustments.gz',
            )
            return 200, {}, read_compressed(path)

        # One registration per symbol.
        for _ in range(3):
            self.responses.add_callback(
                self.responses.GET,
                'http://ichart.finance.yahoo.com/x',
                adjustments_callback,
            )

        cal = self.calendar
        self.register(
            'bundle',
            yahoo_equities(self.symbols),
            calendar=cal,
        )

        # Ingest into, and load from, a throwaway root directory.
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
        }

        self.ingest('bundle', environ=environ)
        bundle = load('bundle', environ=environ)

        sids = 0, 1, 2
        equities = bundle.asset_finder.retrieve_all(sids)
        for equity, expected_symbol in zip(equities, self.symbols):
            assert_equal(equity.symbol, expected_symbol)

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        actual = bundle.daily_bar_reader.load_raw_arrays(
            self.columns,
            cal[cal.get_loc(self.asset_start, 'bfill')],
            cal[cal.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data()
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            cal,
            pd.Index(sids),
        )

        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )