mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 21:24:16 +08:00
bc0b117dc9
Changes BcolzDailyBarWriter to not be an abc, data is passed as an iterator of (sid, dataframe) pairs to the write method. Changes the AssetsDBWriter to be a single class which accepts an engine at construction time and has a `write` method for writing dataframes for the various tables. We no longer support writing the various other data types, callers should coerce their data into a dataframe themselves. See zipline.assets.synthetic for some helpers to do this. Adds many new fixtures and updates some existing fixtures to use the new ones: WithDefaultDateBounds A fixture that provides the suite a START_DATE and END_DATE. This is meant to make it easy for other fixtures to synchronize their date ranges without depending on each other in strange ways. For example, WithBcolzMinuteBarReader and WithBcolzDailyBarReader by default should both have data for the same dates, so they may depend on WithDefaultDateBounds without forcing a dependency between them. WithTmpDir, WithInstanceTmpDir Provides the suite or individual test case a temporary directory. WithBcolzDailyBarReader Provides the suite a BcolzDailyBarReader which reads from bcolz data written to a temporary directory. The data will be read from dataframes and then converted to bcolz files with BcolzDailyBarWriter.write WithBcolzDailyBarReaderFromCSVs Provides the suite a BcolzDailyBarReader which reads from bcolz data written to a temporary directory. The data will be read from a collection of CSV files and then converted into the bcolz data through BcolzDailyBarWriter.write_csvs WithBcolzMinuteBarReader Provides the suite a BcolzMinuteBarReader which reads from bcolz data written to a temporary directory. The data will be read from dataframes and then converted to bcolz files with BcolzMinuteBarWriter.write WithAdjustmentReader Provides the suite a SQLiteAdjustmentReader which reads from an in memory sqlite database. 
The data will be read from dataframes and then converted into sqlite with SQLiteAdjustmentWriter.write WithDataPortal Provides each test case a DataPortal object with data from temporary resources.
333 lines
11 KiB
Python
333 lines
11 KiB
Python
#
|
|
# Copyright 2015 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from nose_parameterized import parameterized
|
|
from numpy import (
|
|
arange,
|
|
datetime64,
|
|
)
|
|
from numpy.testing import (
|
|
assert_array_equal,
|
|
)
|
|
from pandas import (
|
|
DataFrame,
|
|
DatetimeIndex,
|
|
Timestamp,
|
|
)
|
|
from pandas.util.testing import assert_index_equal
|
|
|
|
from zipline.data.us_equity_pricing import (
|
|
BcolzDailyBarReader,
|
|
NoDataOnDate,
|
|
)
|
|
from zipline.pipeline.data import USEquityPricing
|
|
from zipline.pipeline.loaders.synthetic import (
|
|
OHLCV,
|
|
asset_start,
|
|
asset_end,
|
|
expected_daily_bar_value,
|
|
expected_daily_bar_values_2d,
|
|
make_daily_bar_data,
|
|
)
|
|
from zipline.testing import seconds_to_timestamp
|
|
from zipline.testing.fixtures import (
|
|
WithBcolzDailyBarReader,
|
|
ZiplineTestCase,
|
|
)
|
|
|
|
# Bounds of the calendar for which daily bar data is written.
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')

# Bounds of the default read query; strictly inside the calendar bounds.
TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC')
TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC')

# One asset for each of the cases enumerated in load_raw_arrays_from_bcolz.
EQUITY_INFO = DataFrame(
    [
        # 1) The equity's trades start and end before query.
        {'start_date': '2015-06-01', 'end_date': '2015-06-05'},
        # 2) The equity's trades start and end after query.
        {'start_date': '2015-06-22', 'end_date': '2015-06-30'},
        # 3) The equity's data covers all dates in range.
        {'start_date': '2015-06-02', 'end_date': '2015-06-30'},
        # 4) The equity's trades start before the query start, but stop
        #    before the query end.
        {'start_date': '2015-06-01', 'end_date': '2015-06-15'},
        # 5) The equity's trades start and end during the query.
        {'start_date': '2015-06-12', 'end_date': '2015-06-18'},
        # 6) The equity's trades start during the query, but extend through
        #    the whole query.
        {'start_date': '2015-06-15', 'end_date': '2015-06-25'},
    ],
    # Asset ids are 1 through 6, matching the case numbers above.
    index=arange(1, 7),
    columns=['start_date', 'end_date'],
    # Name the nanosecond unit explicitly: a unit-less ``datetime64`` dtype is
    # rejected by newer pandas releases, while 'datetime64[ns]' is accepted by
    # old and new releases alike.
).astype('datetime64[ns]')

TEST_QUERY_ASSETS = EQUITY_INFO.index
|
|
|
|
|
|
class BcolzDailyBarTestCase(WithBcolzDailyBarReader, ZiplineTestCase):
    """
    Tests for the bcolz daily bar table and reader supplied by the
    WithBcolzDailyBarReader fixture, using synthetic bars for the assets
    described by EQUITY_INFO.
    """
    BCOLZ_DAILY_BAR_START_DATE = TEST_CALENDAR_START
    BCOLZ_DAILY_BAR_END_DATE = TEST_CALENDAR_STOP

    @classmethod
    def make_equity_info(cls):
        """Return the asset lifetimes (EQUITY_INFO) used by this suite."""
        return EQUITY_INFO

    @classmethod
    def make_daily_bar_data(cls):
        """Return synthetic daily bar data for the assets in EQUITY_INFO."""
        # Inside the method body the bare name resolves to the module-level
        # helper imported from zipline.pipeline.loaders.synthetic, not to
        # this classmethod.
        return make_daily_bar_data(
            EQUITY_INFO,
            cls.bcolz_daily_bar_days,
        )

    @classmethod
    def init_class_fixtures(cls):
        """
        Run the parent fixtures, then store on the class the trading days
        from TEST_CALENDAR_START through TEST_CALENDAR_STOP.
        """
        super(BcolzDailyBarTestCase, cls).init_class_fixtures()
        all_trading_days = cls.env.trading_days
        # The ``+ 1`` makes the positional slice include TEST_CALENDAR_STOP.
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1
        ]

    @property
    def assets(self):
        """The asset ids under test: the index of EQUITY_INFO."""
        return EQUITY_INFO.index

    def trading_days_between(self, start, end):
        """Return the entries of ``self.trading_days`` from start to end."""
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        """Return the start date of ``asset_id`` according to EQUITY_INFO."""
        return asset_start(EQUITY_INFO, asset_id)

    def asset_end(self, asset_id):
        """Return the end date of ``asset_id`` according to EQUITY_INFO."""
        return asset_end(EQUITY_INFO, asset_id)

    def dates_for_asset(self, asset_id):
        """Return the trading days between the asset's start and end date."""
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def test_write_ohlcv_content(self):
        """
        Every OHLCV column of the written table must hold the expected
        synthetic values, ordered by asset and then by date, with no rows
        left over.
        """
        result = self.bcolz_daily_bar_ctable
        for column in OHLCV:
            idx = 0
            data = result[column][:]
            # Non-volume columns are compared against the expected value
            # scaled by 1000 -- presumably because the table stores prices
            # as scaled integers; TODO confirm against the writer.
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        expected_daily_bar_value(
                            asset_id,
                            date,
                            column
                        ) * multiplier,
                        data[idx],
                    )
                    idx += 1
            # Every row of the column must have been consumed.
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        """
        The 'id' and 'day' columns must list each asset's trading days in
        order, grouped by asset, with no rows left over.
        """
        result = self.bcolz_daily_bar_ctable
        idx = 0
        ids = result['id']
        days = result['day']
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1
        # Mirror test_write_ohlcv_content: without this check, unexpected
        # trailing rows in the table would go unnoticed.
        self.assertEqual(idx, len(ids))

    def test_write_attrs(self):
        """
        The table attrs must record each asset's first row, last row and
        calendar offset, together with the full test calendar.
        """
        result = self.bcolz_daily_bar_ctable
        expected_first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        """
        Load ``columns`` for ``assets`` between ``start_date`` and
        ``end_date`` through the reader and compare each returned array with
        the expected synthetic values for those dates.
        """
        results = self.bcolz_daily_bar_reader.load_raw_arrays(
            columns,
            start_date,
            end_date,
            assets,
        )
        dates = self.trading_days_between(start_date, end_date)
        # Results are paired with the requested columns positionally.
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                expected_daily_bar_values_2d(
                    dates,
                    EQUITY_INFO,
                    column.name,
                )
            )

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        """
        Test loading several column combinations over the default query
        range.
        """
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that start on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        """
        Test ``spot_price`` at the start, middle and end of an asset's
        lifetime, for a second asset, and for the volume column.
        """
        reader = self.bcolz_daily_bar_reader
        # At beginning
        price = reader.spot_price(1, Timestamp('2015-06-01', tz='UTC'),
                                  'close')
        # Synthetic writes price for date.
        self.assertEqual(108630.0, price)

        # Middle
        price = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                  'close')
        self.assertEqual(108631.0, price)
        # End
        price = reader.spot_price(1, Timestamp('2015-06-05', tz='UTC'),
                                  'close')
        self.assertEqual(108634.0, price)

        # Another sid at beginning.
        price = reader.spot_price(2, Timestamp('2015-06-22', tz='UTC'),
                                  'close')
        self.assertEqual(208651.0, price)

        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                   'volume')
        self.assertEqual(109631, volume)

    def test_unadjusted_spot_price_no_data(self):
        """
        ``spot_price`` must raise NoDataOnDate for dates before an asset's
        first trade and after its last trade.
        """
        table = self.bcolz_daily_bar_ctable
        reader = BcolzDailyBarReader(table)
        # before
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(2, Timestamp('2015-06-08', tz='UTC'), 'close')

        # after
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(4, Timestamp('2015-06-16', tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        """
        ``spot_price`` must return -1 when the stored close value is zero.
        """
        reader = self.bcolz_daily_bar_reader

        # A sid, day and corresponding index into which to overwrite a zero.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)

        old = reader._spot_col('close')[zero_ix]
        try:
            # Write a zero into the synthetic pricing data at the day and sid,
            # so that a read should now return -1.
            # This is a little hacky, in lieu of changing the synthetic data
            # set.
            reader._spot_col('close')[zero_ix] = 0

            close = reader.spot_price(zero_sid, zero_day, 'close')
            self.assertEqual(-1, close)
        finally:
            # Restore the original value; the reader is shared by other
            # tests.
            reader._spot_col('close')[zero_ix] = old
|