mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 14:58:21 +08:00
2dbafd5162
wherever we zero out the second attribute. Otherwise, we can be off by some microseconds from midnight, etc.
231 lines
7.2 KiB
Python
231 lines
7.2 KiB
Python
#
|
|
# Copyright 2013 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
import os
|
|
from os.path import expanduser
|
|
import msgpack
|
|
from collections import OrderedDict
|
|
from datetime import timedelta
|
|
|
|
import logbook
|
|
|
|
from treasuries import get_treasury_data
|
|
import benchmarks
|
|
from benchmarks import get_benchmark_returns
|
|
|
|
from zipline.protocol import DailyReturn
|
|
from zipline.utils.date_utils import tuple_to_date
|
|
from zipline.utils.tradingcalendar import trading_days
|
|
from operator import attrgetter
|
|
|
|
# Module-level logbook logger, registered under the channel name 'Loader'.
logger = logbook.Logger('Loader')
|
|
|
|
# TODO: Make this path customizable.
# Directory holding the msgpack data files: <user home>/.zipline/data
_HOME_DIR = expanduser("~")
DATA_PATH = os.path.join(_HOME_DIR, '.zipline', 'data')
|
|
|
|
|
|
def get_datafile(name, mode='r'):
    """
    Returns a handle to data file.

    Creates containing directory, if needed.

    name is the file name inside DATA_PATH; mode is passed straight through
    to open(). The caller is responsible for closing the returned handle.

    Raises IOError (from open) when the file cannot be opened, and OSError
    when DATA_PATH cannot be created for any reason other than it already
    existing.
    """
    import errno

    # Attempt the creation and tolerate "already exists" instead of checking
    # first: a check-then-create sequence can fail when another process
    # creates the directory between the check and the makedirs call.
    try:
        os.makedirs(DATA_PATH)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

    return open(os.path.join(DATA_PATH, name), mode)
|
|
|
|
|
|
def dump_treasury_curves():
    """
    Writes all treasury curves to the 'treasury_curves.msgpack' data file.

    Every curve yielded by get_treasury_data() is stored as a
    (date tuple, curve) pair, where the date tuple is
    (year, month, day, hour, minute, second, microsecond) and the curve is
    the yielded mapping with its 'date' entry removed.
    """
    records = []

    for curve in get_treasury_data():
        # Not ideal but massaging data into expected format:
        # the date moves out of the curve mapping and into a plain tuple.
        stamp = curve.pop('date')
        packed_date = stamp.timetuple()[0:6] + (stamp.microsecond,)
        records.append((packed_date, curve))

    payload = msgpack.dumps(records)
    with get_datafile('treasury_curves.msgpack', mode='wb') as out_fp:
        out_fp.write(payload)
|
|
|
|
|
|
def dump_benchmarks(symbol):
    """
    Writes all benchmark returns for symbol to its msgpack data file.

    Every item yielded by get_benchmark_returns(symbol) is stored as a
    (date tuple, returns) pair, where the date tuple is
    (year, month, day, hour, minute, second, microsecond). The file name
    comes from get_benchmark_filename(symbol).
    """
    # Not ideal but massaging data into expected format
    records = [
        (
            daily.date.timetuple()[0:6] + (daily.date.microsecond,),
            daily.returns,
        )
        for daily in get_benchmark_returns(symbol)
    ]

    target = get_benchmark_filename(symbol)
    with get_datafile(target, mode='wb') as out_fp:
        out_fp.write(msgpack.dumps(records))
|
|
|
|
|
|
def update_treasury_curves(last_date):
    """
    Updates data in the zipline treasury curves message pack

    last_date should be a datetime object of the most recent data. It is
    kept for interface compatibility: get_treasury_data() is called without
    a start date here, so the full set of curves is fetched and new entries
    are selected by comparing against the dates already stored.

    Only curves whose date is not yet present in the file are appended, so
    repeated updates no longer duplicate existing entries.
    """
    with get_datafile('treasury_curves.msgpack', mode='rb') as tr_fp:
        tr_list = msgpack.loads(tr_fp.read())

    tr_data = [(packed_date, curve) for packed_date, curve in tr_list]

    # Stored dates may come back from msgpack as lists; normalise them to
    # tuples so they are hashable and comparable with the tuples built below.
    known_dates = set(tuple(packed_date) for packed_date, _ in tr_data)

    for curve in get_treasury_data():
        date_as_tuple = curve['date'].timetuple()[0:6] + \
            (curve['date'].microsecond,)
        if date_as_tuple in known_dates:
            # Already on disk; appending again would only grow the file.
            continue
        known_dates.add(date_as_tuple)
        # Not ideal but massaging data into expected format
        del curve['date']
        tr_data.append((date_as_tuple, curve))

    with get_datafile('treasury_curves.msgpack', mode='wb') as tr_fp:
        tr_fp.write(msgpack.dumps(tr_data))
|
|
|
|
|
|
def update_benchmarks(symbol, last_date):
    """
    Appends benchmark returns newer than last_date to the symbol's
    msgpack data file.

    last_date should be a datetime object of the most recent data already
    stored; returns are requested starting one day after it.

    If the benchmark source raises BenchmarkDataNotFoundError, the error is
    logged as a warning and the file is left as it was.
    """
    filename = get_benchmark_filename(symbol)

    with get_datafile(filename, mode='rb') as stored_fp:
        stored = msgpack.loads(stored_fp.read())
    records = [(packed_date, returns) for packed_date, returns in stored]

    try:
        first_missing_day = last_date + timedelta(days=1)
        fresh = get_benchmark_returns(symbol, start_date=first_missing_day)
        # Not ideal but massaging data into expected format
        records.extend(
            (
                daily.date.timetuple()[0:6] + (daily.date.microsecond,),
                daily.returns,
            )
            for daily in fresh
        )

        with get_datafile(filename, mode='wb') as out_fp:
            out_fp.write(msgpack.dumps(records))
    except benchmarks.BenchmarkDataNotFoundError as exc:
        logger.warn(exc)
|
|
|
|
|
|
def get_benchmark_filename(symbol):
    """
    Returns the name of the msgpack data file holding the benchmark
    returns for symbol.
    """
    template = "%s_benchmark.msgpack"
    return template % symbol
|
|
|
|
|
|
def load_market_data(bm_symbol='^GSPC'):
    """
    Loads benchmark returns and treasury curves from the msgpack data files.

    A data file that does not exist yet is created by fetching the data
    first. If more than one trading day has elapsed since the last entry of
    a file (measured against trading_days), the file is updated and re-read.

    bm_symbol selects the benchmark whose returns are loaded.

    Returns a (bm_returns, tr_curves) tuple: bm_returns is a list of
    DailyReturn objects sorted by date, tr_curves is an OrderedDict mapping
    each date to its treasury curve, ordered by date.

    Every file handle is closed as soon as its content has been read, so no
    handle stays open while a file is rewritten or if parsing fails.
    """
    bm_filename = get_benchmark_filename(bm_symbol)

    try:
        fp_bm = get_datafile(bm_filename, "rb")
    except IOError:
        print("""
data msgpacks aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())
        dump_benchmarks(bm_symbol)
        fp_bm = get_datafile(bm_filename, "rb")

    with fp_bm:
        bm_list = msgpack.loads(fp_bm.read())

    # Find the offset of the last date for which we have trading data in our
    # list of valid trading days
    last_bm_date = tuple_to_date(bm_list[-1][0])
    last_bm_date_offset = trading_days.searchsorted(
        last_bm_date.strftime('%Y/%m/%d'))

    # If more than 1 trading days has elapsed since the last day where
    # we have data, then we need to update
    if len(trading_days) - last_bm_date_offset > 1:
        update_benchmarks(bm_symbol, last_bm_date)
        with get_datafile(bm_filename, "rb") as fp_bm:
            bm_list = msgpack.loads(fp_bm.read())

    bm_returns = sorted(
        (DailyReturn(date=tuple_to_date(packed_date), returns=returns)
         for packed_date, returns in bm_list),
        key=attrgetter('date'))

    try:
        fp_tr = get_datafile('treasury_curves.msgpack', "rb")
    except IOError:
        print("""
data msgpacks aren't distributed with source.
Fetching data from data.treasury.gov
""".strip())
        dump_treasury_curves()
        fp_tr = get_datafile('treasury_curves.msgpack', "rb")

    with fp_tr:
        tr_list = msgpack.loads(fp_tr.read())

    # Find the offset of the last date for which we have trading data in our
    # list of valid trading days
    last_tr_date = tuple_to_date(tr_list[-1][0])
    last_tr_date_offset = trading_days.searchsorted(
        last_tr_date.strftime('%Y/%m/%d'))

    # If more than 1 trading days has elapsed since the last day where
    # we have data, then we need to update
    if len(trading_days) - last_tr_date_offset > 1:
        update_treasury_curves(last_tr_date)
        with get_datafile('treasury_curves.msgpack', "rb") as fp_tr:
            tr_list = msgpack.loads(fp_tr.read())

    # Keyed by date, so a date stored more than once keeps its last curve.
    tr_curves = {}
    for packed_date, curve in tr_list:
        tr_curves[tuple_to_date(packed_date)] = curve

    # Sort on the date only; the curve values are never compared.
    tr_curves = OrderedDict(sorted(
        tr_curves.items(),
        key=lambda t: t[0]))

    return bm_returns, tr_curves
|