From 6de4d32ee1c5aebd6791929e21e872fc0b37910c Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 29 Jun 2014 17:40:48 +0200 Subject: [PATCH 1/3] ENH: Change open and close to midnight if using daily input data. --- tests/history_cases.py | 1 - zipline/algorithm.py | 5 ++++- zipline/history/history.py | 43 +++++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/tests/history_cases.py b/tests/history_cases.py index cd2c42d8..83d18ece 100644 --- a/tests/history_cases.py +++ b/tests/history_cases.py @@ -246,7 +246,6 @@ HISTORY_CONTAINER_TEST_CASES = { ], }, }, - 'test illiquid prices': { # A list of HistorySpec objects. diff --git a/zipline/algorithm.py b/zipline/algorithm.py index 7bf1bb32..26bf71d9 100644 --- a/zipline/algorithm.py +++ b/zipline/algorithm.py @@ -819,7 +819,10 @@ class TradingAlgorithm(object): @api_method def add_history(self, bar_count, frequency, field, ffill=True): - history_spec = HistorySpec(bar_count, frequency, field, ffill) + daily_at_midnight = (self.sim_params.data_frequency == 'daily') + + history_spec = HistorySpec(bar_count, frequency, field, ffill, + daily_at_midnight=daily_at_midnight) self.history_specs[history_spec.key_str] = history_spec @api_method diff --git a/zipline/history/history.py b/zipline/history/history.py index f9779110..d869e0f4 100644 --- a/zipline/history/history.py +++ b/zipline/history/history.py @@ -16,6 +16,7 @@ from __future__ import division import numpy as np +import pandas as pd import re from zipline.finance import trading @@ -41,7 +42,7 @@ class Frequency(object): SUPPORTED_FREQUENCIES = frozenset({'1d', '1m'}) MAX_MINUTES = {'m': 1, 'd': 390} - def __init__(self, freq_str): + def __init__(self, freq_str, daily_at_midnight=False): if freq_str not in self.SUPPORTED_FREQUENCIES: raise ValueError( @@ -56,25 +57,31 @@ class Frequency(object): # unit_str - The unit type, e.g. 'd' self.num, self.unit_str = parse_freq_str(freq_str) + self.daily_at_midnight = daily_at_midnight + def next_window_start(self, previous_window_close): """ Get the first minute of the window starting after a window that finished on @previous_window_close. """ if self.unit_str == 'd': - return self.next_day_window_start(previous_window_close) + return self.next_day_window_start(previous_window_close, + self.daily_at_midnight) elif self.unit_str == 'm': return self.next_minute_window_start(previous_window_close) @staticmethod - def next_day_window_start(previous_window_close): + def next_day_window_start(previous_window_close, daily_at_midnight=False): """ Get the next day window start after @previous_window_close. This is defined as the first market open strictly greater than @previous_window_close. """ env = trading.environment - next_open, _ = env.next_open_and_close(previous_window_close) + if daily_at_midnight: + next_open = env.next_trading_day(previous_window_close) + else: + next_open, _ = env.next_open_and_close(previous_window_close) return next_open @staticmethod @@ -107,8 +114,7 @@ class Frequency(object): elif self.unit_str == 'm': return self.minute_window_close(window_start, self.num) - @staticmethod - def day_window_open(window_close, num_days): + def day_window_open(self, window_close, num_days): """ Get the first minute for a daily window of length @num_days with last minute @window_close. This is calculated by searching backward until @@ -120,10 +126,13 @@ class Frequency(object): 1, offset=-(num_days - 1) ).market_open.iloc[0] + + if self.daily_at_midnight: + open_ = pd.tslib.normalize_date(open_) + return open_ - @staticmethod - def minute_window_open(window_close, num_minutes): + def minute_window_open(self, window_close, num_minutes): """ Get the first minute for a minutely window of length @num_minutes with last minute @window_close. @@ -138,8 +147,7 @@ class Frequency(object): env = trading.environment return env.market_minute_window(window_close, count=-num_minutes)[-1] - @staticmethod - def day_window_close(window_start, num_days): + def day_window_close(self, window_start, num_days): """ Get the last minute for a daily window of length @num_days with first minute @window_start. This is calculated by searching forward until @@ -174,10 +182,13 @@ class Frequency(object): 1, offset=num_days - 1 ).market_close.iloc[0] + + if self.daily_at_midnight: + close = pd.tslib.normalize_date(close) + return close - @staticmethod - def minute_window_close(window_start, num_minutes): + def minute_window_close(self, window_start, num_minutes): """ Get the last minute for a minutely window of length @num_minutes with first minute @window_start. @@ -229,11 +240,12 @@ class HistorySpec(object): return "{0}:{1}:{2}:{3}".format( bar_count, freq_str, field, ffill) - def __init__(self, bar_count, frequency, field, ffill): + def __init__(self, bar_count, frequency, field, ffill, + daily_at_midnight=False): # Number of bars to look back. self.bar_count = bar_count if isinstance(frequency, str): - frequency = Frequency(frequency) + frequency = Frequency(frequency, daily_at_midnight) # The frequency at which the data is sampled. self.frequency = frequency # The field, e.g. 'price', 'volume', etc. @@ -272,6 +284,9 @@ def days_index_at_dt(history_spec, algo_dt): step=history_spec.frequency.num, ).market_close + if history_spec.frequency.daily_at_midnight: + market_closes = market_closes.apply(pd.tslib.normalize_date) + # Append the current algo_dt as the last index value. # Using the 'rawer' numpy array values here because of a bottleneck # that appeared when using DatetimeIndex From 845712e5ebcc78190b2d40af78c838895fec591c Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Mon, 30 Jun 2014 17:49:27 +0200 Subject: [PATCH 2/3] MAINT: Factor out column conversion in HistoryContainer. We can't be sure that security identifiers can be converted to int. In fact, most of the times they are strings. This adds an identity function that can be overwritten if such a conversion is necessary (as on Quantopian). --- zipline/history/history_container.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/zipline/history/history_container.py b/zipline/history/history_container.py index 51b38562..7c8eece4 100644 --- a/zipline/history/history_container.py +++ b/zipline/history/history_container.py @@ -246,6 +246,13 @@ class HistoryContainer(object): ) return rp + def convert_columns(self, values): + """ + If columns have a specific type you want to enforce, overwrite this + method and return the transformed values. + """ + return values + def create_return_frames(self, algo_dt): """ Populates the return frame cache. @@ -257,7 +264,8 @@ class HistoryContainer(object): index = pd.to_datetime(index_at_dt(history_spec, algo_dt)) frame = pd.DataFrame( index=index, - columns=map(int, self.buffer_panel.minor_axis.values), + columns=self.convert_columns( + self.buffer_panel.minor_axis.values), dtype=np.float64) self.return_frames[spec_key] = frame From 78d2a7ef9a1f29f9d7e5f6effea4ce86fe2c2134 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Tue, 15 Jul 2014 17:39:30 +0200 Subject: [PATCH 3/3] TST: Add unittest for daily history with midnight dt. --- tests/test_history.py | 57 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/tests/test_history.py b/tests/test_history.py index 0ba24146..71f1b37a 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -18,6 +18,7 @@ from unittest import TestCase from nose_parameterized import parameterized import numpy as np import pandas as pd +from pandas.util.testing import assert_frame_equal from zipline.history import history from zipline.history.history_container import HistoryContainer @@ -26,7 +27,7 @@ import zipline.utils.factory as factory from zipline import TradingAlgorithm from zipline.finance.trading import SimulationParameters, TradingEnvironment -from zipline.sources import RandomWalkSource +from zipline.sources import RandomWalkSource, DataFrameSource from .history_cases import ( HISTORY_CONTAINER_TEST_CASES, @@ -128,7 +129,8 @@ def get_index_at_dt(case_input): case_input['bar_count'], case_input['frequency'], None, - False + False, + daily_at_midnight=False ) return history.index_at_dt(history_spec, case_input['algo_dt']) @@ -217,7 +219,8 @@ class TestHistoryContainer(TestCase): bar_count=3, frequency='1d', field='price', - ffill=True + ffill=True, + daily_at_midnight=False ) specs = {spec.key_str: spec} initial_sids = [1, ] @@ -342,6 +345,54 @@ class TestHistoryAlgo(TestCase): def setUp(self): np.random.seed(123) + def test_history_daily(self): + bar_count = 3 + algo_text = """ +from zipline.api import history, add_history +from copy import deepcopy + +def initialize(context): + add_history(bar_count={bar_count}, frequency='1d', field='price') + context.history_trace = [] + +def handle_data(context, data): + prices = history(bar_count={bar_count}, frequency='1d', field='price') + context.history_trace.append(deepcopy(prices)) +""".format(bar_count=bar_count).strip() + + # March 2006 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 + # 5 6 7 8 9 10 11 + # 12 13 14 15 16 17 18 + # 19 20 21 22 23 24 25 + # 26 27 28 29 30 31 + + start = pd.Timestamp('2006-03-20', tz='UTC') + end = pd.Timestamp('2006-03-30', tz='UTC') + + sim_params = factory.create_simulation_parameters( + start=start, end=end) + + _, df = factory.create_test_df_source(sim_params) + df = df.astype(np.float64) + source = DataFrameSource(df, sids=[0]) + + test_algo = TradingAlgorithm( + script=algo_text, + data_frequency='daily', + sim_params=sim_params + ) + + output = test_algo.run(source) + self.assertIsNotNone(output) + + history_trace = test_algo.history_trace + + for i, received in enumerate(history_trace[bar_count - 1:]): + expected = df.iloc[i:i + bar_count] + assert_frame_equal(expected, received) + def test_basic_history(self): algo_text = """ from zipline.api import history, add_history