diff --git a/tests/pipeline/test_buyback_auth.py b/tests/pipeline/test_buyback_auth.py index 9bf8e19c..e88e664c 100644 --- a/tests/pipeline/test_buyback_auth.py +++ b/tests/pipeline/test_buyback_auth.py @@ -42,7 +42,8 @@ from zipline.testing.fixtures import ( ) date_intervals = [ - [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', None]] + [['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'], + ['2014-01-10', '2014-01-31']] ] buyback_authorizations_cases = [ diff --git a/tests/pipeline/test_dividends.py b/tests/pipeline/test_dividends.py index 5c10b7bc..418517f3 100644 --- a/tests/pipeline/test_dividends.py +++ b/tests/pipeline/test_dividends.py @@ -98,41 +98,41 @@ dividends_cases = [ prev_date_intervals = [ [ - [None, '2014-01-14'], ['2014-01-15', '2014-01-19'], - ['2014-01-20', None] + ['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'], + ['2014-01-20', '2014-01-31'] ], [ - [None, '2014-01-14'], ['2014-01-15', '2014-01-19'], - ['2014-01-20', None] + ['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'], + ['2014-01-20', '2014-01-31'] ], [ - [None, '2014-01-09'], ['2014-01-10', '2014-01-19'], - ['2014-01-20', None] + ['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-19'], + ['2014-01-20', '2014-01-31'] ], [ - [None, '2014-01-09'], ['2014-01-10', '2014-01-14'], - ['2014-01-15', None] + ['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-14'], + ['2014-01-15', '2014-01-31'] ] ] next_date_intervals = [ [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-15'], - ['2014-01-16', '2014-01-20'], ['2014-01-21', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-15'], + ['2014-01-16', '2014-01-20'], ['2014-01-21', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-09'], + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', '2014-01-15'], ['2014-01-16', '2014-01-20'], - ['2014-01-21', None] + ['2014-01-21', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-10'], + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'], ['2014-01-11', '2014-01-14'], ['2014-01-15', '2014-01-20'], - ['2014-01-21', None] + ['2014-01-21', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-10'], - ['2014-01-11', '2014-01-15'], ['2014-01-16', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'], + ['2014-01-11', '2014-01-15'], ['2014-01-16', '2014-01-31'] ] ] @@ -184,19 +184,19 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader, def setup(self, dates): date_intervals = [ [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-09'], - ['2014-01-10', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'], + ['2014-01-10', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-09'], - ['2014-01-10', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'], + ['2014-01-10', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', '2014-01-14'], - ['2014-01-15', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-14'], + ['2014-01-15', '2014-01-31'] ], [ - [None, '2014-01-04'], ['2014-01-05', None] + ['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-31'] ] ] announcement_dates = [['NaT', '2014-01-04', '2014-01-09'], diff --git a/tests/pipeline/test_events.py b/tests/pipeline/test_events.py index 1bf7e46e..9663b94e 100644 --- a/tests/pipeline/test_events.py +++ b/tests/pipeline/test_events.py @@ -6,6 +6,8 @@ from unittest import TestCase import blaze as bz from nose_parameterized import parameterized +import numpy as np +from numpy.testing import assert_array_equal import pandas as pd from pandas.util.testing import assert_series_equal @@ -28,18 +30,22 @@ from zipline.pipeline.loaders.events import ( from zipline.utils.memoize import lazyval from zipline.utils.numpy_utils import datetime64ns_dtype +OTHER_FIELD = "other_field" ABSTRACT_CONCRETE_LOADER_ERROR = 'abstract methods concrete_loader' -ABSTRACT_EXPECTED_COLS_ERROR = 'abstract methods expected_cols' +ABSTRACT_EXPECTED_COLS_ERROR = 'abstract methods event_date_col, expected_cols' class EventDataSet(DataSet): previous_announcement = Column(datetime64ns_dtype) + next_announcement = Column(datetime64ns_dtype) class EventDataSetLoader(EventsLoader): expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME]) + event_date_col = ANNOUNCEMENT_FIELD_NAME + def __init__(self, all_dates, events_by_sid, @@ -56,21 +62,21 @@ class EventDataSetLoader(EventsLoader): def previous_announcement_loader(self): return self._previous_event_date_loader( self.dataset.previous_announcement, - ANNOUNCEMENT_FIELD_NAME, ) @lazyval def next_announcement_loader(self): - return self._previous_event_date_loader( - self.dataset.previous_announcement, - ANNOUNCEMENT_FIELD_NAME, + return self._next_event_date_loader( + self.dataset.next_announcement, ) # Test case just for catching an error when multiple columns are in the wrong # data format, so no loader defined. -class EventDataSetLoaderMultipleExpectedCols(EventsLoader): - expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME, "other_field"]) +class EventDataSetLoaderMultipleExpectedColsNoColumnLoaders(EventsLoader): + expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME, OTHER_FIELD]) + + event_date_col = ANNOUNCEMENT_FIELD_NAME class EventDataSetLoaderNoExpectedCols(EventsLoader): @@ -92,6 +98,34 @@ dtx = pd.date_range('2014-01-01', '2014-01-10') class EventLoaderTestCase(TestCase): + def test_null_in_event_date_col(self): + # Tests that if there is a null date in the event date column, it is + # filtered out and does not break on loading the adjusted array. + dates_with_null = pd.Series(dtx) + dates_with_null[2] = pd.NaT + events_by_sid = {0: pd.DataFrame({ANNOUNCEMENT_FIELD_NAME: + dates_with_null, + TS_FIELD_NAME: dtx})} + loader = EventDataSetLoader( + dtx, + events_by_sid, + ) + + prev_result = loader.load_adjusted_array({ + EventDataSet.previous_announcement + }, dtx, [0], [True])[EventDataSet.previous_announcement].data[:, 0] + + next_result = loader.load_adjusted_array({ + EventDataSet.next_announcement + }, dtx, [0], [True])[EventDataSet.next_announcement].data[:, 0] + + expected_prev = dates_with_null[:] + expected_prev[2] = dtx[1] + assert_array_equal(prev_result, expected_prev) + expected_next = dates_with_null[:] + expected_next[2] = np.datetime64('NaT') + assert_array_equal(next_result, expected_next) + def assert_loader_error(self, events_by_sid, error, msg, infer_timestamps, loader): with self.assertRaisesRegexp(error, re.escape(msg)): @@ -198,7 +232,7 @@ class EventLoaderTestCase(TestCase): [dtx, dtx], True, WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=0), - EventDataSetLoaderMultipleExpectedCols + EventDataSetLoaderMultipleExpectedColsNoColumnLoaders ], [ [dtx], @@ -212,7 +246,7 @@ class EventLoaderTestCase(TestCase): [dtx, dtx], False, WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=0), - EventDataSetLoaderMultipleExpectedCols + EventDataSetLoaderMultipleExpectedColsNoColumnLoaders ] ] ) @@ -246,3 +280,20 @@ class BlazeEventLoaderTestCase(TestCase): SID_FIELD_NAME: 0}) ) ) + + +class BlazeEventDataSetLoader(BlazeEventsLoader): + concrete_loader = EventDataSetLoader + _expected_fields = frozenset({ANNOUNCEMENT_FIELD_NAME, + TS_FIELD_NAME, + SID_FIELD_NAME}) + + def __init__(self, + expr, + dataset=EventDataSet, + **kwargs): + super( + BlazeEventDataSetLoader, self + ).__init__(expr, + dataset=dataset, + **kwargs) diff --git a/zipline/pipeline/loaders/buyback_auth.py b/zipline/pipeline/loaders/buyback_auth.py index b2123545..6d584809 100644 --- a/zipline/pipeline/loaders/buyback_auth.py +++ b/zipline/pipeline/loaders/buyback_auth.py @@ -27,6 +27,8 @@ class CashBuybackAuthorizationsLoader(EventsLoader): expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME, CASH_FIELD_NAME]) + event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME + def __init__(self, all_dates, events_by_sid, @@ -43,7 +45,6 @@ class CashBuybackAuthorizationsLoader(EventsLoader): def cash_amount_loader(self): return self._previous_event_value_loader( self.dataset.cash_amount, - BUYBACK_ANNOUNCEMENT_FIELD_NAME, CASH_FIELD_NAME ) @@ -51,7 +52,6 @@ class CashBuybackAuthorizationsLoader(EventsLoader): def announcement_date_loader(self): return self._previous_event_date_loader( self.dataset.announcement_date, - BUYBACK_ANNOUNCEMENT_FIELD_NAME, ) @@ -70,6 +70,8 @@ class ShareBuybackAuthorizationsLoader(EventsLoader): expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME, SHARE_COUNT_FIELD_NAME]) + event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME + def __init__(self, all_dates, events_by_sid, @@ -86,7 +88,6 @@ class ShareBuybackAuthorizationsLoader(EventsLoader): def share_count_loader(self): return self._previous_event_value_loader( self.dataset.share_count, - BUYBACK_ANNOUNCEMENT_FIELD_NAME, SHARE_COUNT_FIELD_NAME ) @@ -94,5 +95,4 @@ class ShareBuybackAuthorizationsLoader(EventsLoader): def announcement_date_loader(self): return self._previous_event_date_loader( self.dataset.announcement_date, - BUYBACK_ANNOUNCEMENT_FIELD_NAME, ) diff --git a/zipline/pipeline/loaders/consensus_estimates.py b/zipline/pipeline/loaders/consensus_estimates.py index 666931a1..9d8f20f8 100644 --- a/zipline/pipeline/loaders/consensus_estimates.py +++ b/zipline/pipeline/loaders/consensus_estimates.py @@ -29,6 +29,8 @@ class ConsensusEstimatesLoader(EventsLoader): LOW_FIELD_NAME, ACTUAL_VALUE_FIELD_NAME]) + event_date_col = RELEASE_DATE_FIELD_NAME + def __init__(self, all_dates, events_by_sid, infer_timestamps=False, dataset=ConsensusEstimates): @@ -40,21 +42,18 @@ class ConsensusEstimatesLoader(EventsLoader): def next_release_date_loader(self): return self._next_event_date_loader( self.dataset.next_release_date, - RELEASE_DATE_FIELD_NAME ) @lazyval def previous_release_date_loader(self): return self._previous_event_date_loader( self.dataset.previous_release_date, - RELEASE_DATE_FIELD_NAME, ) @lazyval def next_standard_deviation_loader(self): return self._next_event_value_loader( self.dataset.next_standard_deviation, - RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME, ) @@ -62,7 +61,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_standard_deviation_loader(self): return self._previous_event_value_loader( self.dataset.previous_standard_deviation, - RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME, ) @@ -70,7 +68,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_count_loader(self): return self._next_event_value_loader( self.dataset.next_count, - RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME, ) @@ -78,7 +75,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_count_loader(self): return self._previous_event_value_loader( self.dataset.previous_count, - RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME, ) @@ -86,7 +82,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_fiscal_quarter_loader(self): return self._next_event_value_loader( self.dataset.next_fiscal_quarter, - RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, ) @@ -94,7 +89,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_fiscal_quarter_loader(self): return self._previous_event_value_loader( self.dataset.previous_fiscal_quarter, - RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, ) @@ -102,7 +96,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_high_loader(self): return self._next_event_value_loader( self.dataset.next_high, - RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME, ) @@ -110,7 +103,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_high_loader(self): return self._previous_event_value_loader( self.dataset.previous_high, - RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME, ) @@ -118,7 +110,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_mean_loader(self): return self._next_event_value_loader( self.dataset.next_mean, - RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME, ) @@ -126,7 +117,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_mean_loader(self): return self._previous_event_value_loader( self.dataset.previous_mean, - RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME, ) @@ -134,7 +124,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_fiscal_year_loader(self): return self._next_event_value_loader( self.dataset.next_fiscal_year, - RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME, ) @@ -142,7 +131,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_fiscal_year_loader(self): return self._previous_event_value_loader( self.dataset.previous_fiscal_year, - RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME, ) @@ -150,7 +138,6 @@ class ConsensusEstimatesLoader(EventsLoader): def next_low_loader(self): return self._next_event_value_loader( self.dataset.next_low, - RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME, ) @@ -158,7 +145,6 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_low_loader(self): return self._previous_event_value_loader( self.dataset.previous_low, - RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME, ) @@ -166,6 +152,5 @@ class ConsensusEstimatesLoader(EventsLoader): def previous_actual_value_loader(self): return self._previous_event_value_loader( self.dataset.previous_actual_value, - RELEASE_DATE_FIELD_NAME, ACTUAL_VALUE_FIELD_NAME, ) diff --git a/zipline/pipeline/loaders/dividends.py b/zipline/pipeline/loaders/dividends.py index cf6f2dad..339435c1 100644 --- a/zipline/pipeline/loaders/dividends.py +++ b/zipline/pipeline/loaders/dividends.py @@ -17,6 +17,8 @@ class DividendsByAnnouncementDateLoader(EventsLoader): expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME, CASH_AMOUNT_FIELD_NAME]) + event_date_col = ANNOUNCEMENT_FIELD_NAME + def __init__(self, all_dates, events_by_sid, infer_timestamps=False, dataset=DividendsByAnnouncementDate): @@ -28,14 +30,12 @@ class DividendsByAnnouncementDateLoader(EventsLoader): def previous_announcement_date_loader(self): return self._previous_event_date_loader( self.dataset.previous_announcement_date, - ANNOUNCEMENT_FIELD_NAME ) @lazyval def previous_amount_loader(self): return self._previous_event_value_loader( self.dataset.previous_amount, - ANNOUNCEMENT_FIELD_NAME, CASH_AMOUNT_FIELD_NAME ) @@ -44,6 +44,8 @@ class DividendsByPayDateLoader(EventsLoader): expected_cols = frozenset([PAY_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME]) + event_date_col = PAY_DATE_FIELD_NAME + def __init__(self, all_dates, events_by_sid, infer_timestamps=False, dataset=DividendsByPayDate): @@ -53,27 +55,23 @@ class DividendsByPayDateLoader(EventsLoader): @lazyval def next_date_loader(self): - return self._next_event_date_loader(self.dataset.next_date, - PAY_DATE_FIELD_NAME) + return self._next_event_date_loader(self.dataset.next_date) @lazyval def previous_date_loader(self): return self._previous_event_date_loader( self.dataset.previous_date, - PAY_DATE_FIELD_NAME ) @lazyval def next_amount_loader(self): return self._next_event_value_loader(self.dataset.next_amount, - PAY_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME) @lazyval def previous_amount_loader(self): return self._previous_event_value_loader( self.dataset.previous_amount, - PAY_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME ) @@ -82,6 +80,8 @@ class DividendsByExDateLoader(EventsLoader): expected_cols = frozenset([EX_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME]) + event_date_col = EX_DATE_FIELD_NAME + def __init__(self, all_dates, events_by_sid, infer_timestamps=False, dataset=DividendsByExDate): @@ -91,26 +91,22 @@ class DividendsByExDateLoader(EventsLoader): @lazyval def next_date_loader(self): - return self._next_event_date_loader(self.dataset.next_date, - EX_DATE_FIELD_NAME) + return self._next_event_date_loader(self.dataset.next_date) @lazyval def previous_date_loader(self): return self._previous_event_date_loader( self.dataset.previous_date, - EX_DATE_FIELD_NAME ) @lazyval def next_amount_loader(self): return self._next_event_value_loader(self.dataset.next_amount, - EX_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME) @lazyval def previous_amount_loader(self): return self._previous_event_value_loader( self.dataset.previous_amount, - EX_DATE_FIELD_NAME, CASH_AMOUNT_FIELD_NAME ) diff --git a/zipline/pipeline/loaders/earnings.py b/zipline/pipeline/loaders/earnings.py index 0a43e652..d4ed9bb6 100644 --- a/zipline/pipeline/loaders/earnings.py +++ b/zipline/pipeline/loaders/earnings.py @@ -12,6 +12,8 @@ class EarningsCalendarLoader(EventsLoader): expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME]) + event_date_col = ANNOUNCEMENT_FIELD_NAME + def __init__(self, all_dates, events_by_sid, infer_timestamps=False, dataset=EarningsCalendar): @@ -21,12 +23,10 @@ class EarningsCalendarLoader(EventsLoader): @lazyval def next_announcement_loader(self): - return self._next_event_date_loader(self.dataset.next_announcement, - ANNOUNCEMENT_FIELD_NAME) + return self._next_event_date_loader(self.dataset.next_announcement) @lazyval def previous_announcement_loader(self): return self._previous_event_date_loader( self.dataset.previous_announcement, - ANNOUNCEMENT_FIELD_NAME ) diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py index e88d7966..be1a8ba3 100644 --- a/zipline/pipeline/loaders/events.py +++ b/zipline/pipeline/loaders/events.py @@ -77,6 +77,10 @@ class EventsLoader(PipelineLoader): def expected_cols(self): raise NotImplemented('expected_cols') + @abc.abstractproperty + def event_date_col(self): + raise NotImplemented('event_date_col') + def __init__(self, all_dates, events_by_sid, @@ -148,7 +152,8 @@ class EventsLoader(PipelineLoader): raise ValueError( WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=k) ) - + self.events_by_sid = {sid: df.dropna(subset=[self.event_date_col]) for + sid, df in self.events_by_sid.items()} self.dataset = dataset def get_loader(self, column): @@ -164,7 +169,7 @@ class EventsLoader(PipelineLoader): for column in columns ) - def _next_event_date_loader(self, next_date_field, event_date_field_name): + def _next_event_date_loader(self, next_date_field): return DataFrameLoader( next_date_field, next_event_frame( @@ -172,15 +177,14 @@ class EventsLoader(PipelineLoader): self.all_dates, next_date_field.missing_value, next_date_field.dtype, - event_date_field_name, - event_date_field_name + self.event_date_col, + self.event_date_col ), adjustments=None, ) def _next_event_value_loader(self, next_value_field, - event_date_field_name, value_field_name): return DataFrameLoader( next_value_field, @@ -189,15 +193,14 @@ class EventsLoader(PipelineLoader): self.all_dates, next_value_field.missing_value, next_value_field.dtype, - event_date_field_name, + self.event_date_col, value_field_name ), adjustments=None, ) def _previous_event_date_loader(self, - prev_date_field, - event_date_field_name): + prev_date_field): return DataFrameLoader( prev_date_field, previous_event_frame( @@ -205,15 +208,14 @@ class EventsLoader(PipelineLoader): self.all_dates, NaTD, 'datetime64[ns]', - event_date_field_name, - event_date_field_name + self.event_date_col, + self.event_date_col ), adjustments=None, ) def _previous_event_value_loader(self, previous_value_field, - event_date_field_name, value_field_name): return DataFrameLoader( previous_value_field, @@ -222,7 +224,7 @@ class EventsLoader(PipelineLoader): self.all_dates, previous_value_field.missing_value, previous_value_field.dtype, - event_date_field_name, + self.event_date_col, value_field_name ), adjustments=None, diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index 3b79253f..80642a25 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -280,25 +280,18 @@ def zip_with_floats(dates, flts): return pd.Series(flts, index=dates, dtype='float') -def num_days_in_range(dates, start, end): - """ - Return the number of days in `dates` between start and end, inclusive. - """ - start_idx, stop_idx = dates.slice_locs(start, end) - return stop_idx - start_idx - - def zip_with_dates(index_dates, dts): return pd.Series(pd.to_datetime(dts), index=index_dates) def get_values_for_date_ranges(zip_date_index_with_vals, vals_for_date_intervals, - date_intervals, + starts, + ends, date_index): """ - Returns a Series of values indexed by date based on values for the given - date intervals. + Returns a Series of values indexed by date based on the intervals defined + by the start and end dates. Parameters ---------- @@ -307,10 +300,10 @@ def get_values_for_date_ranges(zip_date_index_with_vals, returns a pd.Series with the values indexed by the dates. vals_for_date_intervals : list A list of values for each date interval in `date_intervals`. - date_intervals : list - A list of pairs of dates, where each pair represents a date interval - that corresponds to the value at the same index in - `vals_for_date_intervals`. + starts : DatetimeIndex + A DatetimeIndex of start dates. + ends : list + A DatetimeIndex of end dates. date_index : DatetimeIndex The DatetimeIndex containing all dates for which values were requested. @@ -321,10 +314,22 @@ def get_values_for_date_ranges(zip_date_index_with_vals, to dates based on the given date intervals. """ # Fill in given values for given date ranges. + end_indexes = date_index.values.searchsorted(ends) + start_indexes = date_index.values.searchsorted(starts) + num_days = (end_indexes - start_indexes) + 1 + + # In case any of the end dates falls on days missing from the date_index, + # searchsorted will have placed their index within `date_index` to the + # index of the next start date, so we will have added 1 extra day for + # each of these. Subtract those extra days, but ignore any cases where the + # start and end dates are equal. Note: if any of the start dates is + # missing, it won't affect calculations because searchsorted will advance + # the index to the next date within the same range. + num_days[np.where(~np.in1d(ends, date_index) & (num_days != 0))] -= 1 return zip_date_index_with_vals( date_index, - np.repeat(vals_for_date_intervals, - [num_days_in_range(date_index, *date_interval) - for date_interval in - date_intervals]), + np.repeat( + vals_for_date_intervals, + num_days, + ) ) diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py index 35eaa406..d1547489 100644 --- a/zipline/testing/fixtures.py +++ b/zipline/testing/fixtures.py @@ -912,7 +912,8 @@ class WithPipelineEventDataLoader(with_metaclass( frame = pd.DataFrame({sid: get_values_for_date_ranges( zip_date_index_with_vals, vals[sid], - date_intervals[sid], + pd.DatetimeIndex(list(zip(*date_intervals[sid]))[0]), + pd.DatetimeIndex(list(zip(*date_intervals[sid]))[1]), dates ) for sid in self.get_sids()[:-1]}) frame[self.get_sids()[-1]] = zip_date_index_with_vals( @@ -982,7 +983,7 @@ class WithPipelineEventDataLoader(with_metaclass( for sid in self.get_sids(): for col_name in cols.keys(): - assert_series_equal(result[col_name].xs(sid, level=1), + assert_series_equal(result[col_name].unstack(1)[sid], cols[col_name][sid], check_names=False) @@ -1109,24 +1110,24 @@ class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader): ] next_date_intervals = [ - [[None, '2014-01-04'], + [['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-15'], ['2014-01-16', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], + ['2014-01-21', '2014-01-31']], + [['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', '2014-01-15'], ['2014-01-16', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], + ['2014-01-21', '2014-01-31']], + [['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'], ['2014-01-11', '2014-01-14'], ['2014-01-15', '2014-01-20'], - ['2014-01-21', None]], - [[None, '2014-01-04'], + ['2014-01-21', '2014-01-31']], + [['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'], ['2014-01-11', '2014-01-15'], - ['2014-01-16', None]] + ['2014-01-16', '2014-01-31']] ] next_dates = [ @@ -1138,18 +1139,18 @@ class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader): ] prev_date_intervals = [ - [[None, '2014-01-14'], + [['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'], - ['2014-01-20', None]], - [[None, '2014-01-14'], + ['2014-01-20', '2014-01-31']], + [['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'], - ['2014-01-20', None]], - [[None, '2014-01-09'], + ['2014-01-20', '2014-01-31']], + [['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-19'], - ['2014-01-20', None]], - [[None, '2014-01-09'], + ['2014-01-20', '2014-01-31']], + [['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-14'], - ['2014-01-15', None]] + ['2014-01-15', '2014-01-31']] ] prev_dates = [