From 24019880afc0fe95e224c37c2a313109ef13f70c Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Tue, 12 Apr 2016 15:38:10 -0400 Subject: [PATCH 1/3] MAINT: redesign buyback auth datasets and factor --- zipline/pipeline/common.py | 6 ++ zipline/pipeline/data/__init__.py | 7 +- zipline/pipeline/data/buyback_auth.py | 17 +--- zipline/pipeline/factors/events.py | 22 +---- zipline/pipeline/loaders/__init__.py | 10 +- zipline/pipeline/loaders/blaze/__init__.py | 9 +- .../pipeline/loaders/blaze/buyback_auth.py | 92 ++++--------------- zipline/pipeline/loaders/buyback_auth.py | 77 +++++----------- 8 files changed, 65 insertions(+), 175 deletions(-) diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py index 6c78038d..f820c7cc 100644 --- a/zipline/pipeline/common.py +++ b/zipline/pipeline/common.py @@ -8,6 +8,7 @@ CASH_FIELD_NAME = 'cash' CASH_AMOUNT_FIELD_NAME = 'cash_amount' COUNT_FIELD_NAME = 'count' BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date' +BUYBACK_TYPE_FIELD_NAME = 'buyback_type' DAYS_SINCE_PREV = 'days_since_prev' DAYS_SINCE_PREV_DISCLOSURE = 'days_since_prev_disclosure' DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement' @@ -44,6 +45,9 @@ PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash' PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count' PREVIOUS_DISCLOSURE_DATE = 'previous_disclosure_date' PREVIOUS_COUNT = 'previous_count' +PREVIOUS_BUYBACK_TYPE = 'previous_buyback_type' +PREVIOUS_VALUE = 'previous_value' +PREVIOUS_VALUE_TYPE = 'previous_value_type' PREVIOUS_EX_DATE = 'previous_ex_date' PREVIOUS_NUM_SHARES = 'previous_number_shares' PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter' @@ -60,3 +64,5 @@ SHARE_COUNT_FIELD_NAME = 'share_count' SID_FIELD_NAME = 'sid' STANDARD_DEVIATION_FIELD_NAME = 'standard_deviation' TS_FIELD_NAME = 'timestamp' +VALUE_FIELD_NAME = 'value' +VALUE_TYPE_FIELD_NAME = 'value_type' diff --git a/zipline/pipeline/data/__init__.py b/zipline/pipeline/data/__init__.py index 241cbb85..626e11c8 100644 --- a/zipline/pipeline/data/__init__.py +++ b/zipline/pipeline/data/__init__.py @@ -1,5 +1,5 @@ from ._13d_filings import _13DFilings -from .buyback_auth import CashBuybackAuthorizations, ShareBuybackAuthorizations +from .buyback_auth import BuybackAuthorizations from .dividends import ( DividendsByAnnouncementDate, DividendsByExDate, @@ -13,14 +13,13 @@ from .dataset import DataSet, Column, BoundColumn __all__ = [ '_13DFilings', 'BoundColumn', - 'CashBuybackAuthorizations', + 'BuybackAuthorizations', 'Column', 'DataSet', 'DividendsByAnnouncementDate', 'DividendsByExDate', 'DividendsByPayDate', 'EarningsCalendar', - 'ConsensusEstimates', - 'ShareBuybackAuthorizations', + 'ConsensusEstimates', 'USEquityPricing', ] diff --git a/zipline/pipeline/data/buyback_auth.py b/zipline/pipeline/data/buyback_auth.py index 7dc03185..dadce8cf 100644 --- a/zipline/pipeline/data/buyback_auth.py +++ b/zipline/pipeline/data/buyback_auth.py @@ -6,19 +6,12 @@ from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype from .dataset import Column, DataSet -class CashBuybackAuthorizations(DataSet): +class BuybackAuthorizations(DataSet): """ Dataset representing dates of recently announced cash buyback authorizations. """ - cash_amount = Column(float64_dtype) - announcement_date = Column(datetime64ns_dtype) - - -class ShareBuybackAuthorizations(DataSet): - """ - Dataset representing dates of recently announced share buyback - authorizations. - """ - share_count = Column(float64_dtype) - announcement_date = Column(datetime64ns_dtype) + previous_value = Column(float64_dtype) + previous_date = Column(datetime64ns_dtype) + previous_value_type = Column(float64_dtype) # TODO: should be string + previous_buyback_type = Column(float64_dtype) # TODO: should be string diff --git a/zipline/pipeline/factors/events.py b/zipline/pipeline/factors/events.py index 972d2a5c..16205af9 100644 --- a/zipline/pipeline/factors/events.py +++ b/zipline/pipeline/factors/events.py @@ -5,8 +5,7 @@ announcements, acquisitions, dividends, etc.). from numpy import newaxis from ..data import ( _13DFilings, - CashBuybackAuthorizations, - ShareBuybackAuthorizations, + BuybackAuthorizations, DividendsByAnnouncementDate, DividendsByExDate, EarningsCalendar @@ -132,7 +131,7 @@ class BusinessDaysSincePreviousEarnings(BusinessDaysSincePreviousEvents): inputs = [EarningsCalendar.previous_announcement] -class BusinessDaysSinceCashBuybackAuth( +class BusinessDaysSinceBuybackAuth( BusinessDaysSincePreviousEvents ): """ @@ -143,22 +142,7 @@ class BusinessDaysSinceCashBuybackAuth( -------- zipline.pipeline.factors.BusinessDaysSinceCashBuybackAuth """ - inputs = [CashBuybackAuthorizations.announcement_date] - - -class BusinessDaysSinceShareBuybackAuth( - BusinessDaysSincePreviousEvents -): - """ - Factor returning the number of **business days** (not trading days!) since - the most recent share buyback authorization for each asset. - - - See Also - -------- - zipline.pipeline.factors.BusinessDaysSinceShareBuybackAuth - """ - inputs = [ShareBuybackAuthorizations.announcement_date] + inputs = [BuybackAuthorizations.previous_date] class BusinessDaysSinceDividendAnnouncement( diff --git a/zipline/pipeline/loaders/__init__.py b/zipline/pipeline/loaders/__init__.py index aeb5635e..b69d6e41 100644 --- a/zipline/pipeline/loaders/__init__.py +++ b/zipline/pipeline/loaders/__init__.py @@ -1,10 +1,7 @@ from ._13d_filings import _13DFilingsLoader -from .earnings import EarningsCalendarLoader from .consensus_estimates import ConsensusEstimatesLoader -from .buyback_auth import ( - CashBuybackAuthorizationsLoader, - ShareBuybackAuthorizationsLoader -) +from .earnings import EarningsCalendarLoader +from .buyback_auth import BuybackAuthorizationsLoader from .dividends import ( DividendsByAnnouncementDateLoader, DividendsByExDateLoader, @@ -14,12 +11,11 @@ from .equity_pricing_loader import USEquityPricingLoader __all__ = [ '_13DFilingsLoader', - 'CashBuybackAuthorizationsLoader', + 'BuybackAuthorizationsLoader', 'DividendsByAnnouncementDateLoader', 'DividendsByExDateLoader', 'DividendsByPayDateLoader', 'EarningsCalendarLoader', 'ConsensusEstimatesLoader', - 'ShareBuybackAuthorizationsLoader', 'USEquityPricingLoader', ] diff --git a/zipline/pipeline/loaders/blaze/__init__.py b/zipline/pipeline/loaders/blaze/__init__.py index 74922685..1dfd0dd7 100644 --- a/zipline/pipeline/loaders/blaze/__init__.py +++ b/zipline/pipeline/loaders/blaze/__init__.py @@ -1,8 +1,6 @@ from ._13d_filings import Blaze_13DFilingsLoader -from .buyback_auth import ( - BlazeCashBuybackAuthorizationsLoader, - BlazeShareBuybackAuthorizationsLoader -) + +from .buyback_auth import BlazeBuybackAuthorizationsLoader from .core import ( BlazeLoader, NoDeltasWarning, @@ -21,14 +19,13 @@ from .consensus_estimates import BlazeConsensusEstimatesLoader __all__ = ( 'Blaze_13DFilingsLoader', - 'BlazeCashBuybackAuthorizationsLoader', + 'BlazeBuybackAuthorizationsLoader', 'BlazeDividendsByAnnouncementDateLoader', 'BlazeConsensusEstimatesLoader', 'BlazeDividendsByExDateLoader', 'BlazeDividendsByPayDateLoader', 'BlazeEarningsCalendarLoader', 'BlazeLoader', - 'BlazeShareBuybackAuthorizationsLoader', 'from_blaze', 'global_loader', 'NoDeltasWarning', diff --git a/zipline/pipeline/loaders/blaze/buyback_auth.py b/zipline/pipeline/loaders/blaze/buyback_auth.py index 57fd694d..deab8422 100644 --- a/zipline/pipeline/loaders/blaze/buyback_auth.py +++ b/zipline/pipeline/loaders/blaze/buyback_auth.py @@ -4,22 +4,14 @@ from .core import ( ) from zipline.pipeline.common import ( BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME, - SHARE_COUNT_FIELD_NAME -) -from zipline.pipeline.data import ( - CashBuybackAuthorizations, - ShareBuybackAuthorizations -) -from zipline.pipeline.loaders import ( - CashBuybackAuthorizationsLoader, - ShareBuybackAuthorizationsLoader, -) + VALUE_FIELD_NAME, VALUE_TYPE_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME) +from zipline.pipeline.data import BuybackAuthorizations +from zipline.pipeline.loaders import BuybackAuthorizationsLoader from .events import BlazeEventsLoader -class BlazeCashBuybackAuthorizationsLoader(BlazeEventsLoader): - """A pipeline loader for the ``CashBuybackAuthorizations`` dataset that +class BlazeBuybackAuthorizationsLoader(BlazeEventsLoader): + """A pipeline loader for the ``BuybackAuthorizations`` dataset that loads data from a blaze expression. Parameters @@ -45,12 +37,15 @@ class BlazeCashBuybackAuthorizationsLoader(BlazeEventsLoader): {SID_FIELD_NAME}: int64, {TS_FIELD_NAME}: datetime, {BUYBACK_ANNOUNCEMENT_FIELD_NAME}: ?datetime, - {CASH_FIELD_NAME}: ?float64 + {VALUE_FIELD_NAME}: ?float64, + {VALUE_TYPE_FIELD_NAME}: ?float64, + {BUYBACK_TYPE_FIELD_NAME}: ?float64, }} Where each row of the table is a record including the sid to identify the company, the timestamp where we learned about the announcement, the - date when the buyback was announced, the share count, and the cash amount. + date when the buyback was announced, the buyback value, the value type + (in cash or in shares), and the buyback type. If the '{TS_FIELD_NAME}' field is not included it is assumed that we start the backtest with knowledge of all announcements. @@ -59,70 +54,19 @@ class BlazeCashBuybackAuthorizationsLoader(BlazeEventsLoader): TS_FIELD_NAME=TS_FIELD_NAME, SID_FIELD_NAME=SID_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME=BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME=CASH_FIELD_NAME + VALUE_FIELD_NAME=VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME=VALUE_TYPE_FIELD_NAME, + BUYBACK_TYPE_FIELD_NAME=BUYBACK_TYPE_FIELD_NAME ) _expected_fields = frozenset({ TS_FIELD_NAME, SID_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME + VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME, + BUYBACK_TYPE_FIELD_NAME }) - concrete_loader = CashBuybackAuthorizationsLoader - default_dataset = CashBuybackAuthorizations - - -class BlazeShareBuybackAuthorizationsLoader(BlazeEventsLoader): - """A pipeline loader for the ``ShareBuybackAuthorizations`` dataset that - loads data from a blaze expression. - - Parameters - ---------- - expr : Expr - The expression representing the data to load. - resources : dict, optional - Mapping from the loadable terms of ``expr`` to actual data resources. - odo_kwargs : dict, optional - Extra keyword arguments to pass to odo when executing the expression. - data_query_time : time, optional - The time to use for the data query cutoff. - data_query_tz : tzinfo or str - The timezeone to use for the data query cutoff. - dataset: DataSet - The DataSet object for which this loader loads data. - - Notes - ----- - The expression should have a tabular dshape of:: - - Dim * {{ - {SID_FIELD_NAME}: int64, - {TS_FIELD_NAME}: datetime, - {BUYBACK_ANNOUNCEMENT_FIELD_NAME}: ?datetime, - {SHARE_COUNT_FIELD_NAME}: ?float64, - }} - - Where each row of the table is a record including the sid to identify the - company, the timestamp where we learned about the announcement, the - date when the buyback was announced, the share count, and the value. - - If the '{TS_FIELD_NAME}' field is not included it is assumed that we - start the backtest with knowledge of all announcements. - """ - __doc__ = __doc__.format( - TS_FIELD_NAME=TS_FIELD_NAME, - SID_FIELD_NAME=SID_FIELD_NAME, - BUYBACK_ANNOUNCEMENT_FIELD_NAME=BUYBACK_ANNOUNCEMENT_FIELD_NAME, - SHARE_COUNT_FIELD_NAME=SHARE_COUNT_FIELD_NAME, - ) - - _expected_fields = frozenset({ - TS_FIELD_NAME, - SID_FIELD_NAME, - BUYBACK_ANNOUNCEMENT_FIELD_NAME, - SHARE_COUNT_FIELD_NAME, - }) - - concrete_loader = ShareBuybackAuthorizationsLoader - default_dataset = ShareBuybackAuthorizations + concrete_loader = BuybackAuthorizationsLoader + default_dataset = BuybackAuthorizations diff --git a/zipline/pipeline/loaders/buyback_auth.py b/zipline/pipeline/loaders/buyback_auth.py index 6d584809..bb6b6d86 100644 --- a/zipline/pipeline/loaders/buyback_auth.py +++ b/zipline/pipeline/loaders/buyback_auth.py @@ -2,30 +2,30 @@ Reference implementation for buyback auth loaders. """ -from ..data import ( - CashBuybackAuthorizations, - ShareBuybackAuthorizations -) +from ..data import BuybackAuthorizations from .events import EventsLoader from zipline.pipeline.common import ( BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME, - SHARE_COUNT_FIELD_NAME + BUYBACK_TYPE_FIELD_NAME, + VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME ) from zipline.utils.memoize import lazyval -class CashBuybackAuthorizationsLoader(EventsLoader): +class BuybackAuthorizationsLoader(EventsLoader): """ Reference loader for - :class:`zipline.pipeline.data.CashBuybackAuthorizations`. + :class:`zipline.pipeline.data.BuybackAuthorizations`. events_by_sid: dict[sid -> pd.DataFrame(knowledge date, - event date, cash value)] + event date, value, value type, buyback type)] """ expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME]) + VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME, + BUYBACK_TYPE_FIELD_NAME]) event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME @@ -33,8 +33,8 @@ class CashBuybackAuthorizationsLoader(EventsLoader): all_dates, events_by_sid, infer_timestamps=False, - dataset=CashBuybackAuthorizations): - super(CashBuybackAuthorizationsLoader, self).__init__( + dataset=BuybackAuthorizations): + super(BuybackAuthorizationsLoader, self).__init__( all_dates, events_by_sid, infer_timestamps=infer_timestamps, @@ -42,57 +42,28 @@ class CashBuybackAuthorizationsLoader(EventsLoader): ) @lazyval - def cash_amount_loader(self): + def previous_value_loader(self): return self._previous_event_value_loader( self.dataset.cash_amount, - CASH_FIELD_NAME + VALUE_FIELD_NAME ) @lazyval - def announcement_date_loader(self): + def previous_date_loader(self): return self._previous_event_date_loader( self.dataset.announcement_date, ) - -class ShareBuybackAuthorizationsLoader(EventsLoader): - """ - Reference loader for - :class:`zipline.pipeline.data.ShareBuybackAuthorizations`. - - Does not currently support adjustments to the dates of known buyback - authorizations. - - events_by_sid: dict[sid -> pd.DataFrame(knowledge date, - event date, share value)] - - """ - expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME, - SHARE_COUNT_FIELD_NAME]) - - event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME - - def __init__(self, - all_dates, - events_by_sid, - infer_timestamps=False, - dataset=ShareBuybackAuthorizations): - super(ShareBuybackAuthorizationsLoader, self).__init__( - all_dates, - events_by_sid, - infer_timestamps=infer_timestamps, - dataset=dataset, - ) - @lazyval - def share_count_loader(self): - return self._previous_event_value_loader( - self.dataset.share_count, - SHARE_COUNT_FIELD_NAME - ) - - @lazyval - def announcement_date_loader(self): + def previous_buyback_type_loader(self): return self._previous_event_date_loader( self.dataset.announcement_date, + BUYBACK_TYPE_FIELD_NAME, + ) + + @lazyval + def previous_value_type_loader(self): + return self._previous_event_date_loader( + self.dataset.announcement_date, + VALUE_TYPE_FIELD_NAME, ) From 3d521561f572913b70d005fee440337659a9689c Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Tue, 12 Apr 2016 15:38:26 -0400 Subject: [PATCH 2/3] TST: update tests to handle new buyback auth design MAINT: add back cash amount constant BUG: fix field names BUG: pass remaining args WIP: make buyback units parameterized so that user can choose BUG: fix filtering based on units parameter WIP: test for undesired units Revert "WIP: make buyback units parameterized so that user can choose" This reverts commit df3b838d525bff5026eba1d81865c6645d534c88. --- tests/pipeline/test_buyback_auth.py | 196 ++++++------------ zipline/pipeline/common.py | 1 + zipline/pipeline/data/__init__.py | 2 +- zipline/pipeline/data/buyback_auth.py | 7 +- zipline/pipeline/factors/__init__.py | 6 +- .../pipeline/loaders/blaze/buyback_auth.py | 9 +- zipline/pipeline/loaders/buyback_auth.py | 22 +- zipline/pipeline/loaders/utils.py | 4 + 8 files changed, 95 insertions(+), 152 deletions(-) diff --git a/tests/pipeline/test_buyback_auth.py b/tests/pipeline/test_buyback_auth.py index e88e664c..35165097 100644 --- a/tests/pipeline/test_buyback_auth.py +++ b/tests/pipeline/test_buyback_auth.py @@ -8,35 +8,26 @@ from six import iteritems from zipline.pipeline.common import( BUYBACK_ANNOUNCEMENT_FIELD_NAME, - CASH_FIELD_NAME, + BUYBACK_TYPE_FIELD_NAME, DAYS_SINCE_PREV, PREVIOUS_BUYBACK_ANNOUNCEMENT, - PREVIOUS_BUYBACK_CASH, - PREVIOUS_BUYBACK_SHARE_COUNT, - SHARE_COUNT_FIELD_NAME, SID_FIELD_NAME, - TS_FIELD_NAME -) -from zipline.pipeline.data import ( - CashBuybackAuthorizations, - ShareBuybackAuthorizations -) -from zipline.pipeline.factors.events import ( - BusinessDaysSinceCashBuybackAuth, - BusinessDaysSinceShareBuybackAuth -) -from zipline.pipeline.loaders.buyback_auth import ( - CashBuybackAuthorizationsLoader, - ShareBuybackAuthorizationsLoader -) -from zipline.pipeline.loaders.blaze import ( - BlazeCashBuybackAuthorizationsLoader, - BlazeShareBuybackAuthorizationsLoader, + TS_FIELD_NAME, + VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME, + PREVIOUS_VALUE, + PREVIOUS_VALUE_TYPE, + PREVIOUS_BUYBACK_TYPE, ) +from zipline.pipeline.data import BuybackAuthorizations +from zipline.pipeline.factors.events import BusinessDaysSinceBuybackAuth +from zipline.pipeline.loaders.buyback_auth import BuybackAuthorizationsLoader +from zipline.pipeline.loaders.blaze import BlazeBuybackAuthorizationsLoader from zipline.pipeline.loaders.utils import ( zip_with_floats, - zip_with_dates -) + zip_with_dates, + get_values_for_date_ranges, + zip_with_strs) from zipline.testing.fixtures import ( WithPipelineEventDataLoader, ZiplineTestCase ) @@ -48,15 +39,17 @@ date_intervals = [ buyback_authorizations_cases = [ pd.DataFrame({ - SHARE_COUNT_FIELD_NAME: [1, 15], - CASH_FIELD_NAME: [10, 20], + VALUE_FIELD_NAME: [1, 15], + VALUE_TYPE_FIELD_NAME: ["$M", "Mshares"], + BUYBACK_TYPE_FIELD_NAME: ["New", "Additional"], TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), BUYBACK_ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-04', '2014-01-09']) }), pd.DataFrame( - columns=[SHARE_COUNT_FIELD_NAME, - CASH_FIELD_NAME, + columns=[VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME, + BUYBACK_TYPE_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, TS_FIELD_NAME], dtype='datetime64[ns]' @@ -64,18 +57,33 @@ buyback_authorizations_cases = [ ] -class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, - ZiplineTestCase): +def get_expected_previous_values(zip_date_index_with_vals, + dates, + vals_for_date_intervals): + return pd.DataFrame({ + 0: get_values_for_date_ranges(zip_date_index_with_vals, + vals_for_date_intervals, + date_intervals, + dates), + 1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)), + }, index=dates) + + +class BuybackAuthLoaderTestCase(WithPipelineEventDataLoader, ZiplineTestCase): """ Test for cash buyback authorizations dataset. """ pipeline_columns = { - PREVIOUS_BUYBACK_CASH: - CashBuybackAuthorizations.cash_amount.latest, + PREVIOUS_VALUE: + BuybackAuthorizations.previous_value.latest, PREVIOUS_BUYBACK_ANNOUNCEMENT: - CashBuybackAuthorizations.announcement_date.latest, + BuybackAuthorizations.previous_date.latest, + PREVIOUS_VALUE_TYPE: + BuybackAuthorizations.previous_value_type.latest, + PREVIOUS_BUYBACK_TYPE: + BuybackAuthorizations.previous_buyback_type.latest, DAYS_SINCE_PREV: - BusinessDaysSinceCashBuybackAuth(), + BusinessDaysSinceBuybackAuth(), } @classmethod @@ -84,24 +92,30 @@ class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, @classmethod def get_dataset(cls): - return {sid: - frame.drop(SHARE_COUNT_FIELD_NAME, axis=1) + return {sid: frame for sid, frame in enumerate(buyback_authorizations_cases)} - loader_type = CashBuybackAuthorizationsLoader + loader_type = BuybackAuthorizationsLoader def setup(self, dates): cols = { - PREVIOUS_BUYBACK_CASH: self.get_sids_to_frames(zip_with_floats, - [['NaN', 10, 20]], - date_intervals, - dates), + PREVIOUS_VALUE: self.get_sids_to_frames(zip_with_floats, + [['NaN', 1, 15]], + date_intervals, + dates), PREVIOUS_BUYBACK_ANNOUNCEMENT: self.get_sids_to_frames( zip_with_dates, [['NaT', '2014-01-04', '2014-01-09']], date_intervals, dates), + PREVIOUS_VALUE_TYPE: self.get_sids_to_frames( + zip_with_strs, [["", "$M", "Mshares"]], date_intervals, dates + ), + PREVIOUS_BUYBACK_TYPE: self.get_sids_to_frames( + zip_with_strs, [["", "New", "Additional"]], date_intervals, + dates + ) } cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] @@ -109,68 +123,26 @@ class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, return cols -class ShareBuybackAuthLoaderTestCase(WithPipelineEventDataLoader, - ZiplineTestCase): - """ - Test for share buyback authorizations dataset. - """ - pipeline_columns = { - PREVIOUS_BUYBACK_SHARE_COUNT: - ShareBuybackAuthorizations.share_count.latest, - PREVIOUS_BUYBACK_ANNOUNCEMENT: - ShareBuybackAuthorizations.announcement_date.latest, - DAYS_SINCE_PREV: - BusinessDaysSinceShareBuybackAuth(), - } - - @classmethod - def get_sids(cls): - return range(2) - - @classmethod - def get_dataset(cls): - return {sid: - frame.drop(CASH_FIELD_NAME, axis=1) - for sid, frame - in enumerate(buyback_authorizations_cases)} - - loader_type = ShareBuybackAuthorizationsLoader - - def setup(self, dates): - cols = {PREVIOUS_BUYBACK_SHARE_COUNT: - self.get_sids_to_frames(zip_with_floats, - [['NaN', 1, 15]], - date_intervals, - dates,), - PREVIOUS_BUYBACK_ANNOUNCEMENT: - self.get_sids_to_frames(zip_with_dates, - [['NaT', '2014-01-04', '2014-01-09']], - date_intervals, - dates,), - } - - cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( - cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] - ) - return cols - - -class BlazeCashBuybackAuthLoaderTestCase(CashBuybackAuthLoaderTestCase): +class BlazeBuybackAuthLoaderTestCase(BuybackAuthLoaderTestCase): """ Test case for loading via blaze. """ - loader_type = BlazeCashBuybackAuthorizationsLoader + loader_type = BlazeBuybackAuthorizationsLoader def pipeline_event_loader_args(self, dates): _, mapping = super( - BlazeCashBuybackAuthLoaderTestCase, + BlazeBuybackAuthLoaderTestCase, self, ).pipeline_event_loader_args(dates) return (bz.data(pd.concat( pd.DataFrame({ BUYBACK_ANNOUNCEMENT_FIELD_NAME: frame[BUYBACK_ANNOUNCEMENT_FIELD_NAME], - CASH_FIELD_NAME: - frame[CASH_FIELD_NAME], + VALUE_FIELD_NAME: + frame[VALUE_FIELD_NAME], + VALUE_TYPE_FIELD_NAME: + frame[VALUE_TYPE_FIELD_NAME], + BUYBACK_TYPE_FIELD_NAME: + frame[BUYBACK_TYPE_FIELD_NAME], TS_FIELD_NAME: frame[TS_FIELD_NAME], SID_FIELD_NAME: sid, @@ -179,49 +151,13 @@ class BlazeCashBuybackAuthLoaderTestCase(CashBuybackAuthLoaderTestCase): ).reset_index(drop=True)),) -class BlazeShareBuybackAuthLoaderTestCase(ShareBuybackAuthLoaderTestCase): - """ Test case for loading via blaze. - """ - loader_type = BlazeShareBuybackAuthorizationsLoader - - def pipeline_event_loader_args(self, dates): - _, mapping = super( - BlazeShareBuybackAuthLoaderTestCase, - self, - ).pipeline_event_loader_args(dates) - return (bz.data(pd.concat( - pd.DataFrame({ - BUYBACK_ANNOUNCEMENT_FIELD_NAME: - frame[BUYBACK_ANNOUNCEMENT_FIELD_NAME], - SHARE_COUNT_FIELD_NAME: - frame[SHARE_COUNT_FIELD_NAME], - TS_FIELD_NAME: - frame[TS_FIELD_NAME], - SID_FIELD_NAME: sid, - }) - for sid, frame in iteritems(mapping) - ).reset_index(drop=True)),) - - -class BlazeShareBuybackAuthLoaderNotInteractiveTestCase( - BlazeShareBuybackAuthLoaderTestCase): +class BlazeBuybackAuthLoaderNotInteractiveTestCase( + BlazeBuybackAuthLoaderTestCase): """Test case for passing a non-interactive symbol and a dict of resources. """ def pipeline_event_loader_args(self, dates): (bound_expr,) = super( - BlazeShareBuybackAuthLoaderNotInteractiveTestCase, - self, - ).pipeline_event_loader_args(dates) - return swap_resources_into_scope(bound_expr, {}) - - -class BlazeCashBuybackAuthLoaderNotInteractiveTestCase( - BlazeCashBuybackAuthLoaderTestCase): - """Test case for passing a non-interactive symbol and a dict of resources. - """ - def pipeline_event_loader_args(self, dates): - (bound_expr,) = super( - BlazeCashBuybackAuthLoaderNotInteractiveTestCase, + BlazeBuybackAuthLoaderNotInteractiveTestCase, self, ).pipeline_event_loader_args(dates) return swap_resources_into_scope(bound_expr, {}) diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py index f820c7cc..a264c7f0 100644 --- a/zipline/pipeline/common.py +++ b/zipline/pipeline/common.py @@ -9,6 +9,7 @@ CASH_AMOUNT_FIELD_NAME = 'cash_amount' COUNT_FIELD_NAME = 'count' BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date' BUYBACK_TYPE_FIELD_NAME = 'buyback_type' +CASH_AMOUNT_FIELD_NAME = 'cash_amount' DAYS_SINCE_PREV = 'days_since_prev' DAYS_SINCE_PREV_DISCLOSURE = 'days_since_prev_disclosure' DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement' diff --git a/zipline/pipeline/data/__init__.py b/zipline/pipeline/data/__init__.py index 626e11c8..063316ad 100644 --- a/zipline/pipeline/data/__init__.py +++ b/zipline/pipeline/data/__init__.py @@ -20,6 +20,6 @@ __all__ = [ 'DividendsByExDate', 'DividendsByPayDate', 'EarningsCalendar', - 'ConsensusEstimates', + 'ConsensusEstimates', 'USEquityPricing', ] diff --git a/zipline/pipeline/data/buyback_auth.py b/zipline/pipeline/data/buyback_auth.py index dadce8cf..781e01aa 100644 --- a/zipline/pipeline/data/buyback_auth.py +++ b/zipline/pipeline/data/buyback_auth.py @@ -1,7 +1,8 @@ """ Datasets representing dates of recently announced buyback authorizations. """ -from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype +from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, \ + categorical_dtype from .dataset import Column, DataSet @@ -13,5 +14,5 @@ class BuybackAuthorizations(DataSet): """ previous_value = Column(float64_dtype) previous_date = Column(datetime64ns_dtype) - previous_value_type = Column(float64_dtype) # TODO: should be string - previous_buyback_type = Column(float64_dtype) # TODO: should be string + previous_value_type = Column(categorical_dtype, missing_value="<>") + previous_buyback_type = Column(categorical_dtype, missing_value="<>") diff --git a/zipline/pipeline/factors/__init__.py b/zipline/pipeline/factors/__init__.py index d6d32b98..b5c7e2e8 100644 --- a/zipline/pipeline/factors/__init__.py +++ b/zipline/pipeline/factors/__init__.py @@ -6,13 +6,12 @@ from .factor import ( ) from .events import ( BusinessDaysSince13DFilingsDate, - BusinessDaysSinceCashBuybackAuth, + BusinessDaysSinceBuybackAuth, BusinessDaysSinceDividendAnnouncement, BusinessDaysUntilNextExDate, BusinessDaysSincePreviousExDate, BusinessDaysUntilNextEarnings, BusinessDaysSincePreviousEarnings, - BusinessDaysSinceShareBuybackAuth, ) from .technical import ( AverageDollarVolume, @@ -30,13 +29,12 @@ from .technical import ( __all__ = [ 'BusinessDaysSince13DFilingsDate', - 'BusinessDaysSinceCashBuybackAuth', + 'BusinessDaysSinceBuybackAuth', 'BusinessDaysSinceDividendAnnouncement', 'BusinessDaysUntilNextExDate', 'BusinessDaysSincePreviousExDate', 'BusinessDaysUntilNextEarnings', 'BusinessDaysSincePreviousEarnings', - 'BusinessDaysSinceShareBuybackAuth', 'CustomFactor', 'AverageDollarVolume', 'EWMA', diff --git a/zipline/pipeline/loaders/blaze/buyback_auth.py b/zipline/pipeline/loaders/blaze/buyback_auth.py index deab8422..7fe15900 100644 --- a/zipline/pipeline/loaders/blaze/buyback_auth.py +++ b/zipline/pipeline/loaders/blaze/buyback_auth.py @@ -4,7 +4,10 @@ from .core import ( ) from zipline.pipeline.common import ( BUYBACK_ANNOUNCEMENT_FIELD_NAME, - VALUE_FIELD_NAME, VALUE_TYPE_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME) + BUYBACK_TYPE_FIELD_NAME, + VALUE_FIELD_NAME, + VALUE_TYPE_FIELD_NAME, +) from zipline.pipeline.data import BuybackAuthorizations from zipline.pipeline.loaders import BuybackAuthorizationsLoader from .events import BlazeEventsLoader @@ -38,8 +41,8 @@ class BlazeBuybackAuthorizationsLoader(BlazeEventsLoader): {TS_FIELD_NAME}: datetime, {BUYBACK_ANNOUNCEMENT_FIELD_NAME}: ?datetime, {VALUE_FIELD_NAME}: ?float64, - {VALUE_TYPE_FIELD_NAME}: ?float64, - {BUYBACK_TYPE_FIELD_NAME}: ?float64, + {VALUE_TYPE_FIELD_NAME}: ?str, + {BUYBACK_TYPE_FIELD_NAME}: ?str, }} Where each row of the table is a record including the sid to identify the diff --git a/zipline/pipeline/loaders/buyback_auth.py b/zipline/pipeline/loaders/buyback_auth.py index bb6b6d86..327d5743 100644 --- a/zipline/pipeline/loaders/buyback_auth.py +++ b/zipline/pipeline/loaders/buyback_auth.py @@ -44,26 +44,26 @@ class BuybackAuthorizationsLoader(EventsLoader): @lazyval def previous_value_loader(self): return self._previous_event_value_loader( - self.dataset.cash_amount, + self.dataset.previous_value, VALUE_FIELD_NAME ) @lazyval def previous_date_loader(self): return self._previous_event_date_loader( - self.dataset.announcement_date, - ) - - @lazyval - def previous_buyback_type_loader(self): - return self._previous_event_date_loader( - self.dataset.announcement_date, - BUYBACK_TYPE_FIELD_NAME, + self.dataset.previous_date, ) @lazyval def previous_value_type_loader(self): - return self._previous_event_date_loader( - self.dataset.announcement_date, + return self._previous_event_value_loader( + self.dataset.previous_value_type, VALUE_TYPE_FIELD_NAME, ) + + @lazyval + def previous_buyback_type_loader(self): + return self._previous_event_value_loader( + self.dataset.previous_buyback_type, + BUYBACK_TYPE_FIELD_NAME, + ) diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index 80642a25..f4b267db 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -280,6 +280,10 @@ def zip_with_floats(dates, flts): return pd.Series(flts, index=dates, dtype='float') +def zip_with_strs(dates, flts): + return pd.Series(flts, index=dates, dtype='string') + + def zip_with_dates(index_dates, dts): return pd.Series(pd.to_datetime(dts), index=index_dates) From 6b60e447a037ae952e5852cad5ffd43f51989806 Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Tue, 3 May 2016 14:23:20 -0400 Subject: [PATCH 3/3] MAINT: incorporate string support STY: remove unused imports MAINT: change dtype to object for compatibility with python3 MAINT: rename pipeline columns and constants for clarity MAINT: rename column --- tests/pipeline/test_13d_filings.py | 64 ++++++------- tests/pipeline/test_buyback_auth.py | 90 ++++++++++--------- tests/pipeline/test_consensus_estimates.py | 12 ++- tests/pipeline/test_dividends.py | 27 ++++-- tests/pipeline/test_earnings.py | 7 +- zipline/pipeline/common.py | 23 +++-- zipline/pipeline/data/buyback_auth.py | 11 ++- zipline/pipeline/factors/events.py | 4 +- zipline/pipeline/loaders/__init__.py | 2 +- .../pipeline/loaders/blaze/buyback_auth.py | 22 ++--- zipline/pipeline/loaders/buyback_auth.py | 26 +++--- zipline/pipeline/loaders/utils.py | 12 ++- zipline/testing/fixtures.py | 28 ++++-- 13 files changed, 185 insertions(+), 143 deletions(-) diff --git a/tests/pipeline/test_13d_filings.py b/tests/pipeline/test_13d_filings.py index 07c5150b..7688ff22 100644 --- a/tests/pipeline/test_13d_filings.py +++ b/tests/pipeline/test_13d_filings.py @@ -17,16 +17,17 @@ from zipline.pipeline.data import _13DFilings from zipline.pipeline.factors.events import BusinessDaysSince13DFilingsDate from zipline.pipeline.loaders._13d_filings import _13DFilingsLoader from zipline.pipeline.loaders.utils import ( - get_values_for_date_ranges, zip_with_floats, zip_with_dates ) from zipline.testing.fixtures import WithPipelineEventDataLoader from zipline.testing.fixtures import ZiplineTestCase -date_intervals = [[None, '2014-01-04'], - ['2014-01-05', '2014-01-09'], - ['2014-01-10', None]] +date_intervals = [ + [['2014-01-01', '2014-01-04'], + ['2014-01-05', '2014-01-09'], + ['2014-01-10', '2014-01-31']] +] empty_df = pd.DataFrame( columns=[NUM_SHARES, @@ -40,7 +41,7 @@ empty_df[PERCENT_SHARES] = empty_df[PERCENT_SHARES].astype('float') empty_df[TS_FIELD_NAME] = empty_df[TS_FIELD_NAME].astype('datetime64[ns]') empty_df[DISCLOSURE_DATE] = empty_df[DISCLOSURE_DATE].astype('datetime64[ns]') -_13d_filngs_cases = [ +_13d_filings_cases = [ pd.DataFrame({ NUM_SHARES: [1, 15], PERCENT_SHARES: [10, 20], @@ -51,19 +52,6 @@ _13d_filngs_cases = [ ] -def get_expected_previous_values(zip_date_index_with_vals, - vals, - date_intervals, - dates): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_date_index_with_vals, - vals, - date_intervals, - dates), - 1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)), - }, index=dates) - - class _13DFilingsLoaderTestCase(WithPipelineEventDataLoader, ZiplineTestCase): """ @@ -88,23 +76,37 @@ class _13DFilingsLoaderTestCase(WithPipelineEventDataLoader, def get_dataset(cls): return {sid: frame for sid, frame - in enumerate(_13d_filngs_cases)} + in enumerate(_13d_filings_cases)} loader_type = _13DFilingsLoader def setup(self, dates): - cols = {} - cols[ - PREVIOUS_DISCLOSURE_DATE - ] = get_expected_previous_values(zip_with_dates, - ['NaT', '2014-01-04', '2014-01-09'], - date_intervals, dates) - cols[PREVIOUS_NUM_SHARES] = get_expected_previous_values( - zip_with_floats, ['NaN', 1, 15], date_intervals, dates - ) - cols[PREVIOUS_PERCENT_SHARES] = get_expected_previous_values( - zip_with_floats, ['NaN', 10, 20], date_intervals, dates - ) + cols = { + PREVIOUS_DISCLOSURE_DATE: self.get_sids_to_frames( + zip_with_dates, + [['NaT', '2014-01-04', '2014-01-09']], + date_intervals, + dates, + 'datetime64[ns]', + 'NaN' + ), + PREVIOUS_NUM_SHARES: self.get_sids_to_frames( + zip_with_floats, + [['NaN', 1, 15]], + date_intervals, + dates, + 'float', + 'NaN' + ), + PREVIOUS_PERCENT_SHARES: self.get_sids_to_frames( + zip_with_floats, + [['NaN', 10, 20]], + date_intervals, + dates, + 'float', + 'NaN' + ) + } cols[DAYS_SINCE_PREV_DISCLOSURE] = self._compute_busday_offsets( cols[PREVIOUS_DISCLOSURE_DATE] ) diff --git a/tests/pipeline/test_buyback_auth.py b/tests/pipeline/test_buyback_auth.py index 35165097..4baee11a 100644 --- a/tests/pipeline/test_buyback_auth.py +++ b/tests/pipeline/test_buyback_auth.py @@ -7,27 +7,27 @@ import pandas as pd from six import iteritems from zipline.pipeline.common import( + BUYBACK_AMOUNT_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME, DAYS_SINCE_PREV, + PREVIOUS_BUYBACK_AMOUNT, PREVIOUS_BUYBACK_ANNOUNCEMENT, + PREVIOUS_BUYBACK_TYPE, + PREVIOUS_BUYBACK_UNIT, SID_FIELD_NAME, TS_FIELD_NAME, - VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME, - PREVIOUS_VALUE, - PREVIOUS_VALUE_TYPE, - PREVIOUS_BUYBACK_TYPE, ) from zipline.pipeline.data import BuybackAuthorizations from zipline.pipeline.factors.events import BusinessDaysSinceBuybackAuth from zipline.pipeline.loaders.buyback_auth import BuybackAuthorizationsLoader from zipline.pipeline.loaders.blaze import BlazeBuybackAuthorizationsLoader from zipline.pipeline.loaders.utils import ( - zip_with_floats, zip_with_dates, - get_values_for_date_ranges, - zip_with_strs) + zip_with_floats, + zip_with_strs +) from zipline.testing.fixtures import ( WithPipelineEventDataLoader, ZiplineTestCase ) @@ -39,16 +39,16 @@ date_intervals = [ buyback_authorizations_cases = [ pd.DataFrame({ - VALUE_FIELD_NAME: [1, 15], - VALUE_TYPE_FIELD_NAME: ["$M", "Mshares"], + BUYBACK_AMOUNT_FIELD_NAME: [1, 15], + BUYBACK_UNIT_FIELD_NAME: ["$M", "Mshares"], BUYBACK_TYPE_FIELD_NAME: ["New", "Additional"], TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']), BUYBACK_ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-04', '2014-01-09']) }), pd.DataFrame( - columns=[VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME, + columns=[BUYBACK_AMOUNT_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, TS_FIELD_NAME], @@ -57,31 +57,19 @@ buyback_authorizations_cases = [ ] -def get_expected_previous_values(zip_date_index_with_vals, - dates, - vals_for_date_intervals): - return pd.DataFrame({ - 0: get_values_for_date_ranges(zip_date_index_with_vals, - vals_for_date_intervals, - date_intervals, - dates), - 1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)), - }, index=dates) - - class BuybackAuthLoaderTestCase(WithPipelineEventDataLoader, ZiplineTestCase): """ Test for cash buyback authorizations dataset. """ pipeline_columns = { - PREVIOUS_VALUE: - BuybackAuthorizations.previous_value.latest, + PREVIOUS_BUYBACK_AMOUNT: + BuybackAuthorizations.previous_amount.latest, PREVIOUS_BUYBACK_ANNOUNCEMENT: BuybackAuthorizations.previous_date.latest, - PREVIOUS_VALUE_TYPE: - BuybackAuthorizations.previous_value_type.latest, + PREVIOUS_BUYBACK_UNIT: + BuybackAuthorizations.previous_unit.latest, PREVIOUS_BUYBACK_TYPE: - BuybackAuthorizations.previous_buyback_type.latest, + BuybackAuthorizations.previous_type.latest, DAYS_SINCE_PREV: BusinessDaysSinceBuybackAuth(), } @@ -100,23 +88,38 @@ class BuybackAuthLoaderTestCase(WithPipelineEventDataLoader, ZiplineTestCase): def setup(self, dates): cols = { - PREVIOUS_VALUE: self.get_sids_to_frames(zip_with_floats, - [['NaN', 1, 15]], - date_intervals, - dates), + PREVIOUS_BUYBACK_AMOUNT: self.get_sids_to_frames(zip_with_floats, + [['NaN', 1, 15]], + date_intervals, + dates, + 'float', + 'NaN'), PREVIOUS_BUYBACK_ANNOUNCEMENT: self.get_sids_to_frames( zip_with_dates, [['NaT', '2014-01-04', '2014-01-09']], date_intervals, - dates), - PREVIOUS_VALUE_TYPE: self.get_sids_to_frames( - zip_with_strs, [["", "$M", "Mshares"]], date_intervals, dates + dates, + 'datetime64[ns]', + 'NaN' + ), + PREVIOUS_BUYBACK_UNIT: self.get_sids_to_frames( + zip_with_strs, + [[None, "$M", "Mshares"]], + date_intervals, + dates, + 'category', + None ), PREVIOUS_BUYBACK_TYPE: self.get_sids_to_frames( - zip_with_strs, [["", "New", "Additional"]], date_intervals, - dates + zip_with_strs, + [[None, "New", "Additional"]], + date_intervals, + dates, + 'category', + None ) } + cols[DAYS_SINCE_PREV] = self._compute_busday_offsets( cols[PREVIOUS_BUYBACK_ANNOUNCEMENT] ) @@ -137,10 +140,10 @@ class BlazeBuybackAuthLoaderTestCase(BuybackAuthLoaderTestCase): pd.DataFrame({ BUYBACK_ANNOUNCEMENT_FIELD_NAME: frame[BUYBACK_ANNOUNCEMENT_FIELD_NAME], - VALUE_FIELD_NAME: - frame[VALUE_FIELD_NAME], - VALUE_TYPE_FIELD_NAME: - frame[VALUE_TYPE_FIELD_NAME], + BUYBACK_AMOUNT_FIELD_NAME: + frame[BUYBACK_AMOUNT_FIELD_NAME], + BUYBACK_UNIT_FIELD_NAME: + frame[BUYBACK_UNIT_FIELD_NAME], BUYBACK_TYPE_FIELD_NAME: frame[BUYBACK_TYPE_FIELD_NAME], TS_FIELD_NAME: @@ -152,7 +155,8 @@ class BlazeBuybackAuthLoaderTestCase(BuybackAuthLoaderTestCase): class BlazeBuybackAuthLoaderNotInteractiveTestCase( - BlazeBuybackAuthLoaderTestCase): + BlazeBuybackAuthLoaderTestCase +): """Test case for passing a non-interactive symbol and a dict of resources. """ def pipeline_event_loader_args(self, dates): diff --git a/tests/pipeline/test_consensus_estimates.py b/tests/pipeline/test_consensus_estimates.py index 3e0faae4..d7aa88ac 100644 --- a/tests/pipeline/test_consensus_estimates.py +++ b/tests/pipeline/test_consensus_estimates.py @@ -301,8 +301,12 @@ class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader, def setup(self, dates): cols = { PREVIOUS_RELEASE_DATE: - self.get_expected_previous_event_dates(dates), - NEXT_RELEASE_DATE: self.get_expected_next_event_dates(dates) + self.get_expected_previous_event_dates( + dates, 'datetime64[ns]', 'NaN' + ), + NEXT_RELEASE_DATE: self.get_expected_next_event_dates( + dates, 'datetime64[ns]', 'NaN' + ) } for field_name in field_name_to_expected_col: cols[field_name] = self.get_sids_to_frames( @@ -310,7 +314,9 @@ class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader, self.prev_date_intervals if field_name.startswith("previous") else self.next_date_intervals, - dates + dates, + 'float', + 'NaN' ) return cols diff --git a/tests/pipeline/test_dividends.py b/tests/pipeline/test_dividends.py index 418517f3..7534ef73 100644 --- a/tests/pipeline/test_dividends.py +++ b/tests/pipeline/test_dividends.py @@ -206,10 +206,11 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader, amounts = [['NaN', 1, 15], ['NaN', 7, 13], ['NaN', 3, 1], ['NaN', 23]] cols = { PREVIOUS_ANNOUNCEMENT: self.get_sids_to_frames( - zip_with_dates, announcement_dates, date_intervals, dates + zip_with_dates, announcement_dates, date_intervals, dates, + 'datetime64[ns]', 'NaN' ), PREVIOUS_AMOUNT: self.get_sids_to_frames( - zip_with_floats, amounts, date_intervals, dates + zip_with_floats, amounts, date_intervals, dates, 'float', 'NaN' ), } @@ -281,16 +282,20 @@ class DividendsByExDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase): NEXT_EX_DATE: self.get_sids_to_frames( zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates, + 'datetime64[ns]', 'NaN' ), PREVIOUS_EX_DATE: self.get_sids_to_frames( zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, - dates + dates, + 'datetime64[ns]', 'NaN' ), NEXT_AMOUNT: self.get_sids_to_frames( - zip_with_floats, next_amounts, next_date_intervals, dates + zip_with_floats, next_amounts, next_date_intervals, dates, + 'float', 'NaN' ), PREVIOUS_AMOUNT: self.get_sids_to_frames( - zip_with_floats, prev_amounts, prev_date_intervals, dates + zip_with_floats, prev_amounts, prev_date_intervals, dates, + 'float', 'NaN' ) } @@ -360,17 +365,21 @@ class DividendsByPayDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase): return { NEXT_PAY_DATE: self.get_sids_to_frames( zip_with_dates, next_ex_and_pay_dates, next_date_intervals, - dates + dates, + 'datetime64[ns]', 'NaN' ), PREVIOUS_PAY_DATE: self.get_sids_to_frames( zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, - dates + dates, + 'datetime64[ns]', 'NaN' ), NEXT_AMOUNT: self.get_sids_to_frames( - zip_with_floats, next_amounts, next_date_intervals, dates + zip_with_floats, next_amounts, next_date_intervals, dates, + 'float', 'NaN' ), PREVIOUS_AMOUNT: self.get_sids_to_frames( - zip_with_floats, prev_amounts, prev_date_intervals, dates + zip_with_floats, prev_amounts, prev_date_intervals, dates, + 'float', 'NaN' ) } diff --git a/tests/pipeline/test_earnings.py b/tests/pipeline/test_earnings.py index 28727832..74f58597 100644 --- a/tests/pipeline/test_earnings.py +++ b/tests/pipeline/test_earnings.py @@ -51,9 +51,12 @@ class EarningsCalendarLoaderTestCase(WithNextAndPreviousEventDataLoader, def setup(self, dates): cols = { PREVIOUS_ANNOUNCEMENT: self.get_expected_previous_event_dates( - dates + dates, + 'datetime64[ns]', 'NaN' + ), + NEXT_ANNOUNCEMENT: self.get_expected_next_event_dates( + dates, 'datetime64[ns]', 'NaN' ), - NEXT_ANNOUNCEMENT: self.get_expected_next_event_dates(dates), } cols[DAYS_TO_NEXT] = self._compute_busday_offsets( cols[NEXT_ANNOUNCEMENT] diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py index a264c7f0..9dbab69d 100644 --- a/zipline/pipeline/common.py +++ b/zipline/pipeline/common.py @@ -4,12 +4,13 @@ Common constants for Pipeline. ACTUAL_VALUE_FIELD_NAME = 'actual_value' AD_FIELD_NAME = 'asof_date' ANNOUNCEMENT_FIELD_NAME = 'announcement_date' -CASH_FIELD_NAME = 'cash' -CASH_AMOUNT_FIELD_NAME = 'cash_amount' -COUNT_FIELD_NAME = 'count' +BUYBACK_AMOUNT_FIELD_NAME = 'buyback_amount' BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date' BUYBACK_TYPE_FIELD_NAME = 'buyback_type' +BUYBACK_UNIT_FIELD_NAME = 'buyback_unit' CASH_AMOUNT_FIELD_NAME = 'cash_amount' +CASH_FIELD_NAME = 'cash' +COUNT_FIELD_NAME = 'count' DAYS_SINCE_PREV = 'days_since_prev' DAYS_SINCE_PREV_DISCLOSURE = 'days_since_prev_disclosure' DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement' @@ -26,36 +27,36 @@ MEAN_FIELD_NAME = 'mean' NEXT_AMOUNT = 'next_amount' NEXT_ANNOUNCEMENT = 'next_announcement' NEXT_COUNT = 'next_count' +NEXT_EX_DATE = 'next_ex_date' NEXT_FISCAL_QUARTER = 'next_fiscal_quarter' NEXT_FISCAL_YEAR = 'next_fiscal_year' -NEXT_EX_DATE = 'next_ex_date' NEXT_HIGH = 'next_high' NEXT_LOW = 'next_low' NEXT_MEAN = 'next_mean' NEXT_PAY_DATE = 'next_pay_date' -NUM_SHARES = 'number_shares' NEXT_RELEASE_DATE = 'next_release_date' NEXT_STANDARD_DEVIATION = 'next_standard_deviation' +NUM_SHARES = 'number_shares' PAY_DATE_FIELD_NAME = 'pay_date' PERCENT_SHARES = 'percent_shares' PREVIOUS_ACTUAL_VALUE = 'previous_actual_value' PREVIOUS_AMOUNT = 'previous_amount' PREVIOUS_ANNOUNCEMENT = 'previous_announcement' +PREVIOUS_BUYBACK_AMOUNT = 'previous_value' PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement' PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash' PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count' -PREVIOUS_DISCLOSURE_DATE = 'previous_disclosure_date' -PREVIOUS_COUNT = 'previous_count' PREVIOUS_BUYBACK_TYPE = 'previous_buyback_type' -PREVIOUS_VALUE = 'previous_value' -PREVIOUS_VALUE_TYPE = 'previous_value_type' +PREVIOUS_BUYBACK_UNIT = 'previous_value_type' +PREVIOUS_COUNT = 'previous_count' +PREVIOUS_DISCLOSURE_DATE = 'previous_disclosure_date' PREVIOUS_EX_DATE = 'previous_ex_date' -PREVIOUS_NUM_SHARES = 'previous_number_shares' PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter' PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year' PREVIOUS_HIGH = 'previous_high' PREVIOUS_LOW = 'previous_low' PREVIOUS_MEAN = 'previous_mean' +PREVIOUS_NUM_SHARES = 'previous_number_shares' PREVIOUS_PAY_DATE = 'previous_pay_date' PREVIOUS_PERCENT_SHARES = 'previous_percentage' PREVIOUS_RELEASE_DATE = 'previous_release_date' @@ -65,5 +66,3 @@ SHARE_COUNT_FIELD_NAME = 'share_count' SID_FIELD_NAME = 'sid' STANDARD_DEVIATION_FIELD_NAME = 'standard_deviation' TS_FIELD_NAME = 'timestamp' -VALUE_FIELD_NAME = 'value' -VALUE_TYPE_FIELD_NAME = 'value_type' diff --git a/zipline/pipeline/data/buyback_auth.py b/zipline/pipeline/data/buyback_auth.py index 781e01aa..d0174c9d 100644 --- a/zipline/pipeline/data/buyback_auth.py +++ b/zipline/pipeline/data/buyback_auth.py @@ -1,8 +1,11 @@ """ Datasets representing dates of recently announced buyback authorizations. """ -from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, \ +from zipline.utils.numpy_utils import ( + datetime64ns_dtype, + float64_dtype, categorical_dtype +) from .dataset import Column, DataSet @@ -12,7 +15,7 @@ class BuybackAuthorizations(DataSet): Dataset representing dates of recently announced cash buyback authorizations. """ - previous_value = Column(float64_dtype) + previous_amount = Column(float64_dtype) previous_date = Column(datetime64ns_dtype) - previous_value_type = Column(categorical_dtype, missing_value="<>") - previous_buyback_type = Column(categorical_dtype, missing_value="<>") + previous_unit = Column(categorical_dtype, missing_value=None) + previous_type = Column(categorical_dtype, missing_value=None) diff --git a/zipline/pipeline/factors/events.py b/zipline/pipeline/factors/events.py index 16205af9..bed4f9a8 100644 --- a/zipline/pipeline/factors/events.py +++ b/zipline/pipeline/factors/events.py @@ -136,11 +136,11 @@ class BusinessDaysSinceBuybackAuth( ): """ Factor returning the number of **business days** (not trading days!) since - the most recent cash buyback authorization for each asset. + the most recent buyback authorization for each asset. See Also -------- - zipline.pipeline.factors.BusinessDaysSinceCashBuybackAuth + zipline.pipeline.factors.BusinessDaysSinceBuybackAuth """ inputs = [BuybackAuthorizations.previous_date] diff --git a/zipline/pipeline/loaders/__init__.py b/zipline/pipeline/loaders/__init__.py index b69d6e41..270cb9ce 100644 --- a/zipline/pipeline/loaders/__init__.py +++ b/zipline/pipeline/loaders/__init__.py @@ -1,7 +1,7 @@ from ._13d_filings import _13DFilingsLoader +from .buyback_auth import BuybackAuthorizationsLoader from .consensus_estimates import ConsensusEstimatesLoader from .earnings import EarningsCalendarLoader -from .buyback_auth import BuybackAuthorizationsLoader from .dividends import ( DividendsByAnnouncementDateLoader, DividendsByExDateLoader, diff --git a/zipline/pipeline/loaders/blaze/buyback_auth.py b/zipline/pipeline/loaders/blaze/buyback_auth.py index 7fe15900..39efcabe 100644 --- a/zipline/pipeline/loaders/blaze/buyback_auth.py +++ b/zipline/pipeline/loaders/blaze/buyback_auth.py @@ -1,12 +1,12 @@ from .core import ( - TS_FIELD_NAME, SID_FIELD_NAME, + TS_FIELD_NAME, ) from zipline.pipeline.common import ( + BUYBACK_AMOUNT_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME, - VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME, ) from zipline.pipeline.data import BuybackAuthorizations from zipline.pipeline.loaders import BuybackAuthorizationsLoader @@ -40,15 +40,15 @@ class BlazeBuybackAuthorizationsLoader(BlazeEventsLoader): {SID_FIELD_NAME}: int64, {TS_FIELD_NAME}: datetime, {BUYBACK_ANNOUNCEMENT_FIELD_NAME}: ?datetime, - {VALUE_FIELD_NAME}: ?float64, - {VALUE_TYPE_FIELD_NAME}: ?str, + {BUYBACK_AMOUNT_FIELD_NAME}: ?float64, + {BUYBACK_UNIT_FIELD_NAME}: ?str, {BUYBACK_TYPE_FIELD_NAME}: ?str, }} Where each row of the table is a record including the sid to identify the company, the timestamp where we learned about the announcement, the - date when the buyback was announced, the buyback value, the value type - (in cash or in shares), and the buyback type. + date when the buyback was announced, the buyback amount, the buyback unit, + and the buyback type. If the '{TS_FIELD_NAME}' field is not included it is assumed that we start the backtest with knowledge of all announcements. @@ -57,8 +57,8 @@ class BlazeBuybackAuthorizationsLoader(BlazeEventsLoader): TS_FIELD_NAME=TS_FIELD_NAME, SID_FIELD_NAME=SID_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME=BUYBACK_ANNOUNCEMENT_FIELD_NAME, - VALUE_FIELD_NAME=VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME=VALUE_TYPE_FIELD_NAME, + BUYBACK_AMOUNT_FIELD_NAME=BUYBACK_AMOUNT_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME=BUYBACK_UNIT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME=BUYBACK_TYPE_FIELD_NAME ) @@ -66,8 +66,8 @@ class BlazeBuybackAuthorizationsLoader(BlazeEventsLoader): TS_FIELD_NAME, SID_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, - VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME, + BUYBACK_AMOUNT_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME }) diff --git a/zipline/pipeline/loaders/buyback_auth.py b/zipline/pipeline/loaders/buyback_auth.py index 327d5743..3f796b0e 100644 --- a/zipline/pipeline/loaders/buyback_auth.py +++ b/zipline/pipeline/loaders/buyback_auth.py @@ -5,10 +5,10 @@ Reference implementation for buyback auth loaders. from ..data import BuybackAuthorizations from .events import EventsLoader from zipline.pipeline.common import ( + BUYBACK_AMOUNT_FIELD_NAME, BUYBACK_ANNOUNCEMENT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME, - VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME + BUYBACK_UNIT_FIELD_NAME ) from zipline.utils.memoize import lazyval @@ -19,12 +19,12 @@ class BuybackAuthorizationsLoader(EventsLoader): :class:`zipline.pipeline.data.BuybackAuthorizations`. events_by_sid: dict[sid -> pd.DataFrame(knowledge date, - event date, value, value type, buyback type)] + event date, buyback amount, buyback unit, buyback type)] """ expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME, - VALUE_FIELD_NAME, - VALUE_TYPE_FIELD_NAME, + BUYBACK_AMOUNT_FIELD_NAME, + BUYBACK_UNIT_FIELD_NAME, BUYBACK_TYPE_FIELD_NAME]) event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME @@ -42,10 +42,10 @@ class BuybackAuthorizationsLoader(EventsLoader): ) @lazyval - def previous_value_loader(self): + def previous_amount_loader(self): return self._previous_event_value_loader( - self.dataset.previous_value, - VALUE_FIELD_NAME + self.dataset.previous_amount, + BUYBACK_AMOUNT_FIELD_NAME ) @lazyval @@ -55,15 +55,15 @@ class BuybackAuthorizationsLoader(EventsLoader): ) @lazyval - def previous_value_type_loader(self): + def previous_unit_loader(self): return self._previous_event_value_loader( - self.dataset.previous_value_type, - VALUE_TYPE_FIELD_NAME, + self.dataset.previous_unit, + BUYBACK_UNIT_FIELD_NAME, ) @lazyval - def previous_buyback_type_loader(self): + def previous_type_loader(self): return self._previous_event_value_loader( - self.dataset.previous_buyback_type, + self.dataset.previous_type, BUYBACK_TYPE_FIELD_NAME, ) diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index f4b267db..ec15680c 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -5,7 +5,7 @@ import pandas as pd from six import iteritems from six.moves import zip -from zipline.utils.numpy_utils import NaTns +from zipline.utils.numpy_utils import categorical_dtype, NaTns def next_event_frame(events_by_sid, @@ -116,9 +116,11 @@ def previous_event_frame(events_by_sid, next_date_frame """ sids = list(events_by_sid) + populate_value = None if field_dtype == categorical_dtype else \ + missing_value out = np.full( (len(date_index), len(sids)), - missing_value, + populate_value, dtype=field_dtype ) d_n = date_index[-1].asm8 @@ -140,6 +142,8 @@ def previous_event_frame(events_by_sid, frame = pd.DataFrame(out, index=date_index, columns=sids) frame.ffill(inplace=True) + if field_dtype == categorical_dtype: + frame[frame.isnull()] = missing_value return frame @@ -280,8 +284,8 @@ def zip_with_floats(dates, flts): return pd.Series(flts, index=dates, dtype='float') -def zip_with_strs(dates, flts): - return pd.Series(flts, index=dates, dtype='string') +def zip_with_strs(dates, strs): + return pd.Series(strs, index=dates, dtype='object') def zip_with_dates(index_dates, dts): diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py index 77c33671..16931110 100644 --- a/zipline/testing/fixtures.py +++ b/zipline/testing/fixtures.py @@ -887,7 +887,9 @@ class WithPipelineEventDataLoader(with_metaclass( zip_date_index_with_vals, vals, date_intervals, - dates): + dates, + dtype_name, + missing_dtype): """ Construct a DataFrame that maps sid to the expected values for the given dates. @@ -907,6 +909,11 @@ class WithPipelineEventDataLoader(with_metaclass( dates: DatetimeIndex The dates which will serve as the index for each Series for each sid in the DataFrame. + dtype_name: str + The name of the dtype of the values in `vals`. + missing_dtype: str + The name of the value that should be used as the missing value + for the dtype of `vals` - e.g., 'NaN' for floats. """ frame = pd.DataFrame({sid: get_values_for_date_ranges( zip_date_index_with_vals, @@ -914,10 +921,10 @@ class WithPipelineEventDataLoader(with_metaclass( pd.DatetimeIndex(list(zip(*date_intervals[sid]))[0]), pd.DatetimeIndex(list(zip(*date_intervals[sid]))[1]), dates - ) for sid in self.get_sids()[:-1]}) + ).astype(dtype_name) for sid in self.get_sids()[:-1]}) frame[self.get_sids()[-1]] = zip_date_index_with_vals( - dates, ['NaN'] * len(dates) - ) + dates, [missing_dtype] * len(dates) + ).astype(dtype_name) return frame @staticmethod @@ -1232,18 +1239,23 @@ class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader): ['NaT'] ] - def get_expected_previous_event_dates(self, dates): + def get_expected_previous_event_dates(self, dates, dtype_name, + missing_dtype): return self.get_sids_to_frames( zip_with_dates, self.prev_dates, self.prev_date_intervals, - dates + dates, + dtype_name, + missing_dtype ) - def get_expected_next_event_dates(self, dates): + def get_expected_next_event_dates(self, dates, dtype_name, missing_dtype): return self.get_sids_to_frames( zip_with_dates, self.next_dates, self.next_date_intervals, - dates + dates, + dtype_name, + missing_dtype )