ENH: add custom dataset for estimize

MAINT: alphabetize constants

MAINT: remove obsolete column

TST: refactor tests to use common code

MAINT: remove unneeded fields from dataset

MAINT: remove obsolete earnings estimates columns and refactor
This commit is contained in:
Maya Tydykov
2016-04-14 18:10:47 -04:00
parent a5f7fc7d6d
commit 1531568899
13 changed files with 891 additions and 260 deletions
+26 -35
View File
@@ -34,7 +34,6 @@ from zipline.pipeline.loaders.blaze import (
BlazeShareBuybackAuthorizationsLoader,
)
from zipline.pipeline.loaders.utils import (
get_values_for_date_ranges,
zip_with_floats,
zip_with_dates
)
@@ -42,8 +41,9 @@ from zipline.testing.fixtures import (
WithPipelineEventDataLoader, ZiplineTestCase
)
date_intervals = [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
['2014-01-10', None]]
date_intervals = [
[[None, '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', None]]
]
buyback_authorizations_cases = [
pd.DataFrame({
@@ -63,18 +63,6 @@ buyback_authorizations_cases = [
]
def get_expected_previous_values(zip_date_index_with_vals,
dates,
vals_for_date_intervals):
return pd.DataFrame({
0: get_values_for_date_ranges(zip_date_index_with_vals,
vals_for_date_intervals,
date_intervals,
dates),
1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)),
}, index=dates)
class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
ZiplineTestCase):
"""
@@ -103,16 +91,17 @@ class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
loader_type = CashBuybackAuthorizationsLoader
def setup(self, dates):
cols = {}
_expected_previous_cash = get_expected_previous_values(
zip_with_floats, dates,
['NaN', 10, 20]
)
cols[
PREVIOUS_BUYBACK_ANNOUNCEMENT
] = get_expected_previous_values(zip_with_dates, dates,
['NaT', '2014-01-04', '2014-01-09'])
cols[PREVIOUS_BUYBACK_CASH] = _expected_previous_cash
cols = {
PREVIOUS_BUYBACK_CASH: self.get_sids_to_frames(zip_with_floats,
[['NaN', 10, 20]],
date_intervals,
dates),
PREVIOUS_BUYBACK_ANNOUNCEMENT: self.get_sids_to_frames(
zip_with_dates,
[['NaT', '2014-01-04', '2014-01-09']],
date_intervals,
dates),
}
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
)
@@ -147,16 +136,18 @@ class ShareBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
loader_type = ShareBuybackAuthorizationsLoader
def setup(self, dates):
cols = {}
cols[
PREVIOUS_BUYBACK_SHARE_COUNT
] = get_expected_previous_values(zip_with_floats,
dates,
['NaN', 1, 15])
cols[
PREVIOUS_BUYBACK_ANNOUNCEMENT
] = get_expected_previous_values(zip_with_dates, dates,
['NaT', '2014-01-04', '2014-01-09'])
cols = {PREVIOUS_BUYBACK_SHARE_COUNT:
self.get_sids_to_frames(zip_with_floats,
[['NaN', 1, 15]],
date_intervals,
dates,),
PREVIOUS_BUYBACK_ANNOUNCEMENT:
self.get_sids_to_frames(zip_with_dates,
[['NaT', '2014-01-04', '2014-01-09']],
date_intervals,
dates,),
}
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
)
+327
View File
@@ -0,0 +1,327 @@
"""
Tests for the reference loader for ConsensusEstimates.
"""
import blaze as bz
from blaze.compute.core import swap_resources_into_scope
import pandas as pd
from six import iteritems
from zipline.pipeline.common import (
COUNT_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
HIGH_FIELD_NAME,
LOW_FIELD_NAME,
MEAN_FIELD_NAME,
NEXT_COUNT,
NEXT_FISCAL_QUARTER,
NEXT_FISCAL_YEAR,
NEXT_HIGH,
NEXT_LOW,
NEXT_RELEASE_DATE,
NEXT_STANDARD_DEVIATION,
PREVIOUS_COUNT,
PREVIOUS_FISCAL_QUARTER,
PREVIOUS_FISCAL_YEAR,
PREVIOUS_HIGH,
PREVIOUS_LOW,
PREVIOUS_MEAN, NEXT_MEAN,
PREVIOUS_RELEASE_DATE,
PREVIOUS_STANDARD_DEVIATION,
RELEASE_DATE_FIELD_NAME,
STANDARD_DEVIATION_FIELD_NAME,
SID_FIELD_NAME)
from zipline.pipeline.data import ConsensusEstimates
from zipline.pipeline.loaders.consensus_estimates import (
ConsensusEstimatesLoader
)
from zipline.pipeline.loaders.blaze import BlazeConsensusEstimatesLoader
from zipline.pipeline.loaders.utils import (
zip_with_floats
)
from zipline.testing.fixtures import (
ZiplineTestCase,
WithNextAndPreviousEventDataLoader
)
# Estimate values for each test sid: the list position is the sid (see the
# ``enumerate`` in ``ConsensusEstimatesLoaderTestCase.get_dataset``), and each
# frame is concatenated column-wise with the same-position frame of
# ``base_cases``, which supplies the timestamp and release-date columns.
# The "K"/"A" labels below mirror the labels used on ``base_cases``
# (presumably K = knowledge/timestamp date, A = the event date -- confirm).
consensus_estimates_cases = [
# K1--K2--A1--A2.
pd.DataFrame({
STANDARD_DEVIATION_FIELD_NAME: (.5, .6),
COUNT_FIELD_NAME: (1, 2),
FISCAL_QUARTER_FIELD_NAME: (1, 1),
HIGH_FIELD_NAME: (.6, .7),
MEAN_FIELD_NAME: (.1, .2),
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
LOW_FIELD_NAME: (.05, .06),
}),
# K1--K2--A2--A1.
pd.DataFrame({
STANDARD_DEVIATION_FIELD_NAME: (.6, .7),
COUNT_FIELD_NAME: (2, 3),
FISCAL_QUARTER_FIELD_NAME: (1, 1),
HIGH_FIELD_NAME: (.7, .8),
MEAN_FIELD_NAME: (.2, .3),
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
LOW_FIELD_NAME: (.06, .07),
}),
# K1--A1--K2--A2.
pd.DataFrame({
STANDARD_DEVIATION_FIELD_NAME: (.7, .8),
COUNT_FIELD_NAME: (3, 4),
FISCAL_QUARTER_FIELD_NAME: (1, 1),
HIGH_FIELD_NAME: (.8, .9),
MEAN_FIELD_NAME: (.3, .4),
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
LOW_FIELD_NAME: (.07, .08),
}),
# K1 == K2.
pd.DataFrame({
STANDARD_DEVIATION_FIELD_NAME: (.8, .9),
COUNT_FIELD_NAME: (4, 5),
FISCAL_QUARTER_FIELD_NAME: (1, 1),
HIGH_FIELD_NAME: (.9, 1.0),
MEAN_FIELD_NAME: (.4, .5),
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
LOW_FIELD_NAME: (.08, .09),
}),
# Sid with no events at all: an empty frame that only declares the columns.
# NOTE(review): the dtype below is datetime64[ns] although every column here
# holds numeric estimate data in the other cases; the frame has no rows, so
# this may be harmless -- confirm whether a float dtype was intended.
pd.DataFrame(
columns=[STANDARD_DEVIATION_FIELD_NAME,
COUNT_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
HIGH_FIELD_NAME,
MEAN_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
LOW_FIELD_NAME],
dtype='datetime64[ns]'
),
]
# Expected pipeline output for every estimate field.
#
# Each table below holds one inner list per sid (same order as
# ``consensus_estimates_cases``).  The inner list gives the value expected
# during each successive date interval for that sid; the ``next_*`` tables are
# paired with ``next_date_intervals`` and the ``prev_*`` tables with
# ``prev_date_intervals`` by ``ConsensusEstimatesLoaderTestCase.setup``.
# 'NaN' marks an interval in which no next/previous event is known.  The final
# single-element ``['NaN']`` entry belongs to the sid that has no events.
next_standard_deviation = [
['NaN', .5, .6, 'NaN'],
['NaN', .6, .7, .6, 'NaN'],
['NaN', .7, 'NaN', .8, 'NaN'],
['NaN', .8, .9, 'NaN'],
['NaN']
]
prev_standard_deviation = [
['NaN', .5, .6],
['NaN', .7, .6],
['NaN', .7, .8],
['NaN', .8, .9],
['NaN']
]
next_count = [
['NaN', 1, 2, 'NaN'],
['NaN', 2, 3, 2, 'NaN'],
['NaN', 3, 'NaN', 4, 'NaN'],
['NaN', 4, 5, 'NaN'],
['NaN']
]
prev_count = [
['NaN', 1, 2],
['NaN', 3, 2],
['NaN', 3, 4],
['NaN', 4, 5],
['NaN']
]
next_fiscal_quarter = [
['NaN', 1, 1, 'NaN'],
['NaN', 1, 1, 1, 'NaN'],
['NaN', 1, 'NaN', 1, 'NaN'],
['NaN', 1, 1, 'NaN'],
['NaN']
]
prev_fiscal_quarter = [
['NaN', 1, 1],
['NaN', 1, 1],
['NaN', 1, 1],
['NaN', 1, 1],
['NaN']
]
next_high = [
['NaN', .6, .7, 'NaN'],
['NaN', .7, .8, .7, 'NaN'],
['NaN', .8, 'NaN', .9, 'NaN'],
['NaN', .9, 1.0, 'NaN'],
['NaN']
]
prev_high = [
['NaN', .6, .7],
['NaN', .8, .7],
['NaN', .8, .9],
['NaN', .9, 1.0],
['NaN']
]
next_mean = [
['NaN', .1, .2, 'NaN'],
['NaN', .2, .3, .2, 'NaN'],
['NaN', .3, 'NaN', .4, 'NaN'],
['NaN', .4, .5, 'NaN'],
['NaN']
]
prev_mean = [
['NaN', .1, .2],
['NaN', .3, .2],
['NaN', .3, .4],
['NaN', .4, .5],
['NaN']
]
next_fiscal_year = [
['NaN', 2014, 2014, 'NaN'],
['NaN', 2014, 2014, 2014, 'NaN'],
['NaN', 2014, 'NaN', 2014, 'NaN'],
['NaN', 2014, 2014, 'NaN'],
['NaN']
]
prev_fiscal_year = [
['NaN', 2014, 2014],
['NaN', 2014, 2014],
['NaN', 2014, 2014],
['NaN', 2014, 2014],
['NaN']
]
next_low = [
['NaN', .05, .06, 'NaN'],
['NaN', .06, .07, .06, 'NaN'],
['NaN', .07, 'NaN', .08, 'NaN'],
['NaN', .08, .09, 'NaN'],
['NaN']
]
prev_low = [
['NaN', .05, .06],
['NaN', .07, .06],
['NaN', .07, .08],
['NaN', .08, .09],
['NaN']
]
# Maps each pipeline output column name to its expected-value table above.
# ``setup`` picks the interval set from the key's "previous" prefix, so the
# keys must keep the previous_*/next_* naming.
field_name_to_expected_col = {
PREVIOUS_STANDARD_DEVIATION: prev_standard_deviation,
NEXT_STANDARD_DEVIATION: next_standard_deviation,
PREVIOUS_COUNT: prev_count,
NEXT_COUNT: next_count,
PREVIOUS_FISCAL_QUARTER: prev_fiscal_quarter,
NEXT_FISCAL_QUARTER: next_fiscal_quarter,
PREVIOUS_HIGH: prev_high,
NEXT_HIGH: next_high,
PREVIOUS_MEAN: prev_mean,
NEXT_MEAN: next_mean,
PREVIOUS_FISCAL_YEAR: prev_fiscal_year,
NEXT_FISCAL_YEAR: next_fiscal_year,
PREVIOUS_LOW: prev_low,
NEXT_LOW: next_low
}
class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader,
                                       ZiplineTestCase):
    """
    Tests for loading the consensus estimates data.
    """
    # Output name -> pipeline term requested from the dataset.
    pipeline_columns = {
        NEXT_RELEASE_DATE:
            ConsensusEstimates.next_release_date.latest,
        PREVIOUS_RELEASE_DATE:
            ConsensusEstimates.previous_release_date.latest,
        PREVIOUS_STANDARD_DEVIATION:
            ConsensusEstimates.previous_standard_deviation.latest,
        NEXT_STANDARD_DEVIATION:
            ConsensusEstimates.next_standard_deviation.latest,
        PREVIOUS_COUNT:
            ConsensusEstimates.previous_count.latest,
        NEXT_COUNT:
            ConsensusEstimates.next_count.latest,
        PREVIOUS_FISCAL_QUARTER:
            ConsensusEstimates.previous_fiscal_quarter.latest,
        NEXT_FISCAL_QUARTER:
            ConsensusEstimates.next_fiscal_quarter.latest,
        PREVIOUS_HIGH:
            ConsensusEstimates.previous_high.latest,
        NEXT_HIGH:
            ConsensusEstimates.next_high.latest,
        PREVIOUS_MEAN:
            ConsensusEstimates.previous_mean.latest,
        NEXT_MEAN:
            ConsensusEstimates.next_mean.latest,
        PREVIOUS_FISCAL_YEAR:
            ConsensusEstimates.previous_fiscal_year.latest,
        NEXT_FISCAL_YEAR:
            ConsensusEstimates.next_fiscal_year.latest,
        PREVIOUS_LOW:
            ConsensusEstimates.previous_low.latest,
        NEXT_LOW:
            ConsensusEstimates.next_low.latest,
    }

    @classmethod
    def get_dataset(cls):
        """
        Return a dict mapping sid to its raw events frame.

        For each sid the date columns of ``base_cases`` (with 'other_date'
        renamed to the release-date field) are joined column-wise with the
        estimate values from ``consensus_estimates_cases``.
        """
        sid_to_events = {}
        for sid, estimates in enumerate(consensus_estimates_cases):
            event_dates = cls.base_cases[sid].rename(
                columns={'other_date': RELEASE_DATE_FIELD_NAME},
            )
            sid_to_events[sid] = pd.concat([event_dates, estimates], axis=1)
        return sid_to_events

    loader_type = ConsensusEstimatesLoader

    def setup(self, dates):
        """
        Return a dict mapping each pipeline column name to the DataFrame of
        values expected for `dates`.
        """
        cols = {
            PREVIOUS_RELEASE_DATE:
                self.get_expected_previous_event_dates(dates),
            NEXT_RELEASE_DATE: self.get_expected_next_event_dates(dates),
        }
        for field_name, expected_vals in iteritems(
                field_name_to_expected_col):
            # "previous_*" columns change value on the previous-event
            # intervals; every other column follows the next-event intervals.
            if field_name.startswith("previous"):
                intervals = self.prev_date_intervals
            else:
                intervals = self.next_date_intervals
            cols[field_name] = self.get_sids_to_frames(
                zip_with_floats, expected_vals, intervals, dates,
            )
        return cols
class BlazeConsensusEstimatesLoaderTestCase(ConsensusEstimatesLoaderTestCase):
    # Re-runs the inherited tests with the blaze-backed loader.
    loader_type = BlazeConsensusEstimatesLoader

    def pipeline_event_loader_args(self, dates):
        # The parent returns a 2-tuple whose second item maps sid -> frame;
        # the blaze loader instead takes one flat table carrying a sid column.
        _, sid_to_frame = super(
            BlazeConsensusEstimatesLoaderTestCase,
            self,
        ).pipeline_event_loader_args(dates)

        tagged = []
        for sid, events in iteritems(sid_to_frame):
            with_sid = events.copy()
            with_sid[SID_FIELD_NAME] = sid
            tagged.append(with_sid)

        flat = pd.concat(tagged).reset_index(drop=True)
        # A one-element tuple: the single positional argument for the loader.
        return (bz.data(flat),)
class BlazeConsensusEstimatesLoaderNotInteractiveTestCase(
        BlazeConsensusEstimatesLoaderTestCase):
    """Test case for passing a non-interactive symbol and a dict of resources.
    """

    def pipeline_event_loader_args(self, dates):
        # The parent yields exactly one bound expression; hand it to
        # swap_resources_into_scope together with an empty scope dict and
        # return whatever that produces as the loader arguments.
        parent_args = super(
            BlazeConsensusEstimatesLoaderNotInteractiveTestCase,
            self,
        ).pipeline_event_loader_args(dates)
        (bound_expr,) = parent_args
        return swap_resources_into_scope(bound_expr, {})
+40 -69
View File
@@ -45,7 +45,6 @@ from zipline.pipeline.loaders.dividends import (
DividendsByPayDateLoader
)
from zipline.pipeline.loaders.utils import (
get_values_for_date_ranges,
zip_with_dates,
zip_with_floats
)
@@ -159,33 +158,6 @@ next_amounts = [['NaN', 1, 15, 'NaN'],
['NaN', 6, 23, 'NaN']]
def get_vals_for_dates(zip_date_index_with_vals,
vals,
date_invervals,
dates):
return pd.DataFrame({
0: get_values_for_date_ranges(zip_date_index_with_vals,
vals[0],
date_invervals[0],
dates),
1: get_values_for_date_ranges(zip_date_index_with_vals,
vals[1],
date_invervals[1],
dates),
2: get_values_for_date_ranges(zip_date_index_with_vals,
vals[2],
date_invervals[2],
dates),
# Assume the latest of 2 cash values is used if we find out about 2
# announcements that happened on the same day for the same sid.
3: get_values_for_date_ranges(zip_date_index_with_vals,
vals[3],
date_invervals[3],
dates),
4: zip_date_index_with_vals(dates, ['NaN'] * len(dates)),
}, index=dates)
class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader,
ZiplineTestCase):
"""
@@ -232,14 +204,14 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader,
['NaT', '2014-01-04', '2014-01-14'],
['NaT', '2014-01-04']]
amounts = [['NaN', 1, 15], ['NaN', 7, 13], ['NaN', 3, 1], ['NaN', 23]]
cols = {}
cols[PREVIOUS_ANNOUNCEMENT] = get_vals_for_dates(
zip_with_dates, announcement_dates, date_intervals, dates
)
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
zip_with_floats, amounts, date_intervals, dates
)
cols = {
PREVIOUS_ANNOUNCEMENT: self.get_sids_to_frames(
zip_with_dates, announcement_dates, date_intervals, dates
),
PREVIOUS_AMOUNT: self.get_sids_to_frames(
zip_with_floats, amounts, date_intervals, dates
),
}
cols[
DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT
@@ -305,27 +277,26 @@ class DividendsByExDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase):
loader_type = DividendsByExDateLoader
def setup(self, dates):
cols = {}
cols[NEXT_EX_DATE] = get_vals_for_dates(
zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates,
)
cols[PREVIOUS_EX_DATE] = get_vals_for_dates(
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates
)
cols[NEXT_AMOUNT] = get_vals_for_dates(
zip_with_floats, next_amounts, next_date_intervals, dates
)
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
zip_with_floats, prev_amounts, prev_date_intervals, dates
)
cols = {
NEXT_EX_DATE: self.get_sids_to_frames(
zip_with_dates, next_ex_and_pay_dates, next_date_intervals,
dates,
),
PREVIOUS_EX_DATE: self.get_sids_to_frames(
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals,
dates
),
NEXT_AMOUNT: self.get_sids_to_frames(
zip_with_floats, next_amounts, next_date_intervals, dates
),
PREVIOUS_AMOUNT: self.get_sids_to_frames(
zip_with_floats, prev_amounts, prev_date_intervals, dates
)
}
cols[DAYS_TO_NEXT_EX_DATE] = self._compute_busday_offsets(
cols[NEXT_EX_DATE]
)
cols[DAYS_SINCE_PREV_EX_DATE] = self._compute_busday_offsets(
cols[PREVIOUS_EX_DATE]
)
@@ -386,22 +357,22 @@ class DividendsByPayDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase):
loader_type = DividendsByPayDateLoader
def setup(self, dates):
cols = {}
cols[NEXT_PAY_DATE] = get_vals_for_dates(
zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates
)
cols[PREVIOUS_PAY_DATE] = get_vals_for_dates(
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates
)
cols[NEXT_AMOUNT] = get_vals_for_dates(
zip_with_floats, next_amounts, next_date_intervals, dates
)
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
zip_with_floats, prev_amounts, prev_date_intervals, dates
)
return cols
return {
NEXT_PAY_DATE: self.get_sids_to_frames(
zip_with_dates, next_ex_and_pay_dates, next_date_intervals,
dates
),
PREVIOUS_PAY_DATE: self.get_sids_to_frames(
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals,
dates
),
NEXT_AMOUNT: self.get_sids_to_frames(
zip_with_floats, next_amounts, next_date_intervals, dates
),
PREVIOUS_AMOUNT: self.get_sids_to_frames(
zip_with_floats, prev_amounts, prev_date_intervals, dates
)
}
class BlazeDividendsByPayDateLoaderTestCase(DividendsByPayDateTestCase):
+16 -145
View File
@@ -22,97 +22,13 @@ from zipline.pipeline.factors.events import (
)
from zipline.pipeline.loaders.earnings import EarningsCalendarLoader
from zipline.pipeline.loaders.blaze import BlazeEarningsCalendarLoader
from zipline.pipeline.loaders.utils import (
get_values_for_date_ranges,
zip_with_dates
)
from zipline.testing.fixtures import (
WithPipelineEventDataLoader,
ZiplineTestCase
ZiplineTestCase,
WithNextAndPreviousEventDataLoader
)
earnings_cases = [
# K1--K2--A1--A2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
}),
# K1--K2--A2--A1.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
}),
# K1--A1--K2--A2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
}),
# K1 == K2.
pd.DataFrame({
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
}),
pd.DataFrame(
columns=[ANNOUNCEMENT_FIELD_NAME,
TS_FIELD_NAME],
dtype='datetime64[ns]'
),
]
next_date_intervals = [
[[None, '2014-01-04'],
['2014-01-05', '2014-01-15'],
['2014-01-16', '2014-01-20'],
['2014-01-21', None]],
[[None, '2014-01-04'],
['2014-01-05', '2014-01-09'],
['2014-01-10', '2014-01-15'],
['2014-01-16', '2014-01-20'],
['2014-01-21', None]],
[[None, '2014-01-04'],
['2014-01-05', '2014-01-10'],
['2014-01-11', '2014-01-14'],
['2014-01-15', '2014-01-20'],
['2014-01-21', None]],
[[None, '2014-01-04'],
['2014-01-05', '2014-01-10'],
['2014-01-11', '2014-01-15'],
['2014-01-16', None]]
]
next_dates = [
['NaT', '2014-01-15', '2014-01-20', 'NaT'],
['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'],
['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'],
['NaT', '2014-01-10', '2014-01-15', 'NaT'],
['NaT']
]
prev_date_intervals = [
[[None, '2014-01-14'],
['2014-01-15', '2014-01-19'],
['2014-01-20', None]],
[[None, '2014-01-14'],
['2014-01-15', '2014-01-19'],
['2014-01-20', None]],
[[None, '2014-01-09'],
['2014-01-10', '2014-01-19'],
['2014-01-20', None]],
[[None, '2014-01-09'],
['2014-01-10', '2014-01-14'],
['2014-01-15', None]]
]
prev_dates = [
['NaT', '2014-01-15', '2014-01-20'],
['NaT', '2014-01-15', '2014-01-20'],
['NaT', '2014-01-10', '2014-01-20'],
['NaT', '2014-01-10', '2014-01-15'],
['NaT']
]
class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader,
class EarningsCalendarLoaderTestCase(WithNextAndPreviousEventDataLoader,
ZiplineTestCase):
"""
Tests for loading the earnings announcement data.
@@ -126,70 +42,25 @@ class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader,
@classmethod
def get_dataset(cls):
return {sid: df for sid, df in enumerate(earnings_cases)}
return {sid: df.rename(
columns={'other_date': ANNOUNCEMENT_FIELD_NAME}
) for sid, df in enumerate(cls.base_cases)}
loader_type = EarningsCalendarLoader
def get_expected_next_event_dates(self, dates):
return pd.DataFrame({
0: get_values_for_date_ranges(zip_with_dates,
next_dates[0],
next_date_intervals[0],
dates),
1: get_values_for_date_ranges(zip_with_dates,
next_dates[1],
next_date_intervals[1],
dates),
2: get_values_for_date_ranges(zip_with_dates,
next_dates[2],
next_date_intervals[2],
dates),
3: get_values_for_date_ranges(zip_with_dates,
next_dates[3],
next_date_intervals[3],
dates),
4: zip_with_dates(dates, ['NaT'] * len(dates)),
}, index=dates)
def get_expected_previous_event_dates(self, dates):
return pd.DataFrame({
0: get_values_for_date_ranges(zip_with_dates,
prev_dates[0],
prev_date_intervals[0],
dates),
1: get_values_for_date_ranges(zip_with_dates,
prev_dates[1],
prev_date_intervals[1],
dates),
2: get_values_for_date_ranges(zip_with_dates,
prev_dates[2],
prev_date_intervals[2],
dates),
3: get_values_for_date_ranges(zip_with_dates,
prev_dates[3],
prev_date_intervals[3],
dates),
4: zip_with_dates(dates, ['NaT'] * len(dates)),
}, index=dates)
def setup(self, dates):
_expected_next_announce = self.get_expected_next_event_dates(dates)
_expected_previous_announce = self.get_expected_previous_event_dates(
dates
cols = {
PREVIOUS_ANNOUNCEMENT: self.get_expected_previous_event_dates(
dates
),
NEXT_ANNOUNCEMENT: self.get_expected_next_event_dates(dates),
}
cols[DAYS_TO_NEXT] = self._compute_busday_offsets(
cols[NEXT_ANNOUNCEMENT]
)
_expected_next_busday_offsets = self._compute_busday_offsets(
_expected_next_announce
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
cols[PREVIOUS_ANNOUNCEMENT]
)
_expected_previous_busday_offsets = self._compute_busday_offsets(
_expected_previous_announce
)
cols = {}
cols[PREVIOUS_ANNOUNCEMENT] = _expected_previous_announce
cols[NEXT_ANNOUNCEMENT] = _expected_next_announce
cols[DAYS_TO_NEXT] = _expected_next_busday_offsets
cols[DAYS_SINCE_PREV] = _expected_previous_busday_offsets
return cols
+24
View File
@@ -5,6 +5,7 @@ AD_FIELD_NAME = 'asof_date'
ANNOUNCEMENT_FIELD_NAME = 'announcement_date'
CASH_FIELD_NAME = 'cash'
CASH_AMOUNT_FIELD_NAME = 'cash_amount'
COUNT_FIELD_NAME = 'count'
BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date'
DAYS_SINCE_PREV = 'days_since_prev'
DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement'
@@ -12,18 +13,41 @@ DAYS_SINCE_PREV_EX_DATE = 'days_since_prev_ex_date'
DAYS_TO_NEXT = 'days_to_next'
DAYS_TO_NEXT_EX_DATE = 'days_to_next_ex_date'
EX_DATE_FIELD_NAME = 'ex_date'
FISCAL_QUARTER_FIELD_NAME = 'fiscal_quarter'
FISCAL_YEAR_FIELD_NAME = 'fiscal_year'
HIGH_FIELD_NAME = 'high'
LOW_FIELD_NAME = 'low'
MEAN_FIELD_NAME = 'mean'
NEXT_AMOUNT = 'next_amount'
NEXT_ANNOUNCEMENT = 'next_announcement'
NEXT_COUNT = 'next_count'
NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
NEXT_FISCAL_YEAR = 'next_fiscal_year'
NEXT_EX_DATE = 'next_ex_date'
NEXT_HIGH = 'next_high'
NEXT_LOW = 'next_low'
NEXT_MEAN = 'next_mean'
NEXT_PAY_DATE = 'next_pay_date'
NEXT_RELEASE_DATE = 'next_release_date'
NEXT_STANDARD_DEVIATION = 'next_standard_deviation'
PAY_DATE_FIELD_NAME = 'pay_date'
PREVIOUS_AMOUNT = 'previous_amount'
PREVIOUS_ANNOUNCEMENT = 'previous_announcement'
PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement'
PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash'
PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count'
PREVIOUS_COUNT = 'previous_count'
PREVIOUS_EX_DATE = 'previous_ex_date'
PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
PREVIOUS_HIGH = 'previous_high'
PREVIOUS_LOW = 'previous_low'
PREVIOUS_MEAN = 'previous_mean'
PREVIOUS_PAY_DATE = 'previous_pay_date'
PREVIOUS_RELEASE_DATE = 'previous_release_date'
PREVIOUS_STANDARD_DEVIATION = 'previous_standard_deviation'
RELEASE_DATE_FIELD_NAME = 'release_date'
SHARE_COUNT_FIELD_NAME = 'share_count'
SID_FIELD_NAME = 'sid'
STANDARD_DEVIATION_FIELD_NAME = 'standard_deviation'
TS_FIELD_NAME = 'timestamp'
+2
View File
@@ -5,6 +5,7 @@ from .dividends import (
DividendsByPayDate,
)
from .earnings import EarningsCalendar
from .consensus_estimates import ConsensusEstimates
from .equity_pricing import USEquityPricing
from .dataset import DataSet, Column, BoundColumn
@@ -17,6 +18,7 @@ __all__ = [
'DividendsByExDate',
'DividendsByPayDate',
'EarningsCalendar',
'ConsensusEstimates',
'ShareBuybackAuthorizations',
'USEquityPricing',
]
@@ -0,0 +1,28 @@
"""
Datasets representing consensus estimates data.
"""
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
from .dataset import Column, DataSet
class ConsensusEstimates(DataSet):
    """
    Dataset representing consensus estimates data.

    Every field comes as a ``previous_*``/``next_*`` pair.  The release
    dates are datetime columns; all other fields are declared as float64.
    """
    # Release dates.
    previous_release_date = Column(datetime64ns_dtype)
    next_release_date = Column(datetime64ns_dtype)

    # Estimate statistics.
    previous_standard_deviation = Column(float64_dtype)
    next_standard_deviation = Column(float64_dtype)
    previous_count = Column(float64_dtype)
    next_count = Column(float64_dtype)

    previous_fiscal_quarter = Column(float64_dtype)
    next_fiscal_quarter = Column(float64_dtype)

    previous_high = Column(float64_dtype)
    next_high = Column(float64_dtype)
    previous_mean = Column(float64_dtype)
    next_mean = Column(float64_dtype)

    previous_fiscal_year = Column(float64_dtype)
    next_fiscal_year = Column(float64_dtype)

    previous_low = Column(float64_dtype)
    next_low = Column(float64_dtype)
+2
View File
@@ -1,4 +1,5 @@
from .earnings import EarningsCalendarLoader
from .consensus_estimates import ConsensusEstimatesLoader
from .buyback_auth import (
CashBuybackAuthorizationsLoader,
ShareBuybackAuthorizationsLoader
@@ -16,6 +17,7 @@ __all__ = [
'DividendsByExDateLoader',
'DividendsByPayDateLoader',
'EarningsCalendarLoader',
'ConsensusEstimatesLoader',
'ShareBuybackAuthorizationsLoader',
'USEquityPricingLoader',
]
@@ -17,10 +17,12 @@ from .dividends import (
from .earnings import (
BlazeEarningsCalendarLoader,
)
from .consensus_estimates import BlazeConsensusEstimatesLoader
__all__ = (
'BlazeCashBuybackAuthorizationsLoader',
'BlazeDividendsByAnnouncementDateLoader',
'BlazeConsensusEstimatesLoader',
'BlazeDividendsByExDateLoader',
'BlazeDividendsByPayDateLoader',
'BlazeEarningsCalendarLoader',
@@ -0,0 +1,91 @@
from zipline.pipeline.common import (
COUNT_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
HIGH_FIELD_NAME,
LOW_FIELD_NAME,
MEAN_FIELD_NAME,
RELEASE_DATE_FIELD_NAME,
SID_FIELD_NAME,
STANDARD_DEVIATION_FIELD_NAME,
TS_FIELD_NAME,
)
from zipline.pipeline.data import ConsensusEstimates
from zipline.pipeline.loaders import ConsensusEstimatesLoader
from .events import BlazeEventsLoader
class BlazeConsensusEstimatesLoader(BlazeEventsLoader):
    """A pipeline loader for the ``ConsensusEstimates`` dataset that loads
    data from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The expression representing the data to load.
    resources : dict, optional
        Mapping from the loadable terms of ``expr`` to actual data resources.
    odo_kwargs : dict, optional
        Extra keyword arguments to pass to odo when executing the expression.
    data_query_time : time, optional
        The time to use for the data query cutoff.
    data_query_tz : tzinfo or str
        The timezone to use for the data query cutoff.
    dataset : DataSet
        The DataSet object for which this loader loads data.

    Notes
    -----
    The expression should have a tabular dshape of::

       Dim * {{
           {SID_FIELD_NAME}: int64,
           {TS_FIELD_NAME}: datetime,
           {RELEASE_DATE_FIELD_NAME}: ?datetime,
           {STANDARD_DEVIATION_FIELD_NAME}: ?float64,
           {COUNT_FIELD_NAME}: ?float64,
           {FISCAL_QUARTER_FIELD_NAME}: ?float64,
           {HIGH_FIELD_NAME}: ?float64,
           {MEAN_FIELD_NAME}: ?float64,
           {FISCAL_YEAR_FIELD_NAME}: ?float64,
           {LOW_FIELD_NAME}: ?float64,
       }}

    Where each row of the table is a record including the sid to identify the
    company, the timestamp when we learned about the announcement,
    the release date for the corresponding estimate, and other estimate
    information.

    If the '{TS_FIELD_NAME}' field is not included it is assumed that we
    start the backtest with knowledge of all announcements.
    """

    # The docstring above is a template: doubled braces are literal braces
    # and the single-brace names are replaced by the real field names here.
    __doc__ = __doc__.format(
        TS_FIELD_NAME=TS_FIELD_NAME,
        SID_FIELD_NAME=SID_FIELD_NAME,
        RELEASE_DATE_FIELD_NAME=RELEASE_DATE_FIELD_NAME,
        STANDARD_DEVIATION_FIELD_NAME=STANDARD_DEVIATION_FIELD_NAME,
        COUNT_FIELD_NAME=COUNT_FIELD_NAME,
        FISCAL_QUARTER_FIELD_NAME=FISCAL_QUARTER_FIELD_NAME,
        HIGH_FIELD_NAME=HIGH_FIELD_NAME,
        MEAN_FIELD_NAME=MEAN_FIELD_NAME,
        FISCAL_YEAR_FIELD_NAME=FISCAL_YEAR_FIELD_NAME,
        LOW_FIELD_NAME=LOW_FIELD_NAME,
    )

    # Field names the input expression is expected to provide.
    _expected_fields = frozenset({
        TS_FIELD_NAME,
        SID_FIELD_NAME,
        RELEASE_DATE_FIELD_NAME,
        STANDARD_DEVIATION_FIELD_NAME,
        COUNT_FIELD_NAME,
        FISCAL_QUARTER_FIELD_NAME,
        HIGH_FIELD_NAME,
        MEAN_FIELD_NAME,
        FISCAL_YEAR_FIELD_NAME,
        LOW_FIELD_NAME,
    })

    # In-memory loader class and default dataset paired with this loader.
    concrete_loader = ConsensusEstimatesLoader
    default_dataset = ConsensusEstimates
+1 -1
View File
@@ -45,7 +45,7 @@ class BlazeDividendsByAnnouncementDateLoader(BlazeEventsLoader):
Dim * {{
{SID_FIELD_NAME}: int64,
{TS_FIELD_NAME}: datetime,
{CASH_AMOUNT_FIELD_NAME}: ?datetime,
{CASH_AMOUNT_FIELD_NAME}: ?float64,
{ANNOUNCEMENT_FIELD_NAME}: ?datetime,
}}
@@ -0,0 +1,162 @@
"""
Reference implementation for ConsensusEstimates loaders.
"""
from ..data import ConsensusEstimates
from .events import EventsLoader
from zipline.pipeline.common import (
COUNT_FIELD_NAME,
FISCAL_QUARTER_FIELD_NAME,
FISCAL_YEAR_FIELD_NAME,
HIGH_FIELD_NAME,
LOW_FIELD_NAME,
MEAN_FIELD_NAME,
RELEASE_DATE_FIELD_NAME,
STANDARD_DEVIATION_FIELD_NAME,
)
from zipline.utils.memoize import lazyval
class ConsensusEstimatesLoader(EventsLoader):
    """
    Reference loader for the ``ConsensusEstimates`` dataset.

    Each dataset column has a matching lazily-built ``<column>_loader``
    attribute.  Date columns use the next/previous event *date* loaders;
    every value column uses the next/previous event *value* loaders, keyed
    on the release date.
    """
    # Columns every per-sid events frame must provide.
    expected_cols = frozenset([
        COUNT_FIELD_NAME,
        FISCAL_QUARTER_FIELD_NAME,
        FISCAL_YEAR_FIELD_NAME,
        HIGH_FIELD_NAME,
        LOW_FIELD_NAME,
        MEAN_FIELD_NAME,
        RELEASE_DATE_FIELD_NAME,
        STANDARD_DEVIATION_FIELD_NAME,
    ])

    def __init__(self, all_dates, events_by_sid,
                 infer_timestamps=False,
                 dataset=ConsensusEstimates):
        super(ConsensusEstimatesLoader, self).__init__(
            all_dates,
            events_by_sid,
            infer_timestamps,
            dataset=dataset,
        )

    # -- release date -------------------------------------------------------

    @lazyval
    def next_release_date_loader(self):
        column = self.dataset.next_release_date
        return self._next_event_date_loader(column, RELEASE_DATE_FIELD_NAME)

    @lazyval
    def previous_release_date_loader(self):
        column = self.dataset.previous_release_date
        return self._previous_event_date_loader(
            column, RELEASE_DATE_FIELD_NAME,
        )

    # -- standard deviation -------------------------------------------------

    @lazyval
    def next_standard_deviation_loader(self):
        column = self.dataset.next_standard_deviation
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME,
        )

    @lazyval
    def previous_standard_deviation_loader(self):
        column = self.dataset.previous_standard_deviation
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME,
        )

    # -- count --------------------------------------------------------------

    @lazyval
    def next_count_loader(self):
        column = self.dataset.next_count
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME,
        )

    @lazyval
    def previous_count_loader(self):
        column = self.dataset.previous_count
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME,
        )

    # -- fiscal quarter -----------------------------------------------------

    @lazyval
    def next_fiscal_quarter_loader(self):
        column = self.dataset.next_fiscal_quarter
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME,
        )

    @lazyval
    def previous_fiscal_quarter_loader(self):
        column = self.dataset.previous_fiscal_quarter
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME,
        )

    # -- high ---------------------------------------------------------------

    @lazyval
    def next_high_loader(self):
        column = self.dataset.next_high
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME,
        )

    @lazyval
    def previous_high_loader(self):
        column = self.dataset.previous_high
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME,
        )

    # -- mean ---------------------------------------------------------------

    @lazyval
    def next_mean_loader(self):
        column = self.dataset.next_mean
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME,
        )

    @lazyval
    def previous_mean_loader(self):
        column = self.dataset.previous_mean
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME,
        )

    # -- fiscal year --------------------------------------------------------

    @lazyval
    def next_fiscal_year_loader(self):
        column = self.dataset.next_fiscal_year
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME,
        )

    @lazyval
    def previous_fiscal_year_loader(self):
        column = self.dataset.previous_fiscal_year
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME,
        )

    # -- low ----------------------------------------------------------------

    @lazyval
    def next_low_loader(self):
        column = self.dataset.next_low
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME,
        )

    @lazyval
    def previous_low_loader(self):
        column = self.dataset.previous_low
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME,
        )
+170 -10
View File
@@ -6,19 +6,17 @@ from unittest import TestCase
from contextlib2 import ExitStack
from logbook import NullHandler, Logger
from nose_parameterized import parameterized
import numpy as np
import pandas as pd
from pandas.util.testing import assert_series_equal
import responses
from six import with_metaclass, iteritems
from toolz import flip
import numpy as np
import pandas as pd
import responses
from ..assets.synthetic import make_simple_equity_info
from .core import (
create_daily_bar_data,
create_minute_bar_data,
gen_calendars,
tmp_asset_finder,
tmp_dir,
)
from ..data.data_portal import DataPortal
@@ -26,7 +24,6 @@ from ..data.us_equity_pricing import (
SQLiteAdjustmentReader,
SQLiteAdjustmentWriter,
)
from ..finance.trading import TradingEnvironment
from ..data.us_equity_pricing import (
BcolzDailyBarReader,
BcolzDailyBarWriter,
@@ -36,13 +33,21 @@ from ..data.minute_bars import (
BcolzMinuteBarWriter,
US_EQUITIES_MINUTES_PER_DAY
)
from ..finance.trading import TradingEnvironment
from ..utils import tradingcalendar, factory
from ..utils.classproperty import classproperty
from ..utils.final import FinalMeta, final
from ..utils.metautils import compose_types
from ..pipeline import Pipeline, SimplePipelineEngine
from ..utils.numpy_utils import make_datetime64D
from ..utils.numpy_utils import NaTD
from .core import tmp_asset_finder, make_simple_equity_info, gen_calendars
from zipline.pipeline import Pipeline, SimplePipelineEngine
from zipline.utils.numpy_utils import make_datetime64D
from zipline.utils.numpy_utils import NaTD
from zipline.pipeline.common import TS_FIELD_NAME
from zipline.pipeline.loaders.utils import (
get_values_for_date_ranges,
zip_with_dates
)
def _take_out_the_trash():
@@ -890,6 +895,42 @@ class WithPipelineEventDataLoader(with_metaclass(
loader = self.loader_type(*self.pipeline_event_loader_args(dates))
return SimplePipelineEngine(lambda _: loader, dates, self.asset_finder)
def get_sids_to_frames(self,
                       zip_date_index_with_vals,
                       vals,
                       date_intervals,
                       dates):
    """
    Build a DataFrame with one column per sid holding the values expected
    on each of the given dates.

    Parameters
    ----------
    zip_date_index_with_vals: callable
        A function that returns a series of `vals` repeated based on the
        number of days in the date interval for each val, indexed by the
        dates in `dates`.
    vals: iterable
        An iterable with values that correspond to each interval in
        `date_intervals`.
    date_intervals: list
        A list of date intervals for each sid that correspond to values in
        `vals`.
    dates: DatetimeIndex
        The dates which will serve as the index for each Series for each
        sid in the DataFrame.

    Returns
    -------
    frame : pd.DataFrame
        One column per sid.  The column for the last sid is always filled
        with 'NaN' placeholders; `vals` and `date_intervals` are not
        consulted for it.
    """
    sids = self.get_sids()
    per_sid = {}
    for sid in sids[:-1]:
        per_sid[sid] = get_values_for_date_ranges(
            zip_date_index_with_vals,
            vals[sid],
            date_intervals[sid],
            dates,
        )
    frame = pd.DataFrame(per_sid)
    # The final sid gets an all-missing column.
    placeholders = ['NaN'] * len(dates)
    frame[sids[-1]] = zip_date_index_with_vals(dates, placeholders)
    return frame
@staticmethod
def _compute_busday_offsets(announcement_dates):
"""
@@ -1026,3 +1067,122 @@ class WithResponses(object):
self.responses = self.enter_instance_context(
responses.RequestsMock(),
)
class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader):
    """
    ZiplineTestCase mixin extending common functionality for event data
    loader tests that have both next and previous events.

    `base_cases` should be used as the template to test cases that combine
    knowledge date (timestamp) and some 'other_date' in various ways.

    `next_date_intervals` gives the date intervals for the next event based
    on the dates given in `base_cases`.

    `next_dates` gives the next date from `other_date` which is known about at
    each interval.

    `prev_date_intervals` gives the date intervals for each sid for the
    previous event based on the dates given in `base_cases`.

    `prev_dates` gives the previous date from `other_date` which is known
    about at each interval.

    `get_expected_previous_event_dates` is a convenience function that fills
    a DataFrame with the previously known dates for each sid for the given
    dates.

    `get_expected_next_event_dates` is a convenience function that fills
    a DataFrame with the next known dates for each sid for the given
    dates.
    """
    # One frame per sid; the last frame is the sid with no events.
    base_cases = [
        # K1--K2--A1--A2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
            'other_date': pd.to_datetime(['2014-01-15', '2014-01-20']),
        }),
        # K1--K2--A2--A1.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
            'other_date': pd.to_datetime(['2014-01-20', '2014-01-15']),
        }),
        # K1--A1--K2--A2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
            'other_date': pd.to_datetime(['2014-01-10', '2014-01-20']),
        }),
        # K1 == K2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
            'other_date': pd.to_datetime(['2014-01-10', '2014-01-15']),
        }),
        pd.DataFrame(
            columns=['other_date',
                     TS_FIELD_NAME],
            dtype='datetime64[ns]'
        ),
    ]

    # [start, end] pairs per sid; None leaves that side of the range open.
    next_date_intervals = [
        [[None, '2014-01-04'],
         ['2014-01-05', '2014-01-15'],
         ['2014-01-16', '2014-01-20'],
         ['2014-01-21', None]],
        [[None, '2014-01-04'],
         ['2014-01-05', '2014-01-09'],
         ['2014-01-10', '2014-01-15'],
         ['2014-01-16', '2014-01-20'],
         ['2014-01-21', None]],
        [[None, '2014-01-04'],
         ['2014-01-05', '2014-01-10'],
         ['2014-01-11', '2014-01-14'],
         ['2014-01-15', '2014-01-20'],
         ['2014-01-21', None]],
        [[None, '2014-01-04'],
         ['2014-01-05', '2014-01-10'],
         ['2014-01-11', '2014-01-15'],
         ['2014-01-16', None]]
    ]

    # Expected next 'other_date' during each interval above, per sid.
    next_dates = [
        ['NaT', '2014-01-15', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-10', '2014-01-15', 'NaT'],
        ['NaT']
    ]

    prev_date_intervals = [
        [[None, '2014-01-14'],
         ['2014-01-15', '2014-01-19'],
         ['2014-01-20', None]],
        [[None, '2014-01-14'],
         ['2014-01-15', '2014-01-19'],
         ['2014-01-20', None]],
        [[None, '2014-01-09'],
         ['2014-01-10', '2014-01-19'],
         ['2014-01-20', None]],
        [[None, '2014-01-09'],
         ['2014-01-10', '2014-01-14'],
         ['2014-01-15', None]]
    ]

    # Expected previous 'other_date' during each interval above, per sid.
    prev_dates = [
        ['NaT', '2014-01-15', '2014-01-20'],
        ['NaT', '2014-01-15', '2014-01-20'],
        ['NaT', '2014-01-10', '2014-01-20'],
        ['NaT', '2014-01-10', '2014-01-15'],
        ['NaT']
    ]

    def get_expected_previous_event_dates(self, dates):
        """Return the per-sid frame of expected previous event dates."""
        expected, intervals = self.prev_dates, self.prev_date_intervals
        return self.get_sids_to_frames(
            zip_with_dates, expected, intervals, dates,
        )

    def get_expected_next_event_dates(self, dates):
        """Return the per-sid frame of expected next event dates."""
        expected, intervals = self.next_dates, self.next_date_intervals
        return self.get_sids_to_frames(
            zip_with_dates, expected, intervals, dates,
        )