mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-02 03:48:58 +08:00
ENH: add custom dataset for estimize
MAINT: alphabetize constants
MAINT: remove obsolete column
TST: refactor tests to use common code
MAINT: remove unneeded fields from dataset
MAINT: remove obsolete earnings estimates columns and refactor
This commit is contained in:
@@ -34,7 +34,6 @@ from zipline.pipeline.loaders.blaze import (
|
||||
BlazeShareBuybackAuthorizationsLoader,
|
||||
)
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
get_values_for_date_ranges,
|
||||
zip_with_floats,
|
||||
zip_with_dates
|
||||
)
|
||||
@@ -42,8 +41,9 @@ from zipline.testing.fixtures import (
|
||||
WithPipelineEventDataLoader, ZiplineTestCase
|
||||
)
|
||||
|
||||
date_intervals = [[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', None]]
|
||||
date_intervals = [
|
||||
[[None, '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', None]]
|
||||
]
|
||||
|
||||
buyback_authorizations_cases = [
|
||||
pd.DataFrame({
|
||||
@@ -63,18 +63,6 @@ buyback_authorizations_cases = [
|
||||
]
|
||||
|
||||
|
||||
def get_expected_previous_values(zip_date_index_with_vals,
|
||||
dates,
|
||||
vals_for_date_intervals):
|
||||
return pd.DataFrame({
|
||||
0: get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals_for_date_intervals,
|
||||
date_intervals,
|
||||
dates),
|
||||
1: zip_date_index_with_vals(dates, ['NaN'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
|
||||
class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
|
||||
ZiplineTestCase):
|
||||
"""
|
||||
@@ -103,16 +91,17 @@ class CashBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
|
||||
loader_type = CashBuybackAuthorizationsLoader
|
||||
|
||||
def setup(self, dates):
|
||||
cols = {}
|
||||
_expected_previous_cash = get_expected_previous_values(
|
||||
zip_with_floats, dates,
|
||||
['NaN', 10, 20]
|
||||
)
|
||||
cols[
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT
|
||||
] = get_expected_previous_values(zip_with_dates, dates,
|
||||
['NaT', '2014-01-04', '2014-01-09'])
|
||||
cols[PREVIOUS_BUYBACK_CASH] = _expected_previous_cash
|
||||
cols = {
|
||||
PREVIOUS_BUYBACK_CASH: self.get_sids_to_frames(zip_with_floats,
|
||||
[['NaN', 10, 20]],
|
||||
date_intervals,
|
||||
dates),
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT: self.get_sids_to_frames(
|
||||
zip_with_dates,
|
||||
[['NaT', '2014-01-04', '2014-01-09']],
|
||||
date_intervals,
|
||||
dates),
|
||||
}
|
||||
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
|
||||
cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
|
||||
)
|
||||
@@ -147,16 +136,18 @@ class ShareBuybackAuthLoaderTestCase(WithPipelineEventDataLoader,
|
||||
loader_type = ShareBuybackAuthorizationsLoader
|
||||
|
||||
def setup(self, dates):
|
||||
cols = {}
|
||||
cols[
|
||||
PREVIOUS_BUYBACK_SHARE_COUNT
|
||||
] = get_expected_previous_values(zip_with_floats,
|
||||
dates,
|
||||
['NaN', 1, 15])
|
||||
cols[
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT
|
||||
] = get_expected_previous_values(zip_with_dates, dates,
|
||||
['NaT', '2014-01-04', '2014-01-09'])
|
||||
cols = {PREVIOUS_BUYBACK_SHARE_COUNT:
|
||||
self.get_sids_to_frames(zip_with_floats,
|
||||
[['NaN', 1, 15]],
|
||||
date_intervals,
|
||||
dates,),
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT:
|
||||
self.get_sids_to_frames(zip_with_dates,
|
||||
[['NaT', '2014-01-04', '2014-01-09']],
|
||||
date_intervals,
|
||||
dates,),
|
||||
}
|
||||
|
||||
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
|
||||
cols[PREVIOUS_BUYBACK_ANNOUNCEMENT]
|
||||
)
|
||||
|
||||
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Tests for the reference loader for ConsensusEstimates.
|
||||
"""
|
||||
import blaze as bz
|
||||
from blaze.compute.core import swap_resources_into_scope
|
||||
import pandas as pd
|
||||
from six import iteritems
|
||||
|
||||
from zipline.pipeline.common import (
|
||||
COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
NEXT_COUNT,
|
||||
NEXT_FISCAL_QUARTER,
|
||||
NEXT_FISCAL_YEAR,
|
||||
NEXT_HIGH,
|
||||
NEXT_LOW,
|
||||
NEXT_RELEASE_DATE,
|
||||
NEXT_STANDARD_DEVIATION,
|
||||
PREVIOUS_COUNT,
|
||||
PREVIOUS_FISCAL_QUARTER,
|
||||
PREVIOUS_FISCAL_YEAR,
|
||||
PREVIOUS_HIGH,
|
||||
PREVIOUS_LOW,
|
||||
PREVIOUS_MEAN, NEXT_MEAN,
|
||||
PREVIOUS_RELEASE_DATE,
|
||||
PREVIOUS_STANDARD_DEVIATION,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
SID_FIELD_NAME)
|
||||
from zipline.pipeline.data import ConsensusEstimates
|
||||
from zipline.pipeline.loaders.consensus_estimates import (
|
||||
ConsensusEstimatesLoader
|
||||
)
|
||||
from zipline.pipeline.loaders.blaze import BlazeConsensusEstimatesLoader
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
zip_with_floats
|
||||
)
|
||||
from zipline.testing.fixtures import (
|
||||
ZiplineTestCase,
|
||||
WithNextAndPreviousEventDataLoader
|
||||
)
|
||||
|
||||
consensus_estimates_cases = [
|
||||
# K1--K2--A1--A2.
|
||||
pd.DataFrame({
|
||||
STANDARD_DEVIATION_FIELD_NAME: (.5, .6),
|
||||
COUNT_FIELD_NAME: (1, 2),
|
||||
FISCAL_QUARTER_FIELD_NAME: (1, 1),
|
||||
HIGH_FIELD_NAME: (.6, .7),
|
||||
MEAN_FIELD_NAME: (.1, .2),
|
||||
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
|
||||
LOW_FIELD_NAME: (.05, .06),
|
||||
}),
|
||||
# K1--K2--A2--A1.
|
||||
pd.DataFrame({
|
||||
STANDARD_DEVIATION_FIELD_NAME: (.6, .7),
|
||||
COUNT_FIELD_NAME: (2, 3),
|
||||
FISCAL_QUARTER_FIELD_NAME: (1, 1),
|
||||
HIGH_FIELD_NAME: (.7, .8),
|
||||
MEAN_FIELD_NAME: (.2, .3),
|
||||
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
|
||||
LOW_FIELD_NAME: (.06, .07),
|
||||
}),
|
||||
# K1--A1--K2--A2.
|
||||
pd.DataFrame({
|
||||
STANDARD_DEVIATION_FIELD_NAME: (.7, .8),
|
||||
COUNT_FIELD_NAME: (3, 4),
|
||||
FISCAL_QUARTER_FIELD_NAME: (1, 1),
|
||||
HIGH_FIELD_NAME: (.8, .9),
|
||||
MEAN_FIELD_NAME: (.3, .4),
|
||||
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
|
||||
LOW_FIELD_NAME: (.07, .08),
|
||||
}),
|
||||
# K1 == K2.
|
||||
pd.DataFrame({
|
||||
STANDARD_DEVIATION_FIELD_NAME: (.8, .9),
|
||||
COUNT_FIELD_NAME: (4, 5),
|
||||
FISCAL_QUARTER_FIELD_NAME: (1, 1),
|
||||
HIGH_FIELD_NAME: (.9, 1.0),
|
||||
MEAN_FIELD_NAME: (.4, .5),
|
||||
FISCAL_YEAR_FIELD_NAME: (2014, 2014),
|
||||
LOW_FIELD_NAME: (.08, .09),
|
||||
}),
|
||||
pd.DataFrame(
|
||||
columns=[STANDARD_DEVIATION_FIELD_NAME,
|
||||
COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
LOW_FIELD_NAME],
|
||||
dtype='datetime64[ns]'
|
||||
),
|
||||
]
|
||||
|
||||
next_standard_deviation = [
|
||||
['NaN', .5, .6, 'NaN'],
|
||||
['NaN', .6, .7, .6, 'NaN'],
|
||||
['NaN', .7, 'NaN', .8, 'NaN'],
|
||||
['NaN', .8, .9, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_standard_deviation = [
|
||||
['NaN', .5, .6],
|
||||
['NaN', .7, .6],
|
||||
['NaN', .7, .8],
|
||||
['NaN', .8, .9],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_count = [
|
||||
['NaN', 1, 2, 'NaN'],
|
||||
['NaN', 2, 3, 2, 'NaN'],
|
||||
['NaN', 3, 'NaN', 4, 'NaN'],
|
||||
['NaN', 4, 5, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_count = [
|
||||
['NaN', 1, 2],
|
||||
['NaN', 3, 2],
|
||||
['NaN', 3, 4],
|
||||
['NaN', 4, 5],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_fiscal_quarter = [
|
||||
['NaN', 1, 1, 'NaN'],
|
||||
['NaN', 1, 1, 1, 'NaN'],
|
||||
['NaN', 1, 'NaN', 1, 'NaN'],
|
||||
['NaN', 1, 1, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_fiscal_quarter = [
|
||||
['NaN', 1, 1],
|
||||
['NaN', 1, 1],
|
||||
['NaN', 1, 1],
|
||||
['NaN', 1, 1],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_high = [
|
||||
['NaN', .6, .7, 'NaN'],
|
||||
['NaN', .7, .8, .7, 'NaN'],
|
||||
['NaN', .8, 'NaN', .9, 'NaN'],
|
||||
['NaN', .9, 1.0, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_high = [
|
||||
['NaN', .6, .7],
|
||||
['NaN', .8, .7],
|
||||
['NaN', .8, .9],
|
||||
['NaN', .9, 1.0],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_mean = [
|
||||
['NaN', .1, .2, 'NaN'],
|
||||
['NaN', .2, .3, .2, 'NaN'],
|
||||
['NaN', .3, 'NaN', .4, 'NaN'],
|
||||
['NaN', .4, .5, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_mean = [
|
||||
['NaN', .1, .2],
|
||||
['NaN', .3, .2],
|
||||
['NaN', .3, .4],
|
||||
['NaN', .4, .5],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_fiscal_year = [
|
||||
['NaN', 2014, 2014, 'NaN'],
|
||||
['NaN', 2014, 2014, 2014, 'NaN'],
|
||||
['NaN', 2014, 'NaN', 2014, 'NaN'],
|
||||
['NaN', 2014, 2014, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_fiscal_year = [
|
||||
['NaN', 2014, 2014],
|
||||
['NaN', 2014, 2014],
|
||||
['NaN', 2014, 2014],
|
||||
['NaN', 2014, 2014],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
next_low = [
|
||||
['NaN', .05, .06, 'NaN'],
|
||||
['NaN', .06, .07, .06, 'NaN'],
|
||||
['NaN', .07, 'NaN', .08, 'NaN'],
|
||||
['NaN', .08, .09, 'NaN'],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
prev_low = [
|
||||
['NaN', .05, .06],
|
||||
['NaN', .07, .06],
|
||||
['NaN', .07, .08],
|
||||
['NaN', .08, .09],
|
||||
['NaN']
|
||||
]
|
||||
|
||||
field_name_to_expected_col = {
|
||||
PREVIOUS_STANDARD_DEVIATION: prev_standard_deviation,
|
||||
NEXT_STANDARD_DEVIATION: next_standard_deviation,
|
||||
PREVIOUS_COUNT: prev_count,
|
||||
NEXT_COUNT: next_count,
|
||||
PREVIOUS_FISCAL_QUARTER: prev_fiscal_quarter,
|
||||
NEXT_FISCAL_QUARTER: next_fiscal_quarter,
|
||||
PREVIOUS_HIGH: prev_high,
|
||||
NEXT_HIGH: next_high,
|
||||
PREVIOUS_MEAN: prev_mean,
|
||||
NEXT_MEAN: next_mean,
|
||||
PREVIOUS_FISCAL_YEAR: prev_fiscal_year,
|
||||
NEXT_FISCAL_YEAR: next_fiscal_year,
|
||||
PREVIOUS_LOW: prev_low,
|
||||
NEXT_LOW: next_low
|
||||
}
|
||||
|
||||
|
||||
class ConsensusEstimatesLoaderTestCase(WithNextAndPreviousEventDataLoader,
|
||||
ZiplineTestCase):
|
||||
"""
|
||||
Tests for loading the consensus estimates data.
|
||||
"""
|
||||
pipeline_columns = {
|
||||
NEXT_RELEASE_DATE:
|
||||
ConsensusEstimates.next_release_date.latest,
|
||||
PREVIOUS_RELEASE_DATE:
|
||||
ConsensusEstimates.previous_release_date.latest,
|
||||
PREVIOUS_STANDARD_DEVIATION:
|
||||
ConsensusEstimates.previous_standard_deviation.latest,
|
||||
NEXT_STANDARD_DEVIATION:
|
||||
ConsensusEstimates.next_standard_deviation.latest,
|
||||
PREVIOUS_COUNT:
|
||||
ConsensusEstimates.previous_count.latest,
|
||||
NEXT_COUNT:
|
||||
ConsensusEstimates.next_count.latest,
|
||||
PREVIOUS_FISCAL_QUARTER:
|
||||
ConsensusEstimates.previous_fiscal_quarter.latest,
|
||||
NEXT_FISCAL_QUARTER:
|
||||
ConsensusEstimates.next_fiscal_quarter.latest,
|
||||
PREVIOUS_HIGH:
|
||||
ConsensusEstimates.previous_high.latest,
|
||||
NEXT_HIGH:
|
||||
ConsensusEstimates.next_high.latest,
|
||||
PREVIOUS_MEAN:
|
||||
ConsensusEstimates.previous_mean.latest,
|
||||
NEXT_MEAN:
|
||||
ConsensusEstimates.next_mean.latest,
|
||||
PREVIOUS_FISCAL_YEAR:
|
||||
ConsensusEstimates.previous_fiscal_year.latest,
|
||||
NEXT_FISCAL_YEAR:
|
||||
ConsensusEstimates.next_fiscal_year.latest,
|
||||
PREVIOUS_LOW:
|
||||
ConsensusEstimates.previous_low.latest,
|
||||
NEXT_LOW:
|
||||
ConsensusEstimates.next_low.latest
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_dataset(cls):
|
||||
return {sid:
|
||||
pd.concat([
|
||||
cls.base_cases[sid].rename(columns={
|
||||
'other_date': RELEASE_DATE_FIELD_NAME
|
||||
}),
|
||||
df
|
||||
], axis=1)
|
||||
for sid, df in enumerate(consensus_estimates_cases)}
|
||||
|
||||
loader_type = ConsensusEstimatesLoader
|
||||
|
||||
def setup(self, dates):
|
||||
cols = {
|
||||
PREVIOUS_RELEASE_DATE:
|
||||
self.get_expected_previous_event_dates(dates),
|
||||
NEXT_RELEASE_DATE: self.get_expected_next_event_dates(dates)
|
||||
}
|
||||
for field_name in field_name_to_expected_col:
|
||||
cols[field_name] = self.get_sids_to_frames(
|
||||
zip_with_floats, field_name_to_expected_col[field_name],
|
||||
self.prev_date_intervals
|
||||
if field_name.startswith("previous")
|
||||
else self.next_date_intervals,
|
||||
dates
|
||||
)
|
||||
return cols
|
||||
|
||||
|
||||
class BlazeConsensusEstimatesLoaderTestCase(ConsensusEstimatesLoaderTestCase):
|
||||
loader_type = BlazeConsensusEstimatesLoader
|
||||
|
||||
def pipeline_event_loader_args(self, dates):
|
||||
_, mapping = super(
|
||||
BlazeConsensusEstimatesLoaderTestCase,
|
||||
self,
|
||||
).pipeline_event_loader_args(dates)
|
||||
frames = []
|
||||
for sid, df in iteritems(mapping):
|
||||
frame = df.copy()
|
||||
frame[SID_FIELD_NAME] = sid
|
||||
frames.append(frame)
|
||||
return bz.data(pd.concat(frames).reset_index(drop=True)),
|
||||
|
||||
|
||||
class BlazeConsensusEstimatesLoaderNotInteractiveTestCase(
|
||||
BlazeConsensusEstimatesLoaderTestCase
|
||||
):
|
||||
"""Test case for passing a non-interactive symbol and a dict of resources.
|
||||
"""
|
||||
|
||||
def pipeline_event_loader_args(self, dates):
|
||||
(bound_expr,) = super(
|
||||
BlazeConsensusEstimatesLoaderNotInteractiveTestCase,
|
||||
self,
|
||||
).pipeline_event_loader_args(dates)
|
||||
return swap_resources_into_scope(bound_expr, {})
|
||||
@@ -45,7 +45,6 @@ from zipline.pipeline.loaders.dividends import (
|
||||
DividendsByPayDateLoader
|
||||
)
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
get_values_for_date_ranges,
|
||||
zip_with_dates,
|
||||
zip_with_floats
|
||||
)
|
||||
@@ -159,33 +158,6 @@ next_amounts = [['NaN', 1, 15, 'NaN'],
|
||||
['NaN', 6, 23, 'NaN']]
|
||||
|
||||
|
||||
def get_vals_for_dates(zip_date_index_with_vals,
|
||||
vals,
|
||||
date_invervals,
|
||||
dates):
|
||||
return pd.DataFrame({
|
||||
0: get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals[0],
|
||||
date_invervals[0],
|
||||
dates),
|
||||
1: get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals[1],
|
||||
date_invervals[1],
|
||||
dates),
|
||||
2: get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals[2],
|
||||
date_invervals[2],
|
||||
dates),
|
||||
# Assume the latest of 2 cash values is used if we find out about 2
|
||||
# announcements that happened on the same day for the same sid.
|
||||
3: get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals[3],
|
||||
date_invervals[3],
|
||||
dates),
|
||||
4: zip_date_index_with_vals(dates, ['NaN'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
|
||||
class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader,
|
||||
ZiplineTestCase):
|
||||
"""
|
||||
@@ -232,14 +204,14 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader,
|
||||
['NaT', '2014-01-04', '2014-01-14'],
|
||||
['NaT', '2014-01-04']]
|
||||
amounts = [['NaN', 1, 15], ['NaN', 7, 13], ['NaN', 3, 1], ['NaN', 23]]
|
||||
cols = {}
|
||||
cols[PREVIOUS_ANNOUNCEMENT] = get_vals_for_dates(
|
||||
zip_with_dates, announcement_dates, date_intervals, dates
|
||||
)
|
||||
|
||||
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
|
||||
zip_with_floats, amounts, date_intervals, dates
|
||||
)
|
||||
cols = {
|
||||
PREVIOUS_ANNOUNCEMENT: self.get_sids_to_frames(
|
||||
zip_with_dates, announcement_dates, date_intervals, dates
|
||||
),
|
||||
PREVIOUS_AMOUNT: self.get_sids_to_frames(
|
||||
zip_with_floats, amounts, date_intervals, dates
|
||||
),
|
||||
}
|
||||
|
||||
cols[
|
||||
DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT
|
||||
@@ -305,27 +277,26 @@ class DividendsByExDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase):
|
||||
loader_type = DividendsByExDateLoader
|
||||
|
||||
def setup(self, dates):
|
||||
cols = {}
|
||||
cols[NEXT_EX_DATE] = get_vals_for_dates(
|
||||
zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates,
|
||||
)
|
||||
|
||||
cols[PREVIOUS_EX_DATE] = get_vals_for_dates(
|
||||
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates
|
||||
)
|
||||
|
||||
cols[NEXT_AMOUNT] = get_vals_for_dates(
|
||||
zip_with_floats, next_amounts, next_date_intervals, dates
|
||||
)
|
||||
|
||||
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
|
||||
zip_with_floats, prev_amounts, prev_date_intervals, dates
|
||||
)
|
||||
cols = {
|
||||
NEXT_EX_DATE: self.get_sids_to_frames(
|
||||
zip_with_dates, next_ex_and_pay_dates, next_date_intervals,
|
||||
dates,
|
||||
),
|
||||
PREVIOUS_EX_DATE: self.get_sids_to_frames(
|
||||
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals,
|
||||
dates
|
||||
),
|
||||
NEXT_AMOUNT: self.get_sids_to_frames(
|
||||
zip_with_floats, next_amounts, next_date_intervals, dates
|
||||
),
|
||||
PREVIOUS_AMOUNT: self.get_sids_to_frames(
|
||||
zip_with_floats, prev_amounts, prev_date_intervals, dates
|
||||
)
|
||||
}
|
||||
|
||||
cols[DAYS_TO_NEXT_EX_DATE] = self._compute_busday_offsets(
|
||||
cols[NEXT_EX_DATE]
|
||||
)
|
||||
|
||||
cols[DAYS_SINCE_PREV_EX_DATE] = self._compute_busday_offsets(
|
||||
cols[PREVIOUS_EX_DATE]
|
||||
)
|
||||
@@ -386,22 +357,22 @@ class DividendsByPayDateTestCase(WithPipelineEventDataLoader, ZiplineTestCase):
|
||||
loader_type = DividendsByPayDateLoader
|
||||
|
||||
def setup(self, dates):
|
||||
cols = {}
|
||||
cols[NEXT_PAY_DATE] = get_vals_for_dates(
|
||||
zip_with_dates, next_ex_and_pay_dates, next_date_intervals, dates
|
||||
)
|
||||
cols[PREVIOUS_PAY_DATE] = get_vals_for_dates(
|
||||
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals, dates
|
||||
)
|
||||
|
||||
cols[NEXT_AMOUNT] = get_vals_for_dates(
|
||||
zip_with_floats, next_amounts, next_date_intervals, dates
|
||||
)
|
||||
|
||||
cols[PREVIOUS_AMOUNT] = get_vals_for_dates(
|
||||
zip_with_floats, prev_amounts, prev_date_intervals, dates
|
||||
)
|
||||
return cols
|
||||
return {
|
||||
NEXT_PAY_DATE: self.get_sids_to_frames(
|
||||
zip_with_dates, next_ex_and_pay_dates, next_date_intervals,
|
||||
dates
|
||||
),
|
||||
PREVIOUS_PAY_DATE: self.get_sids_to_frames(
|
||||
zip_with_dates, prev_ex_and_pay_dates, prev_date_intervals,
|
||||
dates
|
||||
),
|
||||
NEXT_AMOUNT: self.get_sids_to_frames(
|
||||
zip_with_floats, next_amounts, next_date_intervals, dates
|
||||
),
|
||||
PREVIOUS_AMOUNT: self.get_sids_to_frames(
|
||||
zip_with_floats, prev_amounts, prev_date_intervals, dates
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
class BlazeDividendsByPayDateLoaderTestCase(DividendsByPayDateTestCase):
|
||||
|
||||
+16
-145
@@ -22,97 +22,13 @@ from zipline.pipeline.factors.events import (
|
||||
)
|
||||
from zipline.pipeline.loaders.earnings import EarningsCalendarLoader
|
||||
from zipline.pipeline.loaders.blaze import BlazeEarningsCalendarLoader
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
get_values_for_date_ranges,
|
||||
zip_with_dates
|
||||
)
|
||||
from zipline.testing.fixtures import (
|
||||
WithPipelineEventDataLoader,
|
||||
ZiplineTestCase
|
||||
ZiplineTestCase,
|
||||
WithNextAndPreviousEventDataLoader
|
||||
)
|
||||
|
||||
earnings_cases = [
|
||||
# K1--K2--A1--A2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-15', '2014-01-20'])
|
||||
}),
|
||||
# K1--K2--A2--A1.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-20', '2014-01-15'])
|
||||
}),
|
||||
# K1--A1--K2--A2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-20'])
|
||||
}),
|
||||
# K1 == K2.
|
||||
pd.DataFrame({
|
||||
TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
|
||||
ANNOUNCEMENT_FIELD_NAME: pd.to_datetime(['2014-01-10', '2014-01-15'])
|
||||
}),
|
||||
pd.DataFrame(
|
||||
columns=[ANNOUNCEMENT_FIELD_NAME,
|
||||
TS_FIELD_NAME],
|
||||
dtype='datetime64[ns]'
|
||||
),
|
||||
]
|
||||
|
||||
next_date_intervals = [
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-05', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-15'],
|
||||
['2014-01-16', None]]
|
||||
]
|
||||
|
||||
next_dates = [
|
||||
['NaT', '2014-01-15', '2014-01-20', 'NaT'],
|
||||
['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'],
|
||||
['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'],
|
||||
['NaT', '2014-01-10', '2014-01-15', 'NaT'],
|
||||
['NaT']
|
||||
]
|
||||
|
||||
prev_date_intervals = [
|
||||
[[None, '2014-01-14'],
|
||||
['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-14'],
|
||||
['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-09'],
|
||||
['2014-01-10', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-09'],
|
||||
['2014-01-10', '2014-01-14'],
|
||||
['2014-01-15', None]]
|
||||
]
|
||||
|
||||
prev_dates = [
|
||||
['NaT', '2014-01-15', '2014-01-20'],
|
||||
['NaT', '2014-01-15', '2014-01-20'],
|
||||
['NaT', '2014-01-10', '2014-01-20'],
|
||||
['NaT', '2014-01-10', '2014-01-15'],
|
||||
['NaT']
|
||||
]
|
||||
|
||||
|
||||
class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader,
|
||||
class EarningsCalendarLoaderTestCase(WithNextAndPreviousEventDataLoader,
|
||||
ZiplineTestCase):
|
||||
"""
|
||||
Tests for loading the earnings announcement data.
|
||||
@@ -126,70 +42,25 @@ class EarningsCalendarLoaderTestCase(WithPipelineEventDataLoader,
|
||||
|
||||
@classmethod
|
||||
def get_dataset(cls):
|
||||
return {sid: df for sid, df in enumerate(earnings_cases)}
|
||||
return {sid: df.rename(
|
||||
columns={'other_date': ANNOUNCEMENT_FIELD_NAME}
|
||||
) for sid, df in enumerate(cls.base_cases)}
|
||||
|
||||
loader_type = EarningsCalendarLoader
|
||||
|
||||
def get_expected_next_event_dates(self, dates):
|
||||
return pd.DataFrame({
|
||||
0: get_values_for_date_ranges(zip_with_dates,
|
||||
next_dates[0],
|
||||
next_date_intervals[0],
|
||||
dates),
|
||||
1: get_values_for_date_ranges(zip_with_dates,
|
||||
next_dates[1],
|
||||
next_date_intervals[1],
|
||||
dates),
|
||||
2: get_values_for_date_ranges(zip_with_dates,
|
||||
next_dates[2],
|
||||
next_date_intervals[2],
|
||||
dates),
|
||||
3: get_values_for_date_ranges(zip_with_dates,
|
||||
next_dates[3],
|
||||
next_date_intervals[3],
|
||||
dates),
|
||||
4: zip_with_dates(dates, ['NaT'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
def get_expected_previous_event_dates(self, dates):
|
||||
return pd.DataFrame({
|
||||
0: get_values_for_date_ranges(zip_with_dates,
|
||||
prev_dates[0],
|
||||
prev_date_intervals[0],
|
||||
dates),
|
||||
1: get_values_for_date_ranges(zip_with_dates,
|
||||
prev_dates[1],
|
||||
prev_date_intervals[1],
|
||||
dates),
|
||||
2: get_values_for_date_ranges(zip_with_dates,
|
||||
prev_dates[2],
|
||||
prev_date_intervals[2],
|
||||
dates),
|
||||
3: get_values_for_date_ranges(zip_with_dates,
|
||||
prev_dates[3],
|
||||
prev_date_intervals[3],
|
||||
dates),
|
||||
4: zip_with_dates(dates, ['NaT'] * len(dates)),
|
||||
}, index=dates)
|
||||
|
||||
def setup(self, dates):
|
||||
_expected_next_announce = self.get_expected_next_event_dates(dates)
|
||||
|
||||
_expected_previous_announce = self.get_expected_previous_event_dates(
|
||||
dates
|
||||
cols = {
|
||||
PREVIOUS_ANNOUNCEMENT: self.get_expected_previous_event_dates(
|
||||
dates
|
||||
),
|
||||
NEXT_ANNOUNCEMENT: self.get_expected_next_event_dates(dates),
|
||||
}
|
||||
cols[DAYS_TO_NEXT] = self._compute_busday_offsets(
|
||||
cols[NEXT_ANNOUNCEMENT]
|
||||
)
|
||||
|
||||
_expected_next_busday_offsets = self._compute_busday_offsets(
|
||||
_expected_next_announce
|
||||
cols[DAYS_SINCE_PREV] = self._compute_busday_offsets(
|
||||
cols[PREVIOUS_ANNOUNCEMENT]
|
||||
)
|
||||
_expected_previous_busday_offsets = self._compute_busday_offsets(
|
||||
_expected_previous_announce
|
||||
)
|
||||
cols = {}
|
||||
cols[PREVIOUS_ANNOUNCEMENT] = _expected_previous_announce
|
||||
cols[NEXT_ANNOUNCEMENT] = _expected_next_announce
|
||||
cols[DAYS_TO_NEXT] = _expected_next_busday_offsets
|
||||
cols[DAYS_SINCE_PREV] = _expected_previous_busday_offsets
|
||||
return cols
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ AD_FIELD_NAME = 'asof_date'
|
||||
ANNOUNCEMENT_FIELD_NAME = 'announcement_date'
|
||||
CASH_FIELD_NAME = 'cash'
|
||||
CASH_AMOUNT_FIELD_NAME = 'cash_amount'
|
||||
COUNT_FIELD_NAME = 'count'
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date'
|
||||
DAYS_SINCE_PREV = 'days_since_prev'
|
||||
DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement'
|
||||
@@ -12,18 +13,41 @@ DAYS_SINCE_PREV_EX_DATE = 'days_since_prev_ex_date'
|
||||
DAYS_TO_NEXT = 'days_to_next'
|
||||
DAYS_TO_NEXT_EX_DATE = 'days_to_next_ex_date'
|
||||
EX_DATE_FIELD_NAME = 'ex_date'
|
||||
FISCAL_QUARTER_FIELD_NAME = 'fiscal_quarter'
|
||||
FISCAL_YEAR_FIELD_NAME = 'fiscal_year'
|
||||
HIGH_FIELD_NAME = 'high'
|
||||
LOW_FIELD_NAME = 'low'
|
||||
MEAN_FIELD_NAME = 'mean'
|
||||
NEXT_AMOUNT = 'next_amount'
|
||||
NEXT_ANNOUNCEMENT = 'next_announcement'
|
||||
NEXT_COUNT = 'next_count'
|
||||
NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
|
||||
NEXT_FISCAL_YEAR = 'next_fiscal_year'
|
||||
NEXT_EX_DATE = 'next_ex_date'
|
||||
NEXT_HIGH = 'next_high'
|
||||
NEXT_LOW = 'next_low'
|
||||
NEXT_MEAN = 'next_mean'
|
||||
NEXT_PAY_DATE = 'next_pay_date'
|
||||
NEXT_RELEASE_DATE = 'next_release_date'
|
||||
NEXT_STANDARD_DEVIATION = 'next_standard_deviation'
|
||||
PAY_DATE_FIELD_NAME = 'pay_date'
|
||||
PREVIOUS_AMOUNT = 'previous_amount'
|
||||
PREVIOUS_ANNOUNCEMENT = 'previous_announcement'
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement'
|
||||
PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash'
|
||||
PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count'
|
||||
PREVIOUS_COUNT = 'previous_count'
|
||||
PREVIOUS_EX_DATE = 'previous_ex_date'
|
||||
PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
|
||||
PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
|
||||
PREVIOUS_HIGH = 'previous_high'
|
||||
PREVIOUS_LOW = 'previous_low'
|
||||
PREVIOUS_MEAN = 'previous_mean'
|
||||
PREVIOUS_PAY_DATE = 'previous_pay_date'
|
||||
PREVIOUS_RELEASE_DATE = 'previous_release_date'
|
||||
PREVIOUS_STANDARD_DEVIATION = 'previous_standard_deviation'
|
||||
RELEASE_DATE_FIELD_NAME = 'release_date'
|
||||
SHARE_COUNT_FIELD_NAME = 'share_count'
|
||||
SID_FIELD_NAME = 'sid'
|
||||
STANDARD_DEVIATION_FIELD_NAME = 'standard_deviation'
|
||||
TS_FIELD_NAME = 'timestamp'
|
||||
|
||||
@@ -5,6 +5,7 @@ from .dividends import (
|
||||
DividendsByPayDate,
|
||||
)
|
||||
from .earnings import EarningsCalendar
|
||||
from .consensus_estimates import ConsensusEstimates
|
||||
from .equity_pricing import USEquityPricing
|
||||
from .dataset import DataSet, Column, BoundColumn
|
||||
|
||||
@@ -17,6 +18,7 @@ __all__ = [
|
||||
'DividendsByExDate',
|
||||
'DividendsByPayDate',
|
||||
'EarningsCalendar',
|
||||
'ConsensusEstimates',
|
||||
'ShareBuybackAuthorizations',
|
||||
'USEquityPricing',
|
||||
]
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
Datasets representing consensus estimates data.
|
||||
"""
|
||||
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
|
||||
|
||||
from .dataset import Column, DataSet
|
||||
|
||||
|
||||
class ConsensusEstimates(DataSet):
|
||||
"""
|
||||
Dataset representing consensus estimates data.
|
||||
"""
|
||||
previous_release_date = Column(datetime64ns_dtype)
|
||||
next_release_date = Column(datetime64ns_dtype)
|
||||
previous_standard_deviation = Column(float64_dtype)
|
||||
next_standard_deviation = Column(float64_dtype)
|
||||
previous_count = Column(float64_dtype)
|
||||
next_count = Column(float64_dtype)
|
||||
previous_fiscal_quarter = Column(float64_dtype)
|
||||
next_fiscal_quarter = Column(float64_dtype)
|
||||
previous_high = Column(float64_dtype)
|
||||
next_high = Column(float64_dtype)
|
||||
previous_mean = Column(float64_dtype)
|
||||
next_mean = Column(float64_dtype)
|
||||
previous_fiscal_year = Column(float64_dtype)
|
||||
next_fiscal_year = Column(float64_dtype)
|
||||
previous_low = Column(float64_dtype)
|
||||
next_low = Column(float64_dtype)
|
||||
@@ -1,4 +1,5 @@
|
||||
from .earnings import EarningsCalendarLoader
|
||||
from .consensus_estimates import ConsensusEstimatesLoader
|
||||
from .buyback_auth import (
|
||||
CashBuybackAuthorizationsLoader,
|
||||
ShareBuybackAuthorizationsLoader
|
||||
@@ -16,6 +17,7 @@ __all__ = [
|
||||
'DividendsByExDateLoader',
|
||||
'DividendsByPayDateLoader',
|
||||
'EarningsCalendarLoader',
|
||||
'ConsensusEstimatesLoader',
|
||||
'ShareBuybackAuthorizationsLoader',
|
||||
'USEquityPricingLoader',
|
||||
]
|
||||
|
||||
@@ -17,10 +17,12 @@ from .dividends import (
|
||||
from .earnings import (
|
||||
BlazeEarningsCalendarLoader,
|
||||
)
|
||||
from .consensus_estimates import BlazeConsensusEstimatesLoader
|
||||
|
||||
__all__ = (
|
||||
'BlazeCashBuybackAuthorizationsLoader',
|
||||
'BlazeDividendsByAnnouncementDateLoader',
|
||||
'BlazeConsensusEstimatesLoader',
|
||||
'BlazeDividendsByExDateLoader',
|
||||
'BlazeDividendsByPayDateLoader',
|
||||
'BlazeEarningsCalendarLoader',
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
from zipline.pipeline.common import (
|
||||
COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
SID_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
TS_FIELD_NAME,
|
||||
)
|
||||
from zipline.pipeline.data import ConsensusEstimates
|
||||
from zipline.pipeline.loaders import ConsensusEstimatesLoader
|
||||
from .events import BlazeEventsLoader
|
||||
|
||||
|
||||
class BlazeConsensusEstimatesLoader(BlazeEventsLoader):
|
||||
"""A pipeline loader for the ``ConsensusEstimates`` dataset that
|
||||
loads
|
||||
data from a blaze expression.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
expr : Expr
|
||||
The expression representing the data to load.
|
||||
resources : dict, optional
|
||||
Mapping from the loadable terms of ``expr`` to actual data resources.
|
||||
odo_kwargs : dict, optional
|
||||
Extra keyword arguments to pass to odo when executing the expression.
|
||||
data_query_time : time, optional
|
||||
The time to use for the data query cutoff.
|
||||
data_query_tz : tzinfo or str
|
||||
The timezone to use for the data query cutoff.
|
||||
dataset: DataSet
|
||||
The DataSet object for which this loader loads data.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The expression should have a tabular dshape of::
|
||||
|
||||
Dim * {{
|
||||
{SID_FIELD_NAME}: int64,
|
||||
{TS_FIELD_NAME}: datetime,
|
||||
{RELEASE_DATE_FIELD_NAME}: ?datetime,
|
||||
{STANDARD_DEVIATION_FIELD_NAME}: ?float64,
|
||||
{COUNT_FIELD_NAME}: ?float64,
|
||||
{FISCAL_QUARTER_FIELD_NAME}: ?float64,
|
||||
{HIGH_FIELD_NAME}: ?float64,
|
||||
{MEAN_FIELD_NAME}: ?float64,
|
||||
{FISCAL_YEAR_FIELD_NAME}: ?float64,
|
||||
{LOW_FIELD_NAME}: ?float64,
|
||||
}}
|
||||
|
||||
Where each row of the table is a record including the sid to identify the
|
||||
company, the timestamp when we learned about the announcement,
|
||||
the release date for the corresponding estimate, and other estimate
|
||||
information.
|
||||
|
||||
If the '{TS_FIELD_NAME}' field is not included it is assumed that we
|
||||
start the backtest with knowledge of all announcements.
|
||||
"""
|
||||
|
||||
__doc__ = __doc__.format(
|
||||
TS_FIELD_NAME=TS_FIELD_NAME,
|
||||
SID_FIELD_NAME=SID_FIELD_NAME,
|
||||
RELEASE_DATE_FIELD_NAME=RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME=STANDARD_DEVIATION_FIELD_NAME,
|
||||
COUNT_FIELD_NAME=COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME=FISCAL_QUARTER_FIELD_NAME,
|
||||
HIGH_FIELD_NAME=HIGH_FIELD_NAME,
|
||||
MEAN_FIELD_NAME=MEAN_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME=FISCAL_YEAR_FIELD_NAME,
|
||||
LOW_FIELD_NAME=LOW_FIELD_NAME,
|
||||
)
|
||||
|
||||
_expected_fields = frozenset({
|
||||
TS_FIELD_NAME,
|
||||
SID_FIELD_NAME,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
})
|
||||
|
||||
concrete_loader = ConsensusEstimatesLoader
|
||||
default_dataset = ConsensusEstimates
|
||||
@@ -45,7 +45,7 @@ class BlazeDividendsByAnnouncementDateLoader(BlazeEventsLoader):
|
||||
Dim * {{
|
||||
{SID_FIELD_NAME}: int64,
|
||||
{TS_FIELD_NAME}: datetime,
|
||||
{CASH_AMOUNT_FIELD_NAME}: ?datetime,
|
||||
{CASH_AMOUNT_FIELD_NAME}: ?float64,
|
||||
{ANNOUNCEMENT_FIELD_NAME}: ?datetime,
|
||||
}}
|
||||
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Reference implementation for ConsensusEstimates loaders.
|
||||
"""
|
||||
|
||||
from ..data import ConsensusEstimates
|
||||
from .events import EventsLoader
|
||||
from zipline.pipeline.common import (
|
||||
COUNT_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
)
|
||||
from zipline.utils.memoize import lazyval
|
||||
|
||||
|
||||
class ConsensusEstimatesLoader(EventsLoader):
    """
    Reference events loader for the ConsensusEstimates dataset.

    Every ``next_*``/``previous_*`` column of the dataset gets a matching
    ``*_loader`` attribute, declared with ``lazyval``. Each one delegates
    to the corresponding ``_next_event_*``/``_previous_event_*`` helper
    on the base class, always using the release date field as the event
    date field.
    """

    # Field names handed to the base loader as the expected columns.
    expected_cols = frozenset({
        RELEASE_DATE_FIELD_NAME,
        STANDARD_DEVIATION_FIELD_NAME,
        COUNT_FIELD_NAME,
        FISCAL_QUARTER_FIELD_NAME,
        HIGH_FIELD_NAME,
        MEAN_FIELD_NAME,
        FISCAL_YEAR_FIELD_NAME,
        LOW_FIELD_NAME,
    })

    def __init__(self, all_dates, events_by_sid,
                 infer_timestamps=False,
                 dataset=ConsensusEstimates):
        """
        Forward all arguments to ``EventsLoader.__init__``.

        ``dataset`` defaults to ``ConsensusEstimates`` and is passed to
        the base class by keyword.
        """
        super(ConsensusEstimatesLoader, self).__init__(
            all_dates,
            events_by_sid,
            infer_timestamps,
            dataset=dataset,
        )

    # -- release date ------------------------------------------------------

    @lazyval
    def next_release_date_loader(self):
        """Date loader for ``dataset.next_release_date``."""
        column = self.dataset.next_release_date
        return self._next_event_date_loader(column, RELEASE_DATE_FIELD_NAME)

    @lazyval
    def previous_release_date_loader(self):
        """Date loader for ``dataset.previous_release_date``."""
        column = self.dataset.previous_release_date
        return self._previous_event_date_loader(
            column, RELEASE_DATE_FIELD_NAME,
        )

    # -- standard deviation ------------------------------------------------

    @lazyval
    def next_standard_deviation_loader(self):
        """Value loader for ``dataset.next_standard_deviation``."""
        column = self.dataset.next_standard_deviation
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME,
        )

    @lazyval
    def previous_standard_deviation_loader(self):
        """Value loader for ``dataset.previous_standard_deviation``."""
        column = self.dataset.previous_standard_deviation
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, STANDARD_DEVIATION_FIELD_NAME,
        )

    # -- count -------------------------------------------------------------

    @lazyval
    def next_count_loader(self):
        """Value loader for ``dataset.next_count``."""
        column = self.dataset.next_count
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME,
        )

    @lazyval
    def previous_count_loader(self):
        """Value loader for ``dataset.previous_count``."""
        column = self.dataset.previous_count
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, COUNT_FIELD_NAME,
        )

    # -- fiscal quarter ----------------------------------------------------

    @lazyval
    def next_fiscal_quarter_loader(self):
        """Value loader for ``dataset.next_fiscal_quarter``."""
        column = self.dataset.next_fiscal_quarter
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME,
        )

    @lazyval
    def previous_fiscal_quarter_loader(self):
        """Value loader for ``dataset.previous_fiscal_quarter``."""
        column = self.dataset.previous_fiscal_quarter
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME,
        )

    # -- high --------------------------------------------------------------

    @lazyval
    def next_high_loader(self):
        """Value loader for ``dataset.next_high``."""
        column = self.dataset.next_high
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME,
        )

    @lazyval
    def previous_high_loader(self):
        """Value loader for ``dataset.previous_high``."""
        column = self.dataset.previous_high
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, HIGH_FIELD_NAME,
        )

    # -- mean --------------------------------------------------------------

    @lazyval
    def next_mean_loader(self):
        """Value loader for ``dataset.next_mean``."""
        column = self.dataset.next_mean
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME,
        )

    @lazyval
    def previous_mean_loader(self):
        """Value loader for ``dataset.previous_mean``."""
        column = self.dataset.previous_mean
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, MEAN_FIELD_NAME,
        )

    # -- fiscal year -------------------------------------------------------

    @lazyval
    def next_fiscal_year_loader(self):
        """Value loader for ``dataset.next_fiscal_year``."""
        column = self.dataset.next_fiscal_year
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME,
        )

    @lazyval
    def previous_fiscal_year_loader(self):
        """Value loader for ``dataset.previous_fiscal_year``."""
        column = self.dataset.previous_fiscal_year
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, FISCAL_YEAR_FIELD_NAME,
        )

    # -- low ---------------------------------------------------------------

    @lazyval
    def next_low_loader(self):
        """Value loader for ``dataset.next_low``."""
        column = self.dataset.next_low
        return self._next_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME,
        )

    @lazyval
    def previous_low_loader(self):
        """Value loader for ``dataset.previous_low``."""
        column = self.dataset.previous_low
        return self._previous_event_value_loader(
            column, RELEASE_DATE_FIELD_NAME, LOW_FIELD_NAME,
        )
|
||||
+170
-10
@@ -6,19 +6,17 @@ from unittest import TestCase
|
||||
from contextlib2 import ExitStack
|
||||
from logbook import NullHandler, Logger
|
||||
from nose_parameterized import parameterized
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import responses
|
||||
from six import with_metaclass, iteritems
|
||||
from toolz import flip
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import responses
|
||||
|
||||
|
||||
from ..assets.synthetic import make_simple_equity_info
|
||||
from .core import (
|
||||
create_daily_bar_data,
|
||||
create_minute_bar_data,
|
||||
gen_calendars,
|
||||
tmp_asset_finder,
|
||||
tmp_dir,
|
||||
)
|
||||
from ..data.data_portal import DataPortal
|
||||
@@ -26,7 +24,6 @@ from ..data.us_equity_pricing import (
|
||||
SQLiteAdjustmentReader,
|
||||
SQLiteAdjustmentWriter,
|
||||
)
|
||||
from ..finance.trading import TradingEnvironment
|
||||
from ..data.us_equity_pricing import (
|
||||
BcolzDailyBarReader,
|
||||
BcolzDailyBarWriter,
|
||||
@@ -36,13 +33,21 @@ from ..data.minute_bars import (
|
||||
BcolzMinuteBarWriter,
|
||||
US_EQUITIES_MINUTES_PER_DAY
|
||||
)
|
||||
|
||||
from ..finance.trading import TradingEnvironment
|
||||
from ..utils import tradingcalendar, factory
|
||||
from ..utils.classproperty import classproperty
|
||||
from ..utils.final import FinalMeta, final
|
||||
from ..utils.metautils import compose_types
|
||||
from ..pipeline import Pipeline, SimplePipelineEngine
|
||||
from ..utils.numpy_utils import make_datetime64D
|
||||
from ..utils.numpy_utils import NaTD
|
||||
from .core import tmp_asset_finder, make_simple_equity_info, gen_calendars
|
||||
from zipline.pipeline import Pipeline, SimplePipelineEngine
|
||||
from zipline.utils.numpy_utils import make_datetime64D
|
||||
from zipline.utils.numpy_utils import NaTD
|
||||
from zipline.pipeline.common import TS_FIELD_NAME
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
get_values_for_date_ranges,
|
||||
zip_with_dates
|
||||
)
|
||||
|
||||
|
||||
def _take_out_the_trash():
|
||||
@@ -890,6 +895,42 @@ class WithPipelineEventDataLoader(with_metaclass(
|
||||
loader = self.loader_type(*self.pipeline_event_loader_args(dates))
|
||||
return SimplePipelineEngine(lambda _: loader, dates, self.asset_finder)
|
||||
|
||||
def get_sids_to_frames(self,
                       zip_date_index_with_vals,
                       vals,
                       date_intervals,
                       dates):
    """
    Build a DataFrame of expected values with one column per sid.

    Every sid returned by ``self.get_sids()`` except the last gets a
    column computed by ``get_values_for_date_ranges``. The last sid gets
    a column built from ``'NaN'`` repeated once per entry in `dates`.

    Parameters
    ----------
    zip_date_index_with_vals: callable
        A function that returns a series of `vals` repeated based on the
        number of days in the date interval for each val, indexed by the
        dates in `dates`.
    vals: iterable
        Indexed by sid; each entry holds the values that correspond to
        the intervals in ``date_intervals[sid]``.
    date_intervals: list
        Indexed by sid; each entry is the list of date intervals that
        the values in ``vals[sid]`` apply to.
    dates: DatetimeIndex
        The dates which will serve as the index for each Series for each
        sid in the DataFrame.

    Returns
    -------
    frame: pd.DataFrame
        One column per sid, as described above.
    """
    per_sid = {}
    for sid in self.get_sids()[:-1]:
        per_sid[sid] = get_values_for_date_ranges(
            zip_date_index_with_vals,
            vals[sid],
            date_intervals[sid],
            dates,
        )
    expected = pd.DataFrame(per_sid)

    # The final sid is not looked up in `vals`/`date_intervals`; its
    # column is filled with the 'NaN' placeholder for every date.
    placeholder = ['NaN'] * len(dates)
    expected[self.get_sids()[-1]] = zip_date_index_with_vals(
        dates, placeholder
    )
    return expected
|
||||
|
||||
@staticmethod
|
||||
def _compute_busday_offsets(announcement_dates):
|
||||
"""
|
||||
@@ -1026,3 +1067,122 @@ class WithResponses(object):
|
||||
self.responses = self.enter_instance_context(
|
||||
responses.RequestsMock(),
|
||||
)
|
||||
|
||||
|
||||
class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader):
    """
    ZiplineTestCase mixin with shared fixtures for event data loader tests
    that cover both next and previous events.

    Attributes
    ----------
    base_cases
        Template frames combining a knowledge date (timestamp) with some
        'other_date' in various orders; test cases are derived from these.
    next_date_intervals
        Date intervals for the next event, based on the dates in
        `base_cases`.
    next_dates
        The next date from `other_date` that is known about during each
        interval.
    prev_date_intervals
        Date intervals for each sid for the previous event, based on the
        dates in `base_cases`.
    prev_dates
        The previous date from `other_date` that is known about during
        each interval.

    Methods
    -------
    get_expected_previous_event_dates(dates)
        Convenience function that fills a DataFrame with the previously
        known dates for each sid for the given dates.
    get_expected_next_event_dates(dates)
        Convenience function that fills a DataFrame with the next known
        dates for each sid for the given dates.
    """

    base_cases = [
        # K1--K2--A1--A2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
            'other_date': pd.to_datetime(['2014-01-15', '2014-01-20']),
        }),
        # K1--K2--A2--A1.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
            'other_date': pd.to_datetime(['2014-01-20', '2014-01-15']),
        }),
        # K1--A1--K2--A2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-15']),
            'other_date': pd.to_datetime(['2014-01-10', '2014-01-20']),
        }),
        # K1 == K2.
        pd.DataFrame({
            TS_FIELD_NAME: pd.to_datetime(['2014-01-05'] * 2),
            'other_date': pd.to_datetime(['2014-01-10', '2014-01-15']),
        }),
        # Frame with the two columns but no rows.
        pd.DataFrame(
            columns=['other_date', TS_FIELD_NAME],
            dtype='datetime64[ns]',
        ),
    ]

    next_date_intervals = [
        [
            [None, '2014-01-04'],
            ['2014-01-05', '2014-01-15'],
            ['2014-01-16', '2014-01-20'],
            ['2014-01-21', None],
        ],
        [
            [None, '2014-01-04'],
            ['2014-01-05', '2014-01-09'],
            ['2014-01-10', '2014-01-15'],
            ['2014-01-16', '2014-01-20'],
            ['2014-01-21', None],
        ],
        [
            [None, '2014-01-04'],
            ['2014-01-05', '2014-01-10'],
            ['2014-01-11', '2014-01-14'],
            ['2014-01-15', '2014-01-20'],
            ['2014-01-21', None],
        ],
        [
            [None, '2014-01-04'],
            ['2014-01-05', '2014-01-10'],
            ['2014-01-11', '2014-01-15'],
            ['2014-01-16', None],
        ],
    ]

    next_dates = [
        ['NaT', '2014-01-15', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-20', '2014-01-15', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-10', 'NaT', '2014-01-20', 'NaT'],
        ['NaT', '2014-01-10', '2014-01-15', 'NaT'],
        ['NaT'],
    ]

    prev_date_intervals = [
        [
            [None, '2014-01-14'],
            ['2014-01-15', '2014-01-19'],
            ['2014-01-20', None],
        ],
        [
            [None, '2014-01-14'],
            ['2014-01-15', '2014-01-19'],
            ['2014-01-20', None],
        ],
        [
            [None, '2014-01-09'],
            ['2014-01-10', '2014-01-19'],
            ['2014-01-20', None],
        ],
        [
            [None, '2014-01-09'],
            ['2014-01-10', '2014-01-14'],
            ['2014-01-15', None],
        ],
    ]

    prev_dates = [
        ['NaT', '2014-01-15', '2014-01-20'],
        ['NaT', '2014-01-15', '2014-01-20'],
        ['NaT', '2014-01-10', '2014-01-20'],
        ['NaT', '2014-01-10', '2014-01-15'],
        ['NaT'],
    ]

    def get_expected_previous_event_dates(self, dates):
        """
        Return the frame of previously known event dates per sid, built
        by ``get_sids_to_frames`` from `prev_dates` and
        `prev_date_intervals` over `dates`.
        """
        known = self.prev_dates
        intervals = self.prev_date_intervals
        return self.get_sids_to_frames(zip_with_dates, known, intervals, dates)

    def get_expected_next_event_dates(self, dates):
        """
        Return the frame of next known event dates per sid, built by
        ``get_sids_to_frames`` from `next_dates` and
        `next_date_intervals` over `dates`.
        """
        known = self.next_dates
        intervals = self.next_date_intervals
        return self.get_sids_to_frames(zip_with_dates, known, intervals, dates)
|
||||
|
||||
Reference in New Issue
Block a user