mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-28 20:22:12 +08:00
Merge pull request #1153 from quantopian/filter-nulls-in-expected-cols
Filter nulls in expected cols
This commit is contained in:
@@ -42,7 +42,8 @@ from zipline.testing.fixtures import (
|
||||
)
|
||||
|
||||
date_intervals = [
|
||||
[[None, '2014-01-04'], ['2014-01-05', '2014-01-09'], ['2014-01-10', None]]
|
||||
[['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-31']]
|
||||
]
|
||||
|
||||
buyback_authorizations_cases = [
|
||||
|
||||
@@ -98,41 +98,41 @@ dividends_cases = [
|
||||
|
||||
prev_date_intervals = [
|
||||
[
|
||||
[None, '2014-01-14'], ['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]
|
||||
['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-14'], ['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]
|
||||
['2014-01-01', '2014-01-14'], ['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-09'], ['2014-01-10', '2014-01-19'],
|
||||
['2014-01-20', None]
|
||||
['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-19'],
|
||||
['2014-01-20', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-09'], ['2014-01-10', '2014-01-14'],
|
||||
['2014-01-15', None]
|
||||
['2014-01-01', '2014-01-09'], ['2014-01-10', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-31']
|
||||
]
|
||||
]
|
||||
|
||||
next_date_intervals = [
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'], ['2014-01-21', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'], ['2014-01-21', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-15'], ['2014-01-16', '2014-01-20'],
|
||||
['2014-01-21', None]
|
||||
['2014-01-21', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-10'],
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-14'], ['2014-01-15', '2014-01-20'],
|
||||
['2014-01-21', None]
|
||||
['2014-01-21', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-15'], ['2014-01-16', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-15'], ['2014-01-16', '2014-01-31']
|
||||
]
|
||||
]
|
||||
|
||||
@@ -184,19 +184,19 @@ class DividendsByAnnouncementDateTestCase(WithPipelineEventDataLoader,
|
||||
def setup(self, dates):
|
||||
date_intervals = [
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', '2014-01-14'],
|
||||
['2014-01-15', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-31']
|
||||
],
|
||||
[
|
||||
[None, '2014-01-04'], ['2014-01-05', None]
|
||||
['2014-01-01', '2014-01-04'], ['2014-01-05', '2014-01-31']
|
||||
]
|
||||
]
|
||||
announcement_dates = [['NaT', '2014-01-04', '2014-01-09'],
|
||||
|
||||
@@ -6,6 +6,8 @@ from unittest import TestCase
|
||||
|
||||
import blaze as bz
|
||||
from nose_parameterized import parameterized
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal
|
||||
import pandas as pd
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
@@ -28,18 +30,22 @@ from zipline.pipeline.loaders.events import (
|
||||
from zipline.utils.memoize import lazyval
|
||||
from zipline.utils.numpy_utils import datetime64ns_dtype
|
||||
|
||||
OTHER_FIELD = "other_field"
|
||||
|
||||
ABSTRACT_CONCRETE_LOADER_ERROR = 'abstract methods concrete_loader'
|
||||
ABSTRACT_EXPECTED_COLS_ERROR = 'abstract methods expected_cols'
|
||||
ABSTRACT_EXPECTED_COLS_ERROR = 'abstract methods event_date_col, expected_cols'
|
||||
|
||||
|
||||
class EventDataSet(DataSet):
|
||||
previous_announcement = Column(datetime64ns_dtype)
|
||||
next_announcement = Column(datetime64ns_dtype)
|
||||
|
||||
|
||||
class EventDataSetLoader(EventsLoader):
|
||||
expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME])
|
||||
|
||||
event_date_col = ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
def __init__(self,
|
||||
all_dates,
|
||||
events_by_sid,
|
||||
@@ -56,21 +62,21 @@ class EventDataSetLoader(EventsLoader):
|
||||
def previous_announcement_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_announcement,
|
||||
ANNOUNCEMENT_FIELD_NAME,
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def next_announcement_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_announcement,
|
||||
ANNOUNCEMENT_FIELD_NAME,
|
||||
return self._next_event_date_loader(
|
||||
self.dataset.next_announcement,
|
||||
)
|
||||
|
||||
|
||||
# Test case just for catching an error when multiple columns are in the wrong
|
||||
# data format, so no loader defined.
|
||||
class EventDataSetLoaderMultipleExpectedCols(EventsLoader):
|
||||
expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME, "other_field"])
|
||||
class EventDataSetLoaderMultipleExpectedColsNoColumnLoaders(EventsLoader):
|
||||
expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME, OTHER_FIELD])
|
||||
|
||||
event_date_col = ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
|
||||
class EventDataSetLoaderNoExpectedCols(EventsLoader):
|
||||
@@ -92,6 +98,34 @@ dtx = pd.date_range('2014-01-01', '2014-01-10')
|
||||
|
||||
|
||||
class EventLoaderTestCase(TestCase):
|
||||
def test_null_in_event_date_col(self):
|
||||
# Tests that if there is a null date in the event date column, it is
|
||||
# filtered out and does not break on loading the adjusted array.
|
||||
dates_with_null = pd.Series(dtx)
|
||||
dates_with_null[2] = pd.NaT
|
||||
events_by_sid = {0: pd.DataFrame({ANNOUNCEMENT_FIELD_NAME:
|
||||
dates_with_null,
|
||||
TS_FIELD_NAME: dtx})}
|
||||
loader = EventDataSetLoader(
|
||||
dtx,
|
||||
events_by_sid,
|
||||
)
|
||||
|
||||
prev_result = loader.load_adjusted_array({
|
||||
EventDataSet.previous_announcement
|
||||
}, dtx, [0], [True])[EventDataSet.previous_announcement].data[:, 0]
|
||||
|
||||
next_result = loader.load_adjusted_array({
|
||||
EventDataSet.next_announcement
|
||||
}, dtx, [0], [True])[EventDataSet.next_announcement].data[:, 0]
|
||||
|
||||
expected_prev = dates_with_null[:]
|
||||
expected_prev[2] = dtx[1]
|
||||
assert_array_equal(prev_result, expected_prev)
|
||||
expected_next = dates_with_null[:]
|
||||
expected_next[2] = np.datetime64('NaT')
|
||||
assert_array_equal(next_result, expected_next)
|
||||
|
||||
def assert_loader_error(self, events_by_sid, error, msg,
|
||||
infer_timestamps, loader):
|
||||
with self.assertRaisesRegexp(error, re.escape(msg)):
|
||||
@@ -198,7 +232,7 @@ class EventLoaderTestCase(TestCase):
|
||||
[dtx, dtx],
|
||||
True,
|
||||
WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=0),
|
||||
EventDataSetLoaderMultipleExpectedCols
|
||||
EventDataSetLoaderMultipleExpectedColsNoColumnLoaders
|
||||
],
|
||||
[
|
||||
[dtx],
|
||||
@@ -212,7 +246,7 @@ class EventLoaderTestCase(TestCase):
|
||||
[dtx, dtx],
|
||||
False,
|
||||
WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=0),
|
||||
EventDataSetLoaderMultipleExpectedCols
|
||||
EventDataSetLoaderMultipleExpectedColsNoColumnLoaders
|
||||
]
|
||||
]
|
||||
)
|
||||
@@ -246,3 +280,20 @@ class BlazeEventLoaderTestCase(TestCase):
|
||||
SID_FIELD_NAME: 0})
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class BlazeEventDataSetLoader(BlazeEventsLoader):
|
||||
concrete_loader = EventDataSetLoader
|
||||
_expected_fields = frozenset({ANNOUNCEMENT_FIELD_NAME,
|
||||
TS_FIELD_NAME,
|
||||
SID_FIELD_NAME})
|
||||
|
||||
def __init__(self,
|
||||
expr,
|
||||
dataset=EventDataSet,
|
||||
**kwargs):
|
||||
super(
|
||||
BlazeEventDataSetLoader, self
|
||||
).__init__(expr,
|
||||
dataset=dataset,
|
||||
**kwargs)
|
||||
|
||||
@@ -27,6 +27,8 @@ class CashBuybackAuthorizationsLoader(EventsLoader):
|
||||
expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
CASH_FIELD_NAME])
|
||||
|
||||
event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
def __init__(self,
|
||||
all_dates,
|
||||
events_by_sid,
|
||||
@@ -43,7 +45,6 @@ class CashBuybackAuthorizationsLoader(EventsLoader):
|
||||
def cash_amount_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.cash_amount,
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
CASH_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -51,7 +52,6 @@ class CashBuybackAuthorizationsLoader(EventsLoader):
|
||||
def announcement_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.announcement_date,
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
)
|
||||
|
||||
|
||||
@@ -70,6 +70,8 @@ class ShareBuybackAuthorizationsLoader(EventsLoader):
|
||||
expected_cols = frozenset([BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
SHARE_COUNT_FIELD_NAME])
|
||||
|
||||
event_date_col = BUYBACK_ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
def __init__(self,
|
||||
all_dates,
|
||||
events_by_sid,
|
||||
@@ -86,7 +88,6 @@ class ShareBuybackAuthorizationsLoader(EventsLoader):
|
||||
def share_count_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.share_count,
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
SHARE_COUNT_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -94,5 +95,4 @@ class ShareBuybackAuthorizationsLoader(EventsLoader):
|
||||
def announcement_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.announcement_date,
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -29,6 +29,8 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
LOW_FIELD_NAME,
|
||||
ACTUAL_VALUE_FIELD_NAME])
|
||||
|
||||
event_date_col = RELEASE_DATE_FIELD_NAME
|
||||
|
||||
def __init__(self, all_dates, events_by_sid,
|
||||
infer_timestamps=False,
|
||||
dataset=ConsensusEstimates):
|
||||
@@ -40,21 +42,18 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_release_date_loader(self):
|
||||
return self._next_event_date_loader(
|
||||
self.dataset.next_release_date,
|
||||
RELEASE_DATE_FIELD_NAME
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def previous_release_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_release_date,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def next_standard_deviation_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_standard_deviation,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -62,7 +61,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_standard_deviation_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_standard_deviation,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
STANDARD_DEVIATION_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -70,7 +68,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_count_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_count,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
COUNT_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -78,7 +75,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_count_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_count,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
COUNT_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -86,7 +82,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_fiscal_quarter_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_fiscal_quarter,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -94,7 +89,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_fiscal_quarter_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_fiscal_quarter,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
FISCAL_QUARTER_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -102,7 +96,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_high_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_high,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -110,7 +103,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_high_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_high,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
HIGH_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -118,7 +110,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_mean_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_mean,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -126,7 +117,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_mean_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_mean,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
MEAN_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -134,7 +124,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_fiscal_year_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_fiscal_year,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -142,7 +131,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_fiscal_year_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_fiscal_year,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
FISCAL_YEAR_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -150,7 +138,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def next_low_loader(self):
|
||||
return self._next_event_value_loader(
|
||||
self.dataset.next_low,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -158,7 +145,6 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_low_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_low,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
LOW_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -166,6 +152,5 @@ class ConsensusEstimatesLoader(EventsLoader):
|
||||
def previous_actual_value_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_actual_value,
|
||||
RELEASE_DATE_FIELD_NAME,
|
||||
ACTUAL_VALUE_FIELD_NAME,
|
||||
)
|
||||
|
||||
@@ -17,6 +17,8 @@ class DividendsByAnnouncementDateLoader(EventsLoader):
|
||||
expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME])
|
||||
|
||||
event_date_col = ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
def __init__(self, all_dates, events_by_sid,
|
||||
infer_timestamps=False,
|
||||
dataset=DividendsByAnnouncementDate):
|
||||
@@ -28,14 +30,12 @@ class DividendsByAnnouncementDateLoader(EventsLoader):
|
||||
def previous_announcement_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_announcement_date,
|
||||
ANNOUNCEMENT_FIELD_NAME
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def previous_amount_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_amount,
|
||||
ANNOUNCEMENT_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -44,6 +44,8 @@ class DividendsByPayDateLoader(EventsLoader):
|
||||
expected_cols = frozenset([PAY_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME])
|
||||
|
||||
event_date_col = PAY_DATE_FIELD_NAME
|
||||
|
||||
def __init__(self, all_dates, events_by_sid,
|
||||
infer_timestamps=False,
|
||||
dataset=DividendsByPayDate):
|
||||
@@ -53,27 +55,23 @@ class DividendsByPayDateLoader(EventsLoader):
|
||||
|
||||
@lazyval
|
||||
def next_date_loader(self):
|
||||
return self._next_event_date_loader(self.dataset.next_date,
|
||||
PAY_DATE_FIELD_NAME)
|
||||
return self._next_event_date_loader(self.dataset.next_date)
|
||||
|
||||
@lazyval
|
||||
def previous_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_date,
|
||||
PAY_DATE_FIELD_NAME
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def next_amount_loader(self):
|
||||
return self._next_event_value_loader(self.dataset.next_amount,
|
||||
PAY_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME)
|
||||
|
||||
@lazyval
|
||||
def previous_amount_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_amount,
|
||||
PAY_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -82,6 +80,8 @@ class DividendsByExDateLoader(EventsLoader):
|
||||
expected_cols = frozenset([EX_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME])
|
||||
|
||||
event_date_col = EX_DATE_FIELD_NAME
|
||||
|
||||
def __init__(self, all_dates, events_by_sid,
|
||||
infer_timestamps=False,
|
||||
dataset=DividendsByExDate):
|
||||
@@ -91,26 +91,22 @@ class DividendsByExDateLoader(EventsLoader):
|
||||
|
||||
@lazyval
|
||||
def next_date_loader(self):
|
||||
return self._next_event_date_loader(self.dataset.next_date,
|
||||
EX_DATE_FIELD_NAME)
|
||||
return self._next_event_date_loader(self.dataset.next_date)
|
||||
|
||||
@lazyval
|
||||
def previous_date_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_date,
|
||||
EX_DATE_FIELD_NAME
|
||||
)
|
||||
|
||||
@lazyval
|
||||
def next_amount_loader(self):
|
||||
return self._next_event_value_loader(self.dataset.next_amount,
|
||||
EX_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME)
|
||||
|
||||
@lazyval
|
||||
def previous_amount_loader(self):
|
||||
return self._previous_event_value_loader(
|
||||
self.dataset.previous_amount,
|
||||
EX_DATE_FIELD_NAME,
|
||||
CASH_AMOUNT_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -12,6 +12,8 @@ class EarningsCalendarLoader(EventsLoader):
|
||||
|
||||
expected_cols = frozenset([ANNOUNCEMENT_FIELD_NAME])
|
||||
|
||||
event_date_col = ANNOUNCEMENT_FIELD_NAME
|
||||
|
||||
def __init__(self, all_dates, events_by_sid,
|
||||
infer_timestamps=False,
|
||||
dataset=EarningsCalendar):
|
||||
@@ -21,12 +23,10 @@ class EarningsCalendarLoader(EventsLoader):
|
||||
|
||||
@lazyval
|
||||
def next_announcement_loader(self):
|
||||
return self._next_event_date_loader(self.dataset.next_announcement,
|
||||
ANNOUNCEMENT_FIELD_NAME)
|
||||
return self._next_event_date_loader(self.dataset.next_announcement)
|
||||
|
||||
@lazyval
|
||||
def previous_announcement_loader(self):
|
||||
return self._previous_event_date_loader(
|
||||
self.dataset.previous_announcement,
|
||||
ANNOUNCEMENT_FIELD_NAME
|
||||
)
|
||||
|
||||
@@ -77,6 +77,10 @@ class EventsLoader(PipelineLoader):
|
||||
def expected_cols(self):
|
||||
raise NotImplemented('expected_cols')
|
||||
|
||||
@abc.abstractproperty
|
||||
def event_date_col(self):
|
||||
raise NotImplemented('event_date_col')
|
||||
|
||||
def __init__(self,
|
||||
all_dates,
|
||||
events_by_sid,
|
||||
@@ -148,7 +152,8 @@ class EventsLoader(PipelineLoader):
|
||||
raise ValueError(
|
||||
WRONG_MANY_COL_DATA_FORMAT_ERROR.format(sid=k)
|
||||
)
|
||||
|
||||
self.events_by_sid = {sid: df.dropna(subset=[self.event_date_col]) for
|
||||
sid, df in self.events_by_sid.items()}
|
||||
self.dataset = dataset
|
||||
|
||||
def get_loader(self, column):
|
||||
@@ -164,7 +169,7 @@ class EventsLoader(PipelineLoader):
|
||||
for column in columns
|
||||
)
|
||||
|
||||
def _next_event_date_loader(self, next_date_field, event_date_field_name):
|
||||
def _next_event_date_loader(self, next_date_field):
|
||||
return DataFrameLoader(
|
||||
next_date_field,
|
||||
next_event_frame(
|
||||
@@ -172,15 +177,14 @@ class EventsLoader(PipelineLoader):
|
||||
self.all_dates,
|
||||
next_date_field.missing_value,
|
||||
next_date_field.dtype,
|
||||
event_date_field_name,
|
||||
event_date_field_name
|
||||
self.event_date_col,
|
||||
self.event_date_col
|
||||
),
|
||||
adjustments=None,
|
||||
)
|
||||
|
||||
def _next_event_value_loader(self,
|
||||
next_value_field,
|
||||
event_date_field_name,
|
||||
value_field_name):
|
||||
return DataFrameLoader(
|
||||
next_value_field,
|
||||
@@ -189,15 +193,14 @@ class EventsLoader(PipelineLoader):
|
||||
self.all_dates,
|
||||
next_value_field.missing_value,
|
||||
next_value_field.dtype,
|
||||
event_date_field_name,
|
||||
self.event_date_col,
|
||||
value_field_name
|
||||
),
|
||||
adjustments=None,
|
||||
)
|
||||
|
||||
def _previous_event_date_loader(self,
|
||||
prev_date_field,
|
||||
event_date_field_name):
|
||||
prev_date_field):
|
||||
return DataFrameLoader(
|
||||
prev_date_field,
|
||||
previous_event_frame(
|
||||
@@ -205,15 +208,14 @@ class EventsLoader(PipelineLoader):
|
||||
self.all_dates,
|
||||
NaTD,
|
||||
'datetime64[ns]',
|
||||
event_date_field_name,
|
||||
event_date_field_name
|
||||
self.event_date_col,
|
||||
self.event_date_col
|
||||
),
|
||||
adjustments=None,
|
||||
)
|
||||
|
||||
def _previous_event_value_loader(self,
|
||||
previous_value_field,
|
||||
event_date_field_name,
|
||||
value_field_name):
|
||||
return DataFrameLoader(
|
||||
previous_value_field,
|
||||
@@ -222,7 +224,7 @@ class EventsLoader(PipelineLoader):
|
||||
self.all_dates,
|
||||
previous_value_field.missing_value,
|
||||
previous_value_field.dtype,
|
||||
event_date_field_name,
|
||||
self.event_date_col,
|
||||
value_field_name
|
||||
),
|
||||
adjustments=None,
|
||||
|
||||
@@ -280,25 +280,18 @@ def zip_with_floats(dates, flts):
|
||||
return pd.Series(flts, index=dates, dtype='float')
|
||||
|
||||
|
||||
def num_days_in_range(dates, start, end):
|
||||
"""
|
||||
Return the number of days in `dates` between start and end, inclusive.
|
||||
"""
|
||||
start_idx, stop_idx = dates.slice_locs(start, end)
|
||||
return stop_idx - start_idx
|
||||
|
||||
|
||||
def zip_with_dates(index_dates, dts):
|
||||
return pd.Series(pd.to_datetime(dts), index=index_dates)
|
||||
|
||||
|
||||
def get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
vals_for_date_intervals,
|
||||
date_intervals,
|
||||
starts,
|
||||
ends,
|
||||
date_index):
|
||||
"""
|
||||
Returns a Series of values indexed by date based on values for the given
|
||||
date intervals.
|
||||
Returns a Series of values indexed by date based on the intervals defined
|
||||
by the start and end dates.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -307,10 +300,10 @@ def get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
returns a pd.Series with the values indexed by the dates.
|
||||
vals_for_date_intervals : list
|
||||
A list of values for each date interval in `date_intervals`.
|
||||
date_intervals : list
|
||||
A list of pairs of dates, where each pair represents a date interval
|
||||
that corresponds to the value at the same index in
|
||||
`vals_for_date_intervals`.
|
||||
starts : DatetimeIndex
|
||||
A DatetimeIndex of start dates.
|
||||
ends : list
|
||||
A DatetimeIndex of end dates.
|
||||
date_index : DatetimeIndex
|
||||
The DatetimeIndex containing all dates for which values were requested.
|
||||
|
||||
@@ -321,10 +314,22 @@ def get_values_for_date_ranges(zip_date_index_with_vals,
|
||||
to dates based on the given date intervals.
|
||||
"""
|
||||
# Fill in given values for given date ranges.
|
||||
end_indexes = date_index.values.searchsorted(ends)
|
||||
start_indexes = date_index.values.searchsorted(starts)
|
||||
num_days = (end_indexes - start_indexes) + 1
|
||||
|
||||
# In case any of the end dates falls on days missing from the date_index,
|
||||
# searchsorted will have placed their index within `date_index` to the
|
||||
# index of the next start date, so we will have added 1 extra day for
|
||||
# each of these. Subtract those extra days, but ignore any cases where the
|
||||
# start and end dates are equal. Note: if any of the start dates is
|
||||
# missing, it won't affect calculations because searchsorted will advance
|
||||
# the index to the next date within the same range.
|
||||
num_days[np.where(~np.in1d(ends, date_index) & (num_days != 0))] -= 1
|
||||
return zip_date_index_with_vals(
|
||||
date_index,
|
||||
np.repeat(vals_for_date_intervals,
|
||||
[num_days_in_range(date_index, *date_interval)
|
||||
for date_interval in
|
||||
date_intervals]),
|
||||
np.repeat(
|
||||
vals_for_date_intervals,
|
||||
num_days,
|
||||
)
|
||||
)
|
||||
|
||||
+19
-18
@@ -912,7 +912,8 @@ class WithPipelineEventDataLoader(with_metaclass(
|
||||
frame = pd.DataFrame({sid: get_values_for_date_ranges(
|
||||
zip_date_index_with_vals,
|
||||
vals[sid],
|
||||
date_intervals[sid],
|
||||
pd.DatetimeIndex(list(zip(*date_intervals[sid]))[0]),
|
||||
pd.DatetimeIndex(list(zip(*date_intervals[sid]))[1]),
|
||||
dates
|
||||
) for sid in self.get_sids()[:-1]})
|
||||
frame[self.get_sids()[-1]] = zip_date_index_with_vals(
|
||||
@@ -982,7 +983,7 @@ class WithPipelineEventDataLoader(with_metaclass(
|
||||
|
||||
for sid in self.get_sids():
|
||||
for col_name in cols.keys():
|
||||
assert_series_equal(result[col_name].xs(sid, level=1),
|
||||
assert_series_equal(result[col_name].unstack(1)[sid],
|
||||
cols[col_name][sid],
|
||||
check_names=False)
|
||||
|
||||
@@ -1109,24 +1110,24 @@ class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader):
|
||||
]
|
||||
|
||||
next_date_intervals = [
|
||||
[[None, '2014-01-04'],
|
||||
[['2014-01-01', '2014-01-04'],
|
||||
['2014-01-05', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-21', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-04'],
|
||||
['2014-01-05', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-15'],
|
||||
['2014-01-16', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-21', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-04'],
|
||||
['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-20'],
|
||||
['2014-01-21', None]],
|
||||
[[None, '2014-01-04'],
|
||||
['2014-01-21', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-04'],
|
||||
['2014-01-05', '2014-01-10'],
|
||||
['2014-01-11', '2014-01-15'],
|
||||
['2014-01-16', None]]
|
||||
['2014-01-16', '2014-01-31']]
|
||||
]
|
||||
|
||||
next_dates = [
|
||||
@@ -1138,18 +1139,18 @@ class WithNextAndPreviousEventDataLoader(WithPipelineEventDataLoader):
|
||||
]
|
||||
|
||||
prev_date_intervals = [
|
||||
[[None, '2014-01-14'],
|
||||
[['2014-01-01', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-14'],
|
||||
['2014-01-20', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-14'],
|
||||
['2014-01-15', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-09'],
|
||||
['2014-01-20', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-19'],
|
||||
['2014-01-20', None]],
|
||||
[[None, '2014-01-09'],
|
||||
['2014-01-20', '2014-01-31']],
|
||||
[['2014-01-01', '2014-01-09'],
|
||||
['2014-01-10', '2014-01-14'],
|
||||
['2014-01-15', None]]
|
||||
['2014-01-15', '2014-01-31']]
|
||||
]
|
||||
|
||||
prev_dates = [
|
||||
|
||||
Reference in New Issue
Block a user