mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-03 16:21:24 +08:00
Merge pull request #1106 from quantopian/13d_in_pipeline
13d in pipeline
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
Tests for the reference loader for 13d filings.
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
from zipline.pipeline.common import(
|
||||
DAYS_SINCE_PREV_DISCLOSURE,
|
||||
DISCLOSURE_DATE,
|
||||
NUM_SHARES,
|
||||
PERCENT_SHARES,
|
||||
PREVIOUS_NUM_SHARES,
|
||||
PREVIOUS_PERCENT_SHARES,
|
||||
PREVIOUS_DISCLOSURE_DATE,
|
||||
TS_FIELD_NAME,
|
||||
)
|
||||
from zipline.pipeline.data import _13DFilings
|
||||
from zipline.pipeline.factors.events import BusinessDaysSince13DFilingsDate
|
||||
from zipline.pipeline.loaders._13d_filings import _13DFilingsLoader
|
||||
from zipline.pipeline.loaders.utils import (
|
||||
get_values_for_date_ranges,
|
||||
zip_with_floats,
|
||||
zip_with_dates
|
||||
)
|
||||
from zipline.testing.fixtures import WithPipelineEventDataLoader
|
||||
from zipline.testing.fixtures import ZiplineTestCase
|
||||
|
||||
# [start, end] date-string pairs handed to ``get_values_for_date_ranges``.
# The interior boundaries line up with the timestamps of the first case
# frame below ('2014-01-05' and '2014-01-10').
# NOTE(review): ``None`` presumably marks an open-ended side -- confirm
# against ``get_values_for_date_ranges``.
date_intervals = [
    [None, '2014-01-04'],
    ['2014-01-05', '2014-01-09'],
    ['2014-01-10', None],
]

# A frame with the same columns as the populated case but no rows.  Each
# column is cast explicitly so it carries a concrete dtype.
empty_df = pd.DataFrame(
    columns=[NUM_SHARES, PERCENT_SHARES, DISCLOSURE_DATE, TS_FIELD_NAME],
)
empty_df[NUM_SHARES] = empty_df[NUM_SHARES].astype('float')
empty_df[PERCENT_SHARES] = empty_df[PERCENT_SHARES].astype('float')
empty_df[DISCLOSURE_DATE] = empty_df[DISCLOSURE_DATE].astype('datetime64[ns]')
empty_df[TS_FIELD_NAME] = empty_df[TS_FIELD_NAME].astype('datetime64[ns]')

# One frame per sid (position in the list is used as the sid by the test
# case): the first has two filings, the second has none.
_populated_case = pd.DataFrame({
    NUM_SHARES: [1, 15],
    PERCENT_SHARES: [10, 20],
    TS_FIELD_NAME: pd.to_datetime(['2014-01-05', '2014-01-10']),
    DISCLOSURE_DATE: pd.to_datetime(['2014-01-04', '2014-01-09']),
})
_13d_filngs_cases = [_populated_case, empty_df]
|
||||
|
||||
|
||||
def get_expected_previous_values(zip_date_index_with_vals,
                                 vals,
                                 date_intervals,
                                 dates):
    """
    Build the expected two-column (one per sid) frame for a "previous
    value" pipeline column.

    Parameters
    ----------
    zip_date_index_with_vals : callable
        Called as ``zip_date_index_with_vals(dates, values)``; also forwarded
        to ``get_values_for_date_ranges``.
    vals : list
        Values forwarded to ``get_values_for_date_ranges`` together with
        ``date_intervals``.
    date_intervals : list
        Date ranges forwarded to ``get_values_for_date_ranges``.
    dates : sequence
        Dates used as the index of the result.

    Returns
    -------
    pd.DataFrame
        Indexed by ``dates``.  Column ``0`` holds the result of
        ``get_values_for_date_ranges``; column ``1`` is built from 'NaN'
        for every date.
    """
    populated = get_values_for_date_ranges(
        zip_date_index_with_vals, vals, date_intervals, dates,
    )
    # The second column never has a value: one 'NaN' per date.
    all_missing = zip_date_index_with_vals(dates, ['NaN'] * len(dates))
    return pd.DataFrame({0: populated, 1: all_missing}, index=dates)
|
||||
|
||||
|
||||
class _13DFilingsLoaderTestCase(WithPipelineEventDataLoader,
                                ZiplineTestCase):
    """
    Tests for the ``_13DFilings`` dataset as served by
    ``_13DFilingsLoader``.
    """
    # Output column name -> pipeline term computed for it.
    pipeline_columns = {
        PREVIOUS_NUM_SHARES: _13DFilings.number_shares.latest,
        PREVIOUS_PERCENT_SHARES: _13DFilings.percent_shares.latest,
        PREVIOUS_DISCLOSURE_DATE: _13DFilings.disclosure_date.latest,
        DAYS_SINCE_PREV_DISCLOSURE: BusinessDaysSince13DFilingsDate(),
    }

    @classmethod
    def get_sids(cls):
        """Return the sids under test: 0 and 1."""
        return range(2)

    @classmethod
    def get_dataset(cls):
        """Map each sid to its case frame, using list position as the sid."""
        return dict(enumerate(_13d_filngs_cases))

    loader_type = _13DFilingsLoader

    def setup(self, dates):
        """
        Build the expected frame for every entry in ``pipeline_columns``.

        Parameters
        ----------
        dates : sequence
            Dates forwarded to ``get_expected_previous_values``.

        Returns
        -------
        dict
            Output column name -> expected frame.
        """
        previous_dates = get_expected_previous_values(
            zip_with_dates,
            ['NaT', '2014-01-04', '2014-01-09'],
            date_intervals,
            dates,
        )
        previous_num_shares = get_expected_previous_values(
            zip_with_floats, ['NaN', 1, 15], date_intervals, dates
        )
        previous_percent_shares = get_expected_previous_values(
            zip_with_floats, ['NaN', 10, 20], date_intervals, dates
        )
        # The business-day offsets are derived from the expected previous
        # disclosure dates computed above.
        return {
            PREVIOUS_DISCLOSURE_DATE: previous_dates,
            PREVIOUS_NUM_SHARES: previous_num_shares,
            PREVIOUS_PERCENT_SHARES: previous_percent_shares,
            DAYS_SINCE_PREV_DISCLOSURE: self._compute_busday_offsets(
                previous_dates
            ),
        }
|
||||
@@ -9,10 +9,12 @@ CASH_AMOUNT_FIELD_NAME = 'cash_amount'
|
||||
COUNT_FIELD_NAME = 'count'
|
||||
BUYBACK_ANNOUNCEMENT_FIELD_NAME = 'buyback_date'
|
||||
DAYS_SINCE_PREV = 'days_since_prev'
|
||||
DAYS_SINCE_PREV_DISCLOSURE = 'days_since_prev_disclosure'
|
||||
DAYS_SINCE_PREV_DIVIDEND_ANNOUNCEMENT = 'days_since_prev_dividend_announcement'
|
||||
DAYS_SINCE_PREV_EX_DATE = 'days_since_prev_ex_date'
|
||||
DAYS_TO_NEXT = 'days_to_next'
|
||||
DAYS_TO_NEXT_EX_DATE = 'days_to_next_ex_date'
|
||||
DISCLOSURE_DATE = 'disclosure_date'
|
||||
EX_DATE_FIELD_NAME = 'ex_date'
|
||||
FISCAL_QUARTER_FIELD_NAME = 'fiscal_quarter'
|
||||
FISCAL_YEAR_FIELD_NAME = 'fiscal_year'
|
||||
@@ -29,23 +31,28 @@ NEXT_HIGH = 'next_high'
|
||||
NEXT_LOW = 'next_low'
|
||||
NEXT_MEAN = 'next_mean'
|
||||
NEXT_PAY_DATE = 'next_pay_date'
|
||||
NUM_SHARES = 'number_shares'
|
||||
NEXT_RELEASE_DATE = 'next_release_date'
|
||||
NEXT_STANDARD_DEVIATION = 'next_standard_deviation'
|
||||
PAY_DATE_FIELD_NAME = 'pay_date'
|
||||
PERCENT_SHARES = 'percent_shares'
|
||||
PREVIOUS_ACTUAL_VALUE = 'previous_actual_value'
|
||||
PREVIOUS_AMOUNT = 'previous_amount'
|
||||
PREVIOUS_ANNOUNCEMENT = 'previous_announcement'
|
||||
PREVIOUS_BUYBACK_ANNOUNCEMENT = 'previous_buyback_announcement'
|
||||
PREVIOUS_BUYBACK_CASH = 'previous_buyback_cash'
|
||||
PREVIOUS_BUYBACK_SHARE_COUNT = 'previous_buyback_share_count'
|
||||
PREVIOUS_DISCLOSURE_DATE = 'previous_disclosure_date'
|
||||
PREVIOUS_COUNT = 'previous_count'
|
||||
PREVIOUS_EX_DATE = 'previous_ex_date'
|
||||
PREVIOUS_NUM_SHARES = 'previous_number_shares'
|
||||
PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
|
||||
PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
|
||||
PREVIOUS_HIGH = 'previous_high'
|
||||
PREVIOUS_LOW = 'previous_low'
|
||||
PREVIOUS_MEAN = 'previous_mean'
|
||||
PREVIOUS_PAY_DATE = 'previous_pay_date'
|
||||
PREVIOUS_PERCENT_SHARES = 'previous_percentage'
|
||||
PREVIOUS_RELEASE_DATE = 'previous_release_date'
|
||||
PREVIOUS_STANDARD_DEVIATION = 'previous_standard_deviation'
|
||||
RELEASE_DATE_FIELD_NAME = 'release_date'
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
Dataset representing recently disclosed 13d filings.
|
||||
"""
|
||||
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
|
||||
|
||||
from .dataset import Column, DataSet
|
||||
|
||||
|
||||
class _13DFilings(DataSet):
    """
    Dataset of recently disclosed 13d filings: the number and percentage
    of shares reported, and the date of the disclosure.
    """
    # Share counts and percentages are stored as float64.
    number_shares = Column(float64_dtype)
    percent_shares = Column(float64_dtype)
    # Disclosure dates are stored as datetime64[ns].
    disclosure_date = Column(datetime64ns_dtype)
|
||||
@@ -1,3 +1,4 @@
|
||||
from ._13d_filings import _13DFilings
|
||||
from .buyback_auth import CashBuybackAuthorizations, ShareBuybackAuthorizations
|
||||
from .dividends import (
|
||||
DividendsByAnnouncementDate,
|
||||
@@ -10,6 +11,7 @@ from .equity_pricing import USEquityPricing
|
||||
from .dataset import DataSet, Column, BoundColumn
|
||||
|
||||
__all__ = [
|
||||
'_13DFilings',
|
||||
'BoundColumn',
|
||||
'CashBuybackAuthorizations',
|
||||
'Column',
|
||||
|
||||
@@ -5,6 +5,7 @@ from .factor import (
|
||||
RecarrayField,
|
||||
)
|
||||
from .events import (
|
||||
BusinessDaysSince13DFilingsDate,
|
||||
BusinessDaysSinceCashBuybackAuth,
|
||||
BusinessDaysSinceDividendAnnouncement,
|
||||
BusinessDaysUntilNextExDate,
|
||||
@@ -28,6 +29,7 @@ from .technical import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'BusinessDaysSince13DFilingsDate',
|
||||
'BusinessDaysSinceCashBuybackAuth',
|
||||
'BusinessDaysSinceDividendAnnouncement',
|
||||
'BusinessDaysUntilNextExDate',
|
||||
|
||||
@@ -4,6 +4,7 @@ announcements, acquisitions, dividends, etc.).
|
||||
"""
|
||||
from numpy import newaxis
|
||||
from ..data import (
|
||||
_13DFilings,
|
||||
CashBuybackAuthorizations,
|
||||
ShareBuybackAuthorizations,
|
||||
DividendsByAnnouncementDate,
|
||||
@@ -203,3 +204,14 @@ class BusinessDaysSincePreviousExDate(
|
||||
zipline.pipeline.factors.BusinessDaysSinceDividendAnnouncement
|
||||
"""
|
||||
inputs = [DividendsByExDate.previous_date]
|
||||
|
||||
|
||||
class BusinessDaysSince13DFilingsDate(BusinessDaysSincePreviousEvents):
    """
    Factor returning the number of **business days** (not trading days!)
    since the most recent 13d filing disclosure date for each asset.

    The factor's only input is ``_13DFilings.disclosure_date``.
    """
    inputs = [_13DFilings.disclosure_date]
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
Reference implementation for 13d filings loaders.
|
||||
"""
|
||||
|
||||
from zipline.pipeline.common import (
|
||||
DISCLOSURE_DATE,
|
||||
PERCENT_SHARES,
|
||||
NUM_SHARES
|
||||
)
|
||||
from zipline.pipeline.data import _13DFilings
|
||||
from zipline.pipeline.loaders.events import EventsLoader
|
||||
from zipline.utils.memoize import lazyval
|
||||
|
||||
|
||||
class _13DFilingsLoader(EventsLoader):
    """
    Reference loader for
    :class:`zipline.pipeline.data._13DFilings`.

    events_by_sid: dict[sid -> pd.DataFrame(knowledge date,
    disclosure date, percent shares, number of shares)]
    """
    # Columns every per-sid events frame is expected to carry.
    expected_cols = frozenset((DISCLOSURE_DATE, PERCENT_SHARES, NUM_SHARES))

    # Column holding the date of the event itself.
    event_date_col = DISCLOSURE_DATE

    def __init__(self,
                 all_dates,
                 events_by_sid,
                 infer_timestamps=False,
                 dataset=_13DFilings):
        """Forward all arguments to ``EventsLoader.__init__``."""
        super(_13DFilingsLoader, self).__init__(
            all_dates,
            events_by_sid,
            infer_timestamps,
            dataset=dataset,
        )

    @lazyval
    def disclosure_date_loader(self):
        """Previous-event-date loader for ``dataset.disclosure_date``."""
        column = self.dataset.disclosure_date
        return self._previous_event_date_loader(column)

    @lazyval
    def percent_shares_loader(self):
        """Previous-event-value loader for ``dataset.percent_shares``."""
        column = self.dataset.percent_shares
        return self._previous_event_value_loader(column, PERCENT_SHARES)

    @lazyval
    def number_shares_loader(self):
        """Previous-event-value loader for ``dataset.number_shares``."""
        column = self.dataset.number_shares
        return self._previous_event_value_loader(column, NUM_SHARES)
|
||||
@@ -1,3 +1,4 @@
|
||||
from ._13d_filings import _13DFilingsLoader
|
||||
from .earnings import EarningsCalendarLoader
|
||||
from .consensus_estimates import ConsensusEstimatesLoader
|
||||
from .buyback_auth import (
|
||||
@@ -12,6 +13,7 @@ from .dividends import (
|
||||
from .equity_pricing_loader import USEquityPricingLoader
|
||||
|
||||
__all__ = [
|
||||
'_13DFilingsLoader',
|
||||
'CashBuybackAuthorizationsLoader',
|
||||
'DividendsByAnnouncementDateLoader',
|
||||
'DividendsByExDateLoader',
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
from zipline.pipeline.common import (
|
||||
SID_FIELD_NAME,
|
||||
TS_FIELD_NAME,
|
||||
PERCENT_SHARES,
|
||||
NUM_SHARES,
|
||||
DISCLOSURE_DATE)
|
||||
from zipline.pipeline.data import _13DFilings
|
||||
from zipline.pipeline.loaders import _13DFilingsLoader
|
||||
from .events import BlazeEventsLoader
|
||||
|
||||
|
||||
class Blaze_13DFilingsLoader(BlazeEventsLoader):
    """A pipeline loader for the ``_13DFilings`` dataset that
    loads data from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The expression representing the data to load.
    resources : dict, optional
        Mapping from the atomic terms of ``expr`` to actual data resources.
    odo_kwargs : dict, optional
        Extra keyword arguments to pass to odo when executing the expression.
    data_query_time : time, optional
        The time to use for the data query cutoff.
    data_query_tz : tzinfo or str
        The timezone to use for the data query cutoff.
    dataset : DataSet
        The DataSet object for which this loader loads data.

    Notes
    -----
    The expression should have a tabular dshape of::

       Dim * {{
           {SID_FIELD_NAME}: int64,
           {TS_FIELD_NAME}: datetime,
           {PERCENTAGE}: float64,
           {NUM_SHARES}: float64,
           {DISCLOSURE_DATE}: ?datetime,
       }}

    Where each row of the table is a record including the sid to identify the
    company, the timestamp where we learned about the disclosure, the
    date of the disclosure, the percentage, and the number of shares.

    If the '{TS_FIELD_NAME}' field is not included it is assumed that we
    start the backtest with knowledge of all disclosures.
    """

    # Substitute the real field names into the docstring template above, so
    # the rendered documentation always matches the shared constants.
    __doc__ = __doc__.format(
        TS_FIELD_NAME=TS_FIELD_NAME,
        SID_FIELD_NAME=SID_FIELD_NAME,
        PERCENTAGE=PERCENT_SHARES,
        NUM_SHARES=NUM_SHARES,
        DISCLOSURE_DATE=DISCLOSURE_DATE
    )

    # Field names listed in the dshape documented above.
    _expected_fields = frozenset({
        TS_FIELD_NAME,
        SID_FIELD_NAME,
        PERCENT_SHARES,
        NUM_SHARES,
        DISCLOSURE_DATE
    })

    # The in-memory loader and dataset this blaze loader is paired with.
    concrete_loader = _13DFilingsLoader
    concrete_dataset = _13DFilings
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
from ._13d_filings import Blaze_13DFilingsLoader
|
||||
from .buyback_auth import (
|
||||
BlazeCashBuybackAuthorizationsLoader,
|
||||
BlazeShareBuybackAuthorizationsLoader
|
||||
@@ -20,6 +20,7 @@ from .earnings import (
|
||||
from .consensus_estimates import BlazeConsensusEstimatesLoader
|
||||
|
||||
__all__ = (
|
||||
'Blaze_13DFilingsLoader',
|
||||
'BlazeCashBuybackAuthorizationsLoader',
|
||||
'BlazeDividendsByAnnouncementDateLoader',
|
||||
'BlazeConsensusEstimatesLoader',
|
||||
|
||||
Reference in New Issue
Block a user