From 7ddf18b407ddcef6c9e1bd7d2983cb98352e1251 Mon Sep 17 00:00:00 2001 From: Maya Tydykov Date: Tue, 16 Feb 2016 18:19:32 -0500 Subject: [PATCH] MAINT: use column's missing value. STY: fix flake8 failures. MAINT: fixes for compatibility with py2. MAINT: fix import error. MAINT: use dict.items() for compatibility with py3. --- tests/pipeline/test_buyback_auth.py | 6 +++--- tests/pipeline/test_earnings.py | 12 +++++++----- zipline/pipeline/loaders/blaze/events.py | 2 +- zipline/pipeline/loaders/buyback_auth.py | 2 +- zipline/pipeline/loaders/earnings.py | 2 +- zipline/pipeline/loaders/events.py | 12 ++++-------- zipline/pipeline/loaders/utils.py | 8 +------- 7 files changed, 18 insertions(+), 26 deletions(-) diff --git a/tests/pipeline/test_buyback_auth.py b/tests/pipeline/test_buyback_auth.py index 7bc9d6c8..5d77aec1 100644 --- a/tests/pipeline/test_buyback_auth.py +++ b/tests/pipeline/test_buyback_auth.py @@ -119,7 +119,7 @@ def zip_with_dates(index_dates, dts): return pd.Series(pd.to_datetime(dts), index=index_dates) -class BuybackAuthLoaderCommonTest: +class BuybackAuthLoaderCommonTest(object): """ Tests for loading the buyback authorization announcement data. """ @@ -294,8 +294,8 @@ class CashBuybackAuthLoaderTestCase(TestCase, BuybackAuthLoaderCommonTest): # until 15? A: zip_with_floats_dates( ['NaN'] * num_days_between(dates, None, '2014-01-14') + - [10] * num_days_between_dates('2014-01-15', '2014-01-19') + - [20] * num_days_between_dates('2014-01-20', None) + [10] * num_days_between_dates('2014-01-15', '2014-01-19') + + [20] * num_days_between_dates('2014-01-20', None) ), B: zip_with_floats_dates( ['NaN'] * num_days_between_dates(None, '2014-01-14') + diff --git a/tests/pipeline/test_earnings.py b/tests/pipeline/test_earnings.py index 308f9977..f48251da 100644 --- a/tests/pipeline/test_earnings.py +++ b/tests/pipeline/test_earnings.py @@ -393,9 +393,11 @@ class EarningsCalendarLoaderInferTimestampTestCase(TestCase): ) assert_series_equal( loader.events_by_sid[1].loc[:, ANNOUNCEMENT_FIELD_NAME], - pd.Series(index=announcement_dates[1].loc[:, TS_FIELD_NAME], - data=np.array( - announcement_dates[1].loc[:, ANNOUNCEMENT_FIELD_NAME] - ), - name=ANNOUNCEMENT_FIELD_NAME) + pd.Series( + index=announcement_dates[1].loc[:, TS_FIELD_NAME], + data=np.array( + announcement_dates[1].loc[:, ANNOUNCEMENT_FIELD_NAME] + ), + name=ANNOUNCEMENT_FIELD_NAME + ) ) diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py index 8196377c..6dc0e6da 100644 --- a/zipline/pipeline/loaders/blaze/events.py +++ b/zipline/pipeline/loaders/blaze/events.py @@ -119,4 +119,4 @@ class BlazeEventsCalendarLoader(PipelineLoader): ).load_adjusted_array(columns, dates, assets, mask) def prepare_data(self, raw, gb): - return {sid: raw.loc[group] for sid, group in gb.groups.iteritems()} + return {sid: raw.loc[group] for sid, group in gb.groups.items()} diff --git a/zipline/pipeline/loaders/buyback_auth.py b/zipline/pipeline/loaders/buyback_auth.py index 538919da..d1661bd1 100644 --- a/zipline/pipeline/loaders/buyback_auth.py +++ b/zipline/pipeline/loaders/buyback_auth.py @@ -6,7 +6,7 @@ from ..data.buyback_auth import ( CashBuybackAuthorizations, ShareBuybackAuthorizations ) -from events import EventsLoader +from .events import EventsLoader from zipline.utils.memoize import lazyval diff --git a/zipline/pipeline/loaders/earnings.py b/zipline/pipeline/loaders/earnings.py index 33c421bb..153c5c25 100644 --- a/zipline/pipeline/loaders/earnings.py +++ b/zipline/pipeline/loaders/earnings.py @@ -3,7 +3,7 @@ Reference implementation for EarningsCalendar loaders. """ from ..data.earnings import EarningsCalendar -from events import EventsLoader +from .events import EventsLoader from zipline.utils.memoize import lazyval ANNOUNCEMENT_FIELD_NAME = "announcement_date" diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py index 9165581c..1cf4f6f4 100644 --- a/zipline/pipeline/loaders/events.py +++ b/zipline/pipeline/loaders/events.py @@ -1,5 +1,3 @@ -from abc import abstractmethod - import numpy as np import pandas as pd from six import iteritems @@ -65,7 +63,7 @@ class EventsLoader(PipelineLoader): raise ValueError( "Got DatetimeIndex for sid %d.\n" "Pass `infer_timestamps=True` to use the first date in" - " `all_dates` as implicit timestamp."% k + " `all_dates` as implicit timestamp." % k ) self.events_by_sid[k] = pd.DataFrame(v) v.index = [dates[0]] * len(v) @@ -76,7 +74,7 @@ class EventsLoader(PipelineLoader): raise ValueError( "Got DataFrame without a '%s' column for sid %d.\n" "Pass `infer_timestamps=True` to use the first " - "date in `all_dates` as implicit timestamp."% + "date in `all_dates` as implicit timestamp." % (TS_FIELD_NAME, k) ) self.events_by_sid[k] = v = v.copy() @@ -85,7 +83,7 @@ class EventsLoader(PipelineLoader): self.events_by_sid[k] = v.set_index(TS_FIELD_NAME) else: raise ValueError("Data for sid %s must be in DataFrame, " - "Series, or DatetimeIndex."% k) + "Series, or DatetimeIndex." % k) # Once data is in a DF, make sure columns are correct. cols_except_ts = (set(v.columns.values) - {TS_FIELD_NAME} - @@ -150,9 +148,7 @@ class EventsLoader(PipelineLoader): event_date_field_name, value_field_name, previous_value_field.dtype, - # TODO: need to get actual name/method to use to get missing - # value - None + previous_value_field.missing_value ), adjustments=None, ) diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index 5be70420..1dd685dc 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -1,7 +1,6 @@ import datetime import numpy as np -from numpy import NaN import pandas as pd from six import iteritems from six.moves import zip @@ -125,14 +124,9 @@ def previous_value(date_index, events_by_sid, event_date_field, value_field, next_date_frame """ sids = list(events_by_sid) - # TODO: generalize; need to use dtype of column and missing value for that - # column; so pass - # in the pipeline column's attributes for these (replace NaN and dtype - # below) out = np.full( (len(date_index), len(sids)), - # TODO; replace with missing_value - NaN, + missing_value, dtype=value_field_dtype ) d_n = date_index[-1].asm8