MAINT: use the column's missing value instead of a hard-coded NaN/None placeholder.

STY: fix flake8 failures.

MAINT: fixes for compatibility with py2.

MAINT: fix import error.

MAINT: use dict.items() for compatibility with py3.
This commit is contained in:
Maya Tydykov
2016-02-16 18:19:32 -05:00
parent 7100e60474
commit 7ddf18b407
7 changed files with 18 additions and 26 deletions
+3 -3
View File
@@ -119,7 +119,7 @@ def zip_with_dates(index_dates, dts):
return pd.Series(pd.to_datetime(dts), index=index_dates)
class BuybackAuthLoaderCommonTest:
class BuybackAuthLoaderCommonTest(object):
"""
Tests for loading the buyback authorization announcement data.
"""
@@ -294,8 +294,8 @@ class CashBuybackAuthLoaderTestCase(TestCase, BuybackAuthLoaderCommonTest):
# until 15?
A: zip_with_floats_dates(
['NaN'] * num_days_between(dates, None, '2014-01-14') +
[10] * num_days_between_dates('2014-01-15', '2014-01-19') +
[20] * num_days_between_dates('2014-01-20', None)
[10] * num_days_between_dates('2014-01-15', '2014-01-19') +
[20] * num_days_between_dates('2014-01-20', None)
),
B: zip_with_floats_dates(
['NaN'] * num_days_between_dates(None, '2014-01-14') +
+7 -5
View File
@@ -393,9 +393,11 @@ class EarningsCalendarLoaderInferTimestampTestCase(TestCase):
)
assert_series_equal(
loader.events_by_sid[1].loc[:, ANNOUNCEMENT_FIELD_NAME],
pd.Series(index=announcement_dates[1].loc[:, TS_FIELD_NAME],
data=np.array(
announcement_dates[1].loc[:, ANNOUNCEMENT_FIELD_NAME]
),
name=ANNOUNCEMENT_FIELD_NAME)
pd.Series(
index=announcement_dates[1].loc[:, TS_FIELD_NAME],
data=np.array(
announcement_dates[1].loc[:, ANNOUNCEMENT_FIELD_NAME]
),
name=ANNOUNCEMENT_FIELD_NAME
)
)
+1 -1
View File
@@ -119,4 +119,4 @@ class BlazeEventsCalendarLoader(PipelineLoader):
).load_adjusted_array(columns, dates, assets, mask)
def prepare_data(self, raw, gb):
return {sid: raw.loc[group] for sid, group in gb.groups.iteritems()}
return {sid: raw.loc[group] for sid, group in gb.groups.items()}
+1 -1
View File
@@ -6,7 +6,7 @@ from ..data.buyback_auth import (
CashBuybackAuthorizations,
ShareBuybackAuthorizations
)
from events import EventsLoader
from .events import EventsLoader
from zipline.utils.memoize import lazyval
+1 -1
View File
@@ -3,7 +3,7 @@ Reference implementation for EarningsCalendar loaders.
"""
from ..data.earnings import EarningsCalendar
from events import EventsLoader
from .events import EventsLoader
from zipline.utils.memoize import lazyval
ANNOUNCEMENT_FIELD_NAME = "announcement_date"
+4 -8
View File
@@ -1,5 +1,3 @@
from abc import abstractmethod
import numpy as np
import pandas as pd
from six import iteritems
@@ -65,7 +63,7 @@ class EventsLoader(PipelineLoader):
raise ValueError(
"Got DatetimeIndex for sid %d.\n"
"Pass `infer_timestamps=True` to use the first date in"
" `all_dates` as implicit timestamp."% k
" `all_dates` as implicit timestamp." % k
)
self.events_by_sid[k] = pd.DataFrame(v)
v.index = [dates[0]] * len(v)
@@ -76,7 +74,7 @@ class EventsLoader(PipelineLoader):
raise ValueError(
"Got DataFrame without a '%s' column for sid %d.\n"
"Pass `infer_timestamps=True` to use the first "
"date in `all_dates` as implicit timestamp."%
"date in `all_dates` as implicit timestamp." %
(TS_FIELD_NAME, k)
)
self.events_by_sid[k] = v = v.copy()
@@ -85,7 +83,7 @@ class EventsLoader(PipelineLoader):
self.events_by_sid[k] = v.set_index(TS_FIELD_NAME)
else:
raise ValueError("Data for sid %s must be in DataFrame, "
"Series, or DatetimeIndex."% k)
"Series, or DatetimeIndex." % k)
# Once data is in a DF, make sure columns are correct.
cols_except_ts = (set(v.columns.values) -
{TS_FIELD_NAME} -
@@ -150,9 +148,7 @@ class EventsLoader(PipelineLoader):
event_date_field_name,
value_field_name,
previous_value_field.dtype,
# TODO: need to get actual name/method to use to get missing
# value
None
previous_value_field.missing_value
),
adjustments=None,
)
+1 -7
View File
@@ -1,7 +1,6 @@
import datetime
import numpy as np
from numpy import NaN
import pandas as pd
from six import iteritems
from six.moves import zip
@@ -125,14 +124,9 @@ def previous_value(date_index, events_by_sid, event_date_field, value_field,
next_date_frame
"""
sids = list(events_by_sid)
# TODO: generalize; need to use dtype of column and missing value for that
# column; so pass
# in the pipeline column's attributes for these (replace NaN and dtype
# below)
out = np.full(
(len(date_index), len(sids)),
# TODO; replace with missing_value
NaN,
missing_value,
dtype=value_field_dtype
)
d_n = date_index[-1].asm8