TST/BUG: Full coverage on resample module.

test_resample now fully covers the resample module.

Fix a bug exposed by increased coverage, where daily aggregation on
`high` would return `nan` for an asset instead of 1) during the
course of day `1d` history was called on non-consecutive minutes and 2)
either, a) the value for the previously inspected dt was `nan` or b)
there were only `nan`s between the previous and current dt.

`low` had a similar bug which was only triggered if the value for the
previously inspected dt was `nan`.
This commit is contained in:
Eddie Hebert
2016-09-01 15:26:10 -04:00
parent e2300c73bf
commit baff6a84bc
2 changed files with 54 additions and 12 deletions
+52 -6
View File
@@ -113,11 +113,18 @@ OHLCV = ('open', 'high', 'low', 'close', 'volume')
_EQUITY_CASES = (
(1, (('none_missing', 'day_0_front'),
('none_missing', 'day_0_back'))),
('missing_last', 'day_0_back'))),
(2, (('missing_first', 'day_0_front'),
('none_missing', 'day_0_back'))),
(3, (('missing_last', 'day_0_back'),
('missing_first', 'day_1_front'))),
# Asset 4 has a start date on day 1
(4, (('all_missing', 'day_0_back'),
('none_missing', 'day_1_front'))),
# Asset 5 has a start date before day_0, but does not have data on that
# day.
(5, (('all_missing', 'day_0_back'),
('none_missing', 'day_1_front'))),
)
EQUITY_CASES = OrderedDict()
@@ -149,10 +156,10 @@ for sid, combos in _FUTURE_CASES:
EXPECTED_AGGREGATION = {
1: DataFrame({
'open': [101.5, 101.5, 101.5, 101.5, 101.5, 101.5],
'high': [101.9, 103.9, 103.9, 103.9, 103.9, 103.9],
'high': [101.9, 103.9, 103.9, 107.9, 108.9, 108.9],
'low': [101.1, 101.1, 101.1, 101.1, 101.1, 101.1],
'close': [101.3, 103.3, 102.3, 101.3, 103.3, 102.3],
'volume': [1001, 2004, 3006, 4007, 5010, 6012],
'close': [101.3, 103.3, 102.3, 107.3, 108.3, 108.3],
'volume': [1001, 2004, 3006, 4013, 5021, 5021],
}, columns=OHLCV),
2: DataFrame({
'open': [nan, 103.5, 103.5, 103.5, 103.5, 103.5],
@@ -169,6 +176,22 @@ EXPECTED_AGGREGATION = {
'close': [107.3, 108.3, 108.3, nan, 103.3, 102.3],
'volume': [1007, 2015, 2015, 0, 1003, 2005],
}, columns=OHLCV),
# Equity 4 straddles two days and is not active the first day.
4: DataFrame({
'open': [nan, nan, nan, 101.5, 101.5, 101.5],
'high': [nan, nan, nan, 101.9, 103.9, 103.9],
'low': [nan, nan, nan, 101.1, 101.1, 101.1],
'close': [nan, nan, nan, 101.3, 103.3, 102.3],
'volume': [0, 0, 0, 1001, 2004, 3006],
}, columns=OHLCV),
# Equity 5 straddles two days and does not have data the first day.
5: DataFrame({
'open': [nan, nan, nan, 101.5, 101.5, 101.5],
'high': [nan, nan, nan, 101.9, 103.9, 103.9],
'low': [nan, nan, nan, 101.1, 101.1, 101.1],
'close': [nan, nan, nan, 101.3, 103.3, 102.3],
'volume': [0, 0, 0, 1001, 2004, 3006],
}, columns=OHLCV),
1001: DataFrame({
'open': [101.5, 101.5, 101.5, 101.5, 101.5, 101.5],
'high': [101.9, 103.9, 103.9, 103.9, 103.9, 103.9],
@@ -233,7 +256,15 @@ class MinuteToDailyAggregationTestCase(WithBcolzEquityMinuteBarReader,
TRADING_ENV_MAX_DATE = END_DATE = pd.Timestamp(
'2016-03-31', tz='UTC',
)
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3
ASSET_FINDER_EQUITY_SIDS = 1, 2, 3, 4, 5
@classmethod
def make_equity_info(cls):
frame = super(MinuteToDailyAggregationTestCase, cls).make_equity_info()
# Make equity 4 start a day behind the data start to exercise assets
# which not alive for the session.
frame.loc[[4], 'start_date'] = pd.Timestamp('2016-03-16', tz='UTC')
return frame
@classmethod
def make_equity_minute_bar_data(cls):
@@ -267,6 +298,11 @@ class MinuteToDailyAggregationTestCase(WithBcolzEquityMinuteBarReader,
('low_3', 'low', 3),
('close_3', 'close', 3),
('volume_3', 'volume', 3),
('open_4', 'open', 4),
('high_4', 'high', 4),
('low_4', 'low', 4),
('close_4', 'close', 4),
('volume_4', 'volume', 4),
])
def test_contiguous_minutes_individual(self, name, field, sid):
# First test each minute in order.
@@ -312,6 +348,16 @@ class MinuteToDailyAggregationTestCase(WithBcolzEquityMinuteBarReader,
('low_3', 'low', 3),
('close_3', 'close', 3),
('volume_3', 'volume', 3),
('open_4', 'open', 4),
('high_4', 'high', 4),
('low_4', 'low', 4),
('close_4', 'close', 4),
('volume_4', 'volume', 4),
('open_5', 'open', 5),
('high_5', 'high', 5),
('low_5', 'low', 5),
('close_5', 'close', 5),
('volume_5', 'volume', 5),
])
def test_skip_minutes_individual(self, name, field, sid):
# Test skipping minutes, to exercise backfills.
@@ -319,7 +365,7 @@ class MinuteToDailyAggregationTestCase(WithBcolzEquityMinuteBarReader,
method_name = field + 's'
asset = self.asset_finder.retrieve_asset(sid)
minutes = EQUITY_CASES[asset].index
for i in [1, 5]:
for i in [0, 2, 3, 5]:
minute = minutes[i]
value = getattr(self.equity_daily_aggregator, method_name)(
[asset], minute)[0]
+2 -6
View File
@@ -248,7 +248,7 @@ class DailyHistoryAggregator(object):
dt,
[asset],
)[0].T
val = max(last_max, np.nanmax(window))
val = np.nanmax(np.append(window, last_max))
entries[asset] = (dt_value, val)
highs.append(val)
continue
@@ -312,11 +312,7 @@ class DailyHistoryAggregator(object):
dt,
[asset],
)[0].T
window_min = np.nanmin(window)
if pd.isnull(window_min):
val = last_min
else:
val = min(last_min, window_min)
val = np.nanmin(np.append(window, last_min))
entries[asset] = (dt_value, val)
lows.append(val)
continue