diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index c218555d..5c263e6a 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -23,7 +23,8 @@ from pandas import ( DataFrame, DatetimeIndex, Timestamp, - NaT + Timedelta, + NaT, ) from testfixtures import TempDirectory @@ -500,3 +501,47 @@ class BcolzMinuteBarTestCase(TestCase): volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) + + def test_unadjusted_minutes(self): + """ + Test unadjusted minutes. + """ + start_minute = self.market_opens[TEST_CALENDAR_START] + minutes = [start_minute, + start_minute + Timedelta('1 min'), + start_minute + Timedelta('2 min')] + sids = [1, 2] + data_1 = DataFrame( + data={ + 'open': [15.0, nan, 15.1], + 'high': [17.0, nan, 17.1], + 'low': [11.0, nan, 11.1], + 'close': [14.0, nan, 14.1], + 'volume': [1000, 0, 1001] + }, + index=minutes) + self.writer.write(sids[0], data_1) + + data_2 = DataFrame( + data={ + 'open': [25.0, nan, 25.1], + 'high': [27.0, nan, 27.1], + 'low': [21.0, nan, 21.1], + 'close': [24.0, nan, 24.1], + 'volume': [2000, 0, 2001] + }, + index=minutes) + self.writer.write(sids[1], data_2) + + reader = BcolzMinuteBarReader(self.dest) + + columns = ['open', 'high', 'low', 'close', 'volume'] + sids = [sids[0], sids[1]] + arrays = reader.unadjusted_window( + columns, minutes[0], minutes[-1], sids) + + data = {sids[0]: data_1, sids[1]: data_2} + + for i, col in enumerate(columns): + for j, sid in enumerate(sids): + assert_almost_equal(data[sid][col], arrays[i][j]) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index b459851e..9ccba7e9 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -613,3 +613,47 @@ class BcolzMinuteBarReader(object): since market open on the first trading day. """ return self._minute_index.get_loc(minute_dt) + + def unadjusted_window(self, fields, start_dt, end_dt, sids): + """ + Parameters + ---------- + fields : list of str + 'open', 'high', 'low', 'close', or 'volume' + start_dt: Timestamp + Beginning of the window range. + end_dt: Timestamp + End of the window range. + sids : list of int + The asset identifiers in the window. + + Returns + ------- + list of np.ndarray + A list with an entry per field of ndarrays with shape + (sids, minutes in range) with a dtype of float64, containing the + values for the respective field over start and end dt range. + """ + # TODO: Handle early closes. + start_idx = self._find_position_of_minute(start_dt) + end_idx = self._find_position_of_minute(end_dt) + + results = [] + + shape = (len(sids), (end_idx - start_idx + 1)) + + for field in fields: + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.uint32) + + for i, sid in enumerate(sids): + carray = self._open_minute_file(field, sid) + values = carray[start_idx:end_idx + 1] + where = values != 0 + out[i, where] = values[where] + if field != 'volume': + out *= self._ohlc_inverse + results.append(out) + return results