From 0f14972e083c8c5153a617db15b9265239956620 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Mon, 14 Mar 2016 11:31:19 -0400 Subject: [PATCH] ENH: Unadjusted window data for minute bars. Add a method to the minute bar reader which returns the OHLCV for all requested fields for a list of assets over the specified start and end minutes. Initial usage is intended for use by a loader which consumes minute bar data to resample into daily bars, but may also be used when aggregating minute data during '1d' history calls in Q2.0. This iteration does not include handling of early closes. --- tests/data/test_minute_bars.py | 47 +++++++++++++++++++++++++++++++++- zipline/data/minute_bars.py | 44 +++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index c218555d..5c263e6a 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -23,7 +23,8 @@ from pandas import ( DataFrame, DatetimeIndex, Timestamp, - NaT + Timedelta, + NaT, ) from testfixtures import TempDirectory @@ -500,3 +501,47 @@ class BcolzMinuteBarTestCase(TestCase): volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) + + def test_unadjusted_minutes(self): + """ + Test unadjusted minutes. 
+ """ + start_minute = self.market_opens[TEST_CALENDAR_START] + minutes = [start_minute, + start_minute + Timedelta('1 min'), + start_minute + Timedelta('2 min')] + sids = [1, 2] + data_1 = DataFrame( + data={ + 'open': [15.0, nan, 15.1], + 'high': [17.0, nan, 17.1], + 'low': [11.0, nan, 11.1], + 'close': [14.0, nan, 14.1], + 'volume': [1000, 0, 1001] + }, + index=minutes) + self.writer.write(sids[0], data_1) + + data_2 = DataFrame( + data={ + 'open': [25.0, nan, 25.1], + 'high': [27.0, nan, 27.1], + 'low': [21.0, nan, 21.1], + 'close': [24.0, nan, 24.1], + 'volume': [2000, 0, 2001] + }, + index=minutes) + self.writer.write(sids[1], data_2) + + reader = BcolzMinuteBarReader(self.dest) + + columns = ['open', 'high', 'low', 'close', 'volume'] + sids = [sids[0], sids[1]] + arrays = reader.unadjusted_window( + columns, minutes[0], minutes[-1], sids) + + data = {sids[0]: data_1, sids[1]: data_2} + + for i, col in enumerate(columns): + for j, sid in enumerate(sids): + assert_almost_equal(data[sid][col], arrays[i][j]) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index b459851e..9ccba7e9 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -613,3 +613,47 @@ class BcolzMinuteBarReader(object): since market open on the first trading day. """ return self._minute_index.get_loc(minute_dt) + + def unadjusted_window(self, fields, start_dt, end_dt, sids): + """ + Parameters + ---------- + fields : list of str + 'open', 'high', 'low', 'close', or 'volume' + start_dt: Timestamp + Beginning of the window range (inclusive). + end_dt: Timestamp + End of the window range (inclusive). + sids : list of int + The asset identifiers in the window. + + Returns + ------- + list of np.ndarray + A list with an entry per field of ndarrays with shape + (sids, minutes in range); float64 for the OHLC fields and uint32 + for 'volume', holding the values over the start and end dt range. + """ + # TODO: Handle early closes. 
+ start_idx = self._find_position_of_minute(start_dt) + end_idx = self._find_position_of_minute(end_dt) + + results = [] + + shape = (len(sids), (end_idx - start_idx + 1)) + + for field in fields: + if field != 'volume': + out = np.full(shape, np.nan) + else: + out = np.zeros(shape, dtype=np.uint32) + + for i, sid in enumerate(sids): + carray = self._open_minute_file(field, sid) + values = carray[start_idx:end_idx + 1] + where = values != 0 + out[i, where] = values[where] + if field != 'volume': + out *= self._ohlc_inverse + results.append(out) + return results