Merge pull request #1050 from quantopian/minute-bar-unadjusted-window

ENH: Unadjusted window data for minute bars.
This commit is contained in:
Eddie Hebert
2016-03-14 21:53:36 -04:00
2 changed files with 90 additions and 1 deletions
+46 -1
View File
@@ -23,7 +23,8 @@ from pandas import (
DataFrame,
DatetimeIndex,
Timestamp,
NaT
Timedelta,
NaT,
)
from testfixtures import TempDirectory
@@ -500,3 +501,47 @@ class BcolzMinuteBarTestCase(TestCase):
volume_price = self.reader.get_value(sid, minute_1, 'volume')
self.assertEquals(51.0, volume_price)
def test_unadjusted_minutes(self):
"""
Test unadjusted minutes.
"""
start_minute = self.market_opens[TEST_CALENDAR_START]
minutes = [start_minute,
start_minute + Timedelta('1 min'),
start_minute + Timedelta('2 min')]
sids = [1, 2]
data_1 = DataFrame(
data={
'open': [15.0, nan, 15.1],
'high': [17.0, nan, 17.1],
'low': [11.0, nan, 11.1],
'close': [14.0, nan, 14.1],
'volume': [1000, 0, 1001]
},
index=minutes)
self.writer.write(sids[0], data_1)
data_2 = DataFrame(
data={
'open': [25.0, nan, 25.1],
'high': [27.0, nan, 27.1],
'low': [21.0, nan, 21.1],
'close': [24.0, nan, 24.1],
'volume': [2000, 0, 2001]
},
index=minutes)
self.writer.write(sids[1], data_2)
reader = BcolzMinuteBarReader(self.dest)
columns = ['open', 'high', 'low', 'close', 'volume']
sids = [sids[0], sids[1]]
arrays = reader.unadjusted_window(
columns, minutes[0], minutes[-1], sids)
data = {sids[0]: data_1, sids[1]: data_2}
for i, col in enumerate(columns):
for j, sid in enumerate(sids):
assert_almost_equal(data[sid][col], arrays[i][j])
+44
View File
@@ -613,3 +613,47 @@ class BcolzMinuteBarReader(object):
since market open on the first trading day.
"""
return self._minute_index.get_loc(minute_dt)
def unadjusted_window(self, fields, start_dt, end_dt, sids):
"""
Parameters
----------
fields : list of str
'open', 'high', 'low', 'close', or 'volume'
start_dt: Timestamp
Beginning of the window range.
end_dt: Timestamp
End of the window range.
sids : list of int
The asset identifiers in the window.
Returns
-------
list of np.ndarray
A list with an entry per field of ndarrays with shape
(sids, minutes in range) with a dtype of float64, containing the
values for the respective field over start and end dt range.
"""
# TODO: Handle early closes.
start_idx = self._find_position_of_minute(start_dt)
end_idx = self._find_position_of_minute(end_dt)
results = []
shape = (len(sids), (end_idx - start_idx + 1))
for field in fields:
if field != 'volume':
out = np.full(shape, np.nan)
else:
out = np.zeros(shape, dtype=np.uint32)
for i, sid in enumerate(sids):
carray = self._open_minute_file(field, sid)
values = carray[start_idx:end_idx + 1]
where = values != 0
out[i, where] = values[where]
if field != 'volume':
out *= self._ohlc_inverse
results.append(out)
return results