Merge pull request #1089 from quantopian/prevent-nan-to-max-int

BUG: Prevent writing int max instead of nan.
This commit is contained in:
Eddie Hebert
2016-03-31 05:17:40 -04:00
2 changed files with 100 additions and 7 deletions
+91 -2
View File
@@ -17,14 +17,23 @@ import os
from unittest import TestCase
from numpy import nan, array
from numpy.testing import assert_almost_equal
from numpy import (
arange,
array,
int64,
float64,
full,
nan,
zeros,
)
from numpy.testing import assert_almost_equal, assert_array_equal
from pandas import (
DataFrame,
DatetimeIndex,
Timestamp,
Timedelta,
NaT,
date_range,
)
from testfixtures import TempDirectory
@@ -451,6 +460,86 @@ class BcolzMinuteBarTestCase(TestCase):
self.assertEquals(100.0, volume_price)
def test_nans(self):
    """
    Test writing bars whose OHLC values are all nan.

    Pads the sid up to TEST_CALENDAR_START, writes nine minutes of
    nan open/high/low/close with zero volume, then reads the same
    window back and checks that the price fields come back as nan
    and the volume as zeros.
    """
    sid = 1
    num_minutes = 9

    # Before padding, the writer is expected to report NaT for this sid.
    last_date = self.writer.last_date_in_output_for_sid(sid)
    self.assertIs(last_date, NaT)

    self.writer.pad(sid, TEST_CALENDAR_START)

    last_date = self.writer.last_date_in_output_for_sid(sid)
    self.assertEqual(last_date, TEST_CALENDAR_START)

    # Look up the market open one calendar step after the padded date.
    freq = self.market_opens.index.freq
    minute = self.market_opens[TEST_CALENDAR_START + freq]
    minutes = date_range(minute, periods=num_minutes, freq='min')

    # Pass the DatetimeIndex itself as the index.  Wrapping it in a list
    # (``index=[minutes]``) is treated as a list of level arrays and can
    # produce a one-level MultiIndex instead of a DatetimeIndex.
    data = DataFrame(
        data={
            'open': full(num_minutes, nan),
            'high': full(num_minutes, nan),
            'low': full(num_minutes, nan),
            'close': full(num_minutes, nan),
            'volume': full(num_minutes, 0),
        },
        index=minutes)
    self.writer.write(sid, data)

    fields = ['open', 'high', 'low', 'close', 'volume']

    ohlcv_window = self.reader.unadjusted_window(
        fields, minutes[0], minutes[-1], [sid])

    for i, field in enumerate(fields):
        if field == 'volume':
            expected = zeros(num_minutes)
        else:
            expected = full(num_minutes, nan)
        # The window is indexed by field first; [0] selects the only
        # sid that was requested.
        assert_array_equal(expected, ohlcv_window[i][0])
def test_differing_nans(self):
    """
    Also test nans of differing values/construction.

    Each price column is filled with float64 values whose exponent bits
    are all set and whose mantissa is a distinct non-zero integer, so
    every entry is a nan but with a different bit pattern.  After the
    write, the price fields read back are expected to compare equal to
    plain nan and the volume to zeros.
    """
    sid = 1
    num_minutes = 9

    # Before padding, the writer is expected to report NaT for this sid.
    last_date = self.writer.last_date_in_output_for_sid(sid)
    self.assertIs(last_date, NaT)

    self.writer.pad(sid, TEST_CALENDAR_START)

    last_date = self.writer.last_date_in_output_for_sid(sid)
    self.assertEqual(last_date, TEST_CALENDAR_START)

    # Look up the market open one calendar step after the padded date.
    freq = self.market_opens.index.freq
    minute = self.market_opens[TEST_CALENDAR_START + freq]
    minutes = date_range(minute, periods=num_minutes, freq='min')

    # All eleven exponent bits of a float64 set; any non-zero mantissa
    # added on top of this yields a nan.
    nan_exponent_bits = 0b11111111111 << 52

    def nans_with_payloads(first_payload):
        # Build ``num_minutes`` nans with consecutive mantissa payloads
        # by reinterpreting the int64 bit patterns as float64.
        payloads = arange(
            first_payload, first_payload + num_minutes, dtype=int64)
        return (nan_exponent_bits + payloads).view(float64)

    # Pass the DatetimeIndex itself as the index.  Wrapping it in a list
    # (``index=[minutes]``) is treated as a list of level arrays and can
    # produce a one-level MultiIndex instead of a DatetimeIndex.
    data = DataFrame(
        data={
            'open': nans_with_payloads(1),
            'high': nans_with_payloads(11),
            'low': nans_with_payloads(21),
            'close': nans_with_payloads(31),
            'volume': full(num_minutes, 0),
        },
        index=minutes)
    self.writer.write(sid, data)

    fields = ['open', 'high', 'low', 'close', 'volume']

    ohlcv_window = self.reader.unadjusted_window(
        fields, minutes[0], minutes[-1], [sid])

    for i, field in enumerate(fields):
        if field == 'volume':
            expected = zeros(num_minutes)
        else:
            expected = full(num_minutes, nan)
        # The window is indexed by field first; [0] selects the only
        # sid that was requested.
        assert_array_equal(expected, ohlcv_window[i][0])
def test_write_cols(self):
minute_0 = self.market_opens[self.test_calendar_start]
minute_1 = minute_0 + timedelta(minutes=1)
+9 -5
View File
@@ -16,6 +16,7 @@ from textwrap import dedent
import bcolz
from bcolz import ctable
import numpy as np
from numpy import nan_to_num
from os.path import join
import json
import os
@@ -526,11 +527,14 @@ class BcolzMinuteBarWriter(object):
dts.astype('datetime64[ns]'))
ohlc_ratio = self._ohlc_ratio
open_col[dt_ixs] = (cols['open'] * ohlc_ratio).astype(np.uint32)
high_col[dt_ixs] = (cols['high'] * ohlc_ratio).astype(np.uint32)
low_col[dt_ixs] = (cols['low'] * ohlc_ratio).astype(np.uint32)
close_col[dt_ixs] = (cols['close'] * ohlc_ratio).astype(
np.uint32)
open_col[dt_ixs] = (nan_to_num(cols['open']) * ohlc_ratio).\
astype(np.uint32)
high_col[dt_ixs] = (nan_to_num(cols['high']) * ohlc_ratio).\
astype(np.uint32)
low_col[dt_ixs] = (nan_to_num(cols['low']) * ohlc_ratio).\
astype(np.uint32)
close_col[dt_ixs] = (nan_to_num(cols['close']) * ohlc_ratio).\
astype(np.uint32)
vol_col[dt_ixs] = cols['volume'].astype(np.uint32)
table.append([