From 7f602d7fcc1015ab6d366884b1d2c931493a06df Mon Sep 17 00:00:00 2001 From: Victor Date: Thu, 21 Sep 2017 11:27:35 -0600 Subject: [PATCH 1/2] Update requirements.txt --- etc/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/requirements.txt b/etc/requirements.txt index d3cfabd6..aaa129ca 100644 --- a/etc/requirements.txt +++ b/etc/requirements.txt @@ -1,7 +1,7 @@ # Incompatible with earlier PIP versions pip>=7.1.0 # bcolz fails to install if this is not in the build_requires. -setuptools>18.0 +setuptools>36.0 # Logging Logbook==0.12.5 From 8b141a0c28217ba66217d6190a2b55102e6f5c26 Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Tue, 3 Oct 2017 09:11:59 -0600 Subject: [PATCH 2/2] Fix floats for volume in data.history --- catalyst/data/_equities.pyx | 4 +--- catalyst/data/dispatch_bar_reader.py | 5 ++++- catalyst/data/minute_bars.py | 2 +- catalyst/data/us_equity_pricing.py | 16 ++++++---------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/catalyst/data/_equities.pyx b/catalyst/data/_equities.pyx index 563fa56a..c0652b02 100644 --- a/catalyst/data/_equities.pyx +++ b/catalyst/data/_equities.pyx @@ -215,13 +215,11 @@ cpdef _read_bcolz_data(ctable_t table, else: continue - if column_name in ['open', 'high', 'low', 'close']: + if column_name in ['open', 'high', 'low', 'close', 'volume']: where_nan = (outbuf == 0) outbuf_as_float = outbuf.astype(float64) * .000000001 outbuf_as_float[where_nan] = NAN results.append(outbuf_as_float) - elif column_name != 'volume': - results.append(outbuf.astype(uint32)) else: results.append(outbuf) return results diff --git a/catalyst/data/dispatch_bar_reader.py b/catalyst/data/dispatch_bar_reader.py index e545eef0..d96252c9 100644 --- a/catalyst/data/dispatch_bar_reader.py +++ b/catalyst/data/dispatch_bar_reader.py @@ -18,6 +18,7 @@ from numpy import ( full, nan, int64, + float64, zeros ) from six import iteritems, with_metaclass @@ -70,7 +71,9 @@ class 
AssetDispatchBarReader(with_metaclass(ABCMeta)): return self._dt_window_size(start_dt, end_dt), num_sids def _make_raw_array_out(self, field, shape): - if field != 'volume' and field != 'sid': + if field == 'volume': + out = zeros(shape, dtype=float64) + elif field != 'sid': out = full(shape, nan) else: out = zeros(shape, dtype=int64) diff --git a/catalyst/data/minute_bars.py b/catalyst/data/minute_bars.py index bcbb64ae..a08c4ead 100644 --- a/catalyst/data/minute_bars.py +++ b/catalyst/data/minute_bars.py @@ -1248,7 +1248,7 @@ class BcolzMinuteBarReader(MinuteBarReader): if field != 'volume': out = np.full(shape, np.nan) else: - out = np.zeros(shape, dtype=np.uint64) + out = np.zeros(shape, dtype=np.float64) for i, sid in enumerate(sids): carray = self._open_minute_file(field, sid) diff --git a/catalyst/data/us_equity_pricing.py b/catalyst/data/us_equity_pricing.py index 901a0e60..157d2f7b 100644 --- a/catalyst/data/us_equity_pricing.py +++ b/catalyst/data/us_equity_pricing.py @@ -441,7 +441,7 @@ class BcolzDailyBarWriter(object): dates = raw_data.index.values.astype('datetime64[s]') check_uint32_safe(dates.max().view(np.int64), 'day') processed['day'] = dates.astype('uint32') - processed['volume'] = raw_data.volume.astype('uint64') + processed['volume'] = (raw_data.volume * PRICE_ADJUSTMENT_FACTOR).astype('uint64') return ctable.fromdataframe(processed) @@ -494,9 +494,8 @@ class BcolzDailyBarReader(SessionBarReader): The data in these columns is interpreted as follows: - - Price columns ('open', 'high', 'low', 'close') are interpreted as 1000 * - as-traded dollar value. - - Volume is interpreted as as-traded volume. + - Price columns ('open', 'high', 'low', 'close') and Volume are interpreted + as 10^9 * their as-traded value (dollar price or traded volume). - Day is interpreted as seconds since midnight UTC, Jan 1, 1970. - Id is the asset id of the row. 
@@ -762,13 +761,10 @@ class BcolzDailyBarReader(SessionBarReader): """ ix = self.sid_day_index(sid, dt) price = self._spot_col(field)[ix] - if field != 'volume': - if price == 0: - return nan - else: - return price / PRICE_ADJUSTMENT_FACTOR + if field != 'volume' and price == 0: + return nan else: - return price + return price / PRICE_ADJUSTMENT_FACTOR class PanelBarReader(SessionBarReader):