From a01bcd538a31058d1c87c146d6137c495ae2da2e Mon Sep 17 00:00:00 2001 From: Victor Grau Serrat Date: Mon, 14 Aug 2017 06:22:55 -0400 Subject: [PATCH] fix issue #16 of empty files in /var/tmp; treasury data starts in 1990 --- catalyst/data/bundles/base.py | 2 ++ catalyst/data/bundles/poloniex.py | 4 ++-- catalyst/data/loader.py | 19 ++++++++++--------- .../pipeline/loaders/crypto_pricing_loader.py | 6 ------ catalyst/sources/benchmark_source.py | 1 - 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/catalyst/data/bundles/base.py b/catalyst/data/bundles/base.py index 3cfb8029..23640abd 100644 --- a/catalyst/data/bundles/base.py +++ b/catalyst/data/bundles/base.py @@ -172,6 +172,7 @@ class BaseBundle(object): # Compile 5-minute symbol data if bundle supports 5-minute mode and # persist the dataset to disk. + ''' if '5-minute' in self.frequencies: five_minute_bar_writer.write( self._fetch_symbol_iter( @@ -187,6 +188,7 @@ class BaseBundle(object): length=len(symbol_map), show_progress=show_progress, ) + ''' # Compile minute symbol data if bundle supports minute mode and # persist the dataset to disk. 
diff --git a/catalyst/data/bundles/poloniex.py b/catalyst/data/bundles/poloniex.py index a7e298f9..430da87c 100644 --- a/catalyst/data/bundles/poloniex.py +++ b/catalyst/data/bundles/poloniex.py @@ -103,7 +103,7 @@ class PoloniexBundle(BaseCryptoPricingBundle): ) raw.set_index('date', inplace=True) - scale = 1000.0 + scale = 1 raw.loc[:, 'open'] /= scale raw.loc[:, 'high'] /= scale raw.loc[:, 'low'] /= scale @@ -132,7 +132,7 @@ class PoloniexBundle(BaseCryptoPricingBundle): data_frequency): period_map = { 'daily': 86400, - '5-minute': 300, +# '5-minute': 300, } try: diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py index 50dd4f46..7d06855c 100644 --- a/catalyst/data/loader.py +++ b/catalyst/data/loader.py @@ -134,17 +134,19 @@ def load_crypto_market_data(trading_day=None, trading_day, environ, ) + # Override first_date for treasury data since we have it for many more years + # and it is independent of crypto data + first_date_treasury = pd.Timestamp('1990-01-01', tz='UTC') tc = ensure_treasury_data( bm_symbol, - first_date, + first_date_treasury, last_date, now, environ, ) benchmark_returns = br[br.index.slice_indexer(first_date, last_date)] - treasury_curves = tc[tc.index.slice_indexer(first_date, last_date)] + treasury_curves = tc[tc.index.slice_indexer(first_date_treasury, last_date)] return benchmark_returns, treasury_curves - def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY', @@ -232,6 +234,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY', treasury_curves = tc[tc.index.slice_indexer(first_date, last_date)] return benchmark_returns, treasury_curves + def ensure_crypto_benchmark_data(symbol, first_date, last_date, @@ -364,6 +367,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, logger.warn("Still don't have expected data after redownload!") return data + def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, environ=None): """ @@ -478,11 +482,6 @@ 
def ensure_treasury_data(symbol, first_date, last_date, now, environ=None): def _load_cached_data(filename, first_date, last_date, now, resource_name, environ=None): - if resource_name == 'benchmark': - from_csv = pd.Series.from_csv - else: - from_csv = pd.DataFrame.from_csv - # Path for the cache. path = get_data_filepath(filename, environ) @@ -490,7 +489,9 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name, # yet, so don't try to read from 'path'. if os.path.exists(path): try: - data = from_csv(path) + data = pd.DataFrame.from_csv(path) + if data.empty: + raise ValueError("File is empty.") data.index = pd.to_datetime(data.index, infer_datetime_format=True, errors='coerce' ).tz_localize('UTC') if has_data_for_dates(data, first_date, last_date): return data diff --git a/catalyst/pipeline/loaders/crypto_pricing_loader.py b/catalyst/pipeline/loaders/crypto_pricing_loader.py index 627b9207..22c0ccf2 100644 --- a/catalyst/pipeline/loaders/crypto_pricing_loader.py +++ b/catalyst/pipeline/loaders/crypto_pricing_loader.py @@ -106,12 +106,6 @@ class CryptoPricingLoader(PipelineLoader): def _shift_dates(dates, start_date, end_date, shift): - print 'dates.head:\n', dates[:10] - print 'dates.tail:\n', dates[:-10] - - print 'start_date:', start_date - print 'end_date:', end_date - print 'shift:', shift try: start = dates.get_loc(start_date) diff --git a/catalyst/sources/benchmark_source.py b/catalyst/sources/benchmark_source.py index b9773716..846b7eb5 100644 --- a/catalyst/sources/benchmark_source.py +++ b/catalyst/sources/benchmark_source.py @@ -185,7 +185,6 @@ class BenchmarkSource(object): return benchmark_series.pct_change()[1:] else: - print '----------------------------------------' start_date = asset.start_date if start_date < trading_days[0]: # get the window of close prices for benchmark_asset from the