From 1d75143f5478a11cb40fede890c733bc6f49b55c Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Tue, 17 Jan 2017 15:52:31 -0500 Subject: [PATCH 1/2] ENH: Add a method to open existing minute bar directory. Remove the need for a consumer that is editing an existing minute bars directory to reread, from the metadata, the values which should not change. Exercise the new method in the append-on-new-day and truncate tests, which cover the common usages of this method. --- tests/data/test_minute_bars.py | 15 ++++++++++++--- zipline/data/minute_bars.py | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index f469365a..2c02ac5e 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -445,11 +445,16 @@ class BcolzMinuteBarTestCase(WithTradingCalendars, index=[first_minute]) self.writer.write_sid(sid, data) - next_day_minute = first_minute + Timedelta(days=1) + # Open a new writer to cover `open` method, also a common usage + # of appending new days will be writing to an existing directory. + cday = self.trading_calendar.schedule.index.freq + new_end_session = TEST_CALENDAR_START + cday + writer = BcolzMinuteBarWriter.open(self.dest, new_end_session) + next_day_minute = first_minute + cday new_data = DataFrame( data=ohlcv, index=[next_day_minute]) - self.writer.write_sid(sid, new_data) + writer.write_sid(sid, new_data) second_minute = first_minute + Timedelta(minutes=1) @@ -1014,8 +1019,12 @@ class BcolzMinuteBarTestCase(WithTradingCalendars, index=minutes) self.writer.write_sid(sid, data) + # Open a new writer to cover `open` method, also truncating only + # applies to an existing directory. + writer = BcolzMinuteBarWriter.open(self.dest) + # Truncate to first day with data. - self.writer.truncate(days[0]) + writer.truncate(days[0]) # Refresh the reader since truncate update the metadata.
self.reader = BcolzMinuteBarReader(self.dest) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index c5141172..4afa22e8 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -478,6 +478,28 @@ class BcolzMinuteBarWriter(object): ) metadata.write(self._rootdir) + @classmethod + def open(cls, rootdir, end_session=None): + """ + Open an existing ``rootdir`` for writing. + + Parameters + ---------- + end_session : Timestamp (optional) + When appending, the intended new ``end_session``. + """ + metadata = BcolzMinuteBarMetadata.read(rootdir) + return BcolzMinuteBarWriter( + rootdir, + metadata.calendar, + metadata.start_session, + end_session if end_session is not None else metadata.end_session, + metadata.minutes_per_day, + metadata.default_ohlc_ratio, + metadata.ohlc_ratios_per_sid, + write_metadata=False + ) + @property def first_trading_day(self): return self._start_session From df87dfb227817c2e1d041f589858a53a8a8d6af6 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Tue, 17 Jan 2017 16:12:40 -0500 Subject: [PATCH 2/2] ENH: Add sorted to sid list when truncating. For repeatable order of truncates between invocations. --- zipline/data/minute_bars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index 4afa22e8..6e4f6a5a 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -856,7 +856,7 @@ class BcolzMinuteBarWriter(object): truncate_slice_end = self.data_len_for_day(date) glob_path = os.path.join(self._rootdir, "*", "*", "*.bcolz") - sid_paths = glob(glob_path) + sid_paths = sorted(glob(glob_path)) for sid_path in sid_paths: file_name = os.path.basename(sid_path)