From 984e934e835a80da9e34d0b44a20bb452bb56292 Mon Sep 17 00:00:00 2001 From: Eddie Hebert Date: Mon, 25 Jan 2016 10:29:19 -0500 Subject: [PATCH] BUG: Fix OSError when creating sids that share dir Fix a bug where creating a sid bcolz file when the containing directory was already occupied by a sid caused an OSError on attempt of creating the directory because it already existed. e.g. if there were two sids, `1` and `2`. The paths would be `00/00/000001.bcolz` and `00/00/000002.bcolz` which share the same directory `00/00`. Fixed by checking for directory existence before calling `makedirs`. Add test coverage which exercises writing of sids that are siblings in the sid directory structure. --- tests/data/test_minute_bars.py | 88 ++++++++++++++++++++++++++++++++++ zipline/data/minute_bars.py | 10 ++-- 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/tests/data/test_minute_bars.py b/tests/data/test_minute_bars.py index 1f039d54..d7570b3a 100644 --- a/tests/data/test_minute_bars.py +++ b/tests/data/test_minute_bars.py @@ -311,3 +311,91 @@ class BcolzMinuteBarTestCase(TestCase): with self.assertRaises(BcolzMinuteOverlappingData): self.writer.write(sid, data) + + def test_write_multiple_sids(self): + """ + Test writing multiple sids. + + Tests both that the data is written to the correct sid, as well as + ensuring that the logic for creating the subdirectory path to each sid + does not cause issues from attempts to recreate existing paths. + (Calling out this coverage, because an assertion of that logic does not + show up in the test itself, but is exercised by the act of attempting + to write two consecutive sids, which would be written to the same + containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz) + + Before applying a check to make sure the path writing did not + re-attempt directory creation an OSError like the following would + occur: + + ``` + OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00' + ``` + """ + minute = self.market_opens[TEST_CALENDAR_START] + sids = [1, 2] + data = DataFrame( + data={ + 'open': [15.0], + 'high': [17.0], + 'low': [11.0], + 'close': [15.0], + 'volume': [100.0] + }, + index=[minute]) + self.writer.write(sids[0], data) + + data = DataFrame( + data={ + 'open': [25.0], + 'high': [27.0], + 'low': [21.0], + 'close': [25.0], + 'volume': [200.0] + }, + index=[minute]) + self.writer.write(sids[1], data) + + sid = sids[0] + + open_price = self.reader.get_value(sid, minute, 'open') + + self.assertEquals(15.0, open_price) + + high_price = self.reader.get_value(sid, minute, 'high') + + self.assertEquals(17.0, high_price) + + low_price = self.reader.get_value(sid, minute, 'low') + + self.assertEquals(11.0, low_price) + + close_price = self.reader.get_value(sid, minute, 'close') + + self.assertEquals(15.0, close_price) + + volume_price = self.reader.get_value(sid, minute, 'volume') + + self.assertEquals(100.0, volume_price) + + sid = sids[1] + + open_price = self.reader.get_value(sid, minute, 'open') + + self.assertEquals(25.0, open_price) + + high_price = self.reader.get_value(sid, minute, 'high') + + self.assertEquals(27.0, high_price) + + low_price = self.reader.get_value(sid, minute, 'low') + + self.assertEquals(21.0, low_price) + + close_price = self.reader.get_value(sid, minute, 'close') + + self.assertEquals(25.0, close_price) + + volume_price = self.reader.get_value(sid, minute, 'volume') + + self.assertEquals(200.0, volume_price) diff --git a/zipline/data/minute_bars.py b/zipline/data/minute_bars.py index cba07681..52f95b85 100644 --- a/zipline/data/minute_bars.py +++ b/zipline/data/minute_bars.py @@ -300,9 +300,13 @@ class BcolzMinuteBarWriter(object): path : string The path to rootdir of the new ctable. """ - # Only create the subdir on container creation. - sid_dirname = os.path.dirname(path) - os.makedirs(sid_dirname) + # Only create the containing subdir on creation. + # This is not to be confused with the `.bcolz` directory, but is the + # directory up one level from the `.bcolz` directories. + sid_containing_dirname = os.path.dirname(path) + if not os.path.exists(sid_containing_dirname): + # Other sids may have already created the containing directory. + os.makedirs(sid_containing_dirname) initial_array = np.empty(0, np.uint32) table = ctable( rootdir=path,