diff --git a/tests/pipeline/test_us_equity_pricing_loader.py b/tests/pipeline/test_us_equity_pricing_loader.py index a10ea0fb..b75f4799 100644 --- a/tests/pipeline/test_us_equity_pricing_loader.py +++ b/tests/pipeline/test_us_equity_pricing_loader.py @@ -175,43 +175,43 @@ DIVIDENDS = DataFrame( [ # Before query range, should be excluded. {'declared_date': Timestamp('2015-05-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-02', tz='UTC').to_datetime64(), 'record_date': Timestamp('2015-06-03', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-05', tz='UTC').to_datetime64(), 'amount': 90.0, 'sid': 1}, # First day of query range, should be excluded. {'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-10', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-11', tz='UTC').to_datetime64(), 'record_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-17', tz='UTC').to_datetime64(), 'amount': 80.0, 'sid': 3}, # Third day of query range, should have last_row of 2 {'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-12', tz='UTC').to_datetime64(), - 'record_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(), + 'record_date': Timestamp('2015-06-16', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-17', tz='UTC').to_datetime64(), 'amount': 70.0, 'sid': 3}, # After query range, should be excluded. {'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-25', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-26', tz='UTC').to_datetime64(), 'record_date': Timestamp('2015-06-28', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-30', tz='UTC').to_datetime64(), 'amount': 60.0, 'sid': 6}, # Another action in query range, should have last_row of 3 {'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-16', tz='UTC').to_datetime64(), 'record_date': Timestamp('2015-06-18', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-20', tz='UTC').to_datetime64(), 'amount': 50.0, 'sid': 3}, # Last day of range. Should have last_row of 7 {'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(), - 'ex_date': Timestamp('2015-06-19', tz='UTC').to_datetime64(), - 'record_date': Timestamp('2015-06-22', tz='UTC').to_datetime64(), + 'ex_date': Timestamp('2015-06-22', tz='UTC').to_datetime64(), + 'record_date': Timestamp('2015-06-23', tz='UTC').to_datetime64(), 'pay_date': Timestamp('2015-06-30', tz='UTC').to_datetime64(), 'amount': 40.0, 'sid': 3}, diff --git a/zipline/data/us_equity_pricing.py b/zipline/data/us_equity_pricing.py index 59ab952b..12ec6a4b 100644 --- a/zipline/data/us_equity_pricing.py +++ b/zipline/data/us_equity_pricing.py @@ -27,6 +27,7 @@ from bcolz import ( from click import progressbar from numpy import ( array, + int64, float64, floating, full, @@ -40,6 +41,7 @@ from pandas import ( DataFrame, DatetimeIndex, read_csv, + to_datetime, Timestamp, ) from six import ( @@ -647,6 +649,8 @@ class SQLiteAdjustmentWriter(object): calendar = self._calendar + effective_dates = full(len(amounts), -1, dtype=int64) + for i, amount in enumerate(amounts): sid = sids[i] ex_date = ex_dates[i] @@ -655,8 +659,11 @@ class SQLiteAdjustmentWriter(object): try: prev_close = daily_bar_reader.spot_price( sid, div_adj_date, 'close') - ratio = 1.0 - amount / (prev_close) - ratios[i] = ratio + if prev_close != 0.0: + ratio = 1.0 - amount / prev_close + ratios[i] = ratio + # only assign effective_date when data is found + effective_dates[i] = div_adj_date.value except NoDataOnDate: logger.warn("Couldn't compute ratio for dividend %s" % { 'sid': sid, @@ -665,7 +672,14 @@ class SQLiteAdjustmentWriter(object): }) continue - effective_dates = ex_dates.astype('datetime64[s]').astype(uint32) + # Create a mask to filter out indices in the effective_date, sid, and + # ratio vectors for which a ratio was not calculable. + effective_mask = effective_dates != -1 + effective_dates = effective_dates[effective_mask] + effective_dates = effective_dates.astype('datetime64[ns]').\ + astype('datetime64[s]').astype(uint32) + sids = sids[effective_mask] + ratios = ratios[effective_mask] return DataFrame({ 'sid': sids,