Merge pull request #774 from quantopian/filter-out-dates-with-no-data

BUG: Filter out dividend payout rows that have no previous close price.
This commit is contained in:
Eddie Hebert
2015-10-15 13:30:46 -04:00
2 changed files with 25 additions and 11 deletions
@@ -175,43 +175,43 @@ DIVIDENDS = DataFrame(
[
# Before query range, should be excluded.
{'declared_date': Timestamp('2015-05-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-02', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-03', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-05', tz='UTC').to_datetime64(),
'amount': 90.0,
'sid': 1},
# First day of query range, should be excluded.
{'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-10', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-11', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-17', tz='UTC').to_datetime64(),
'amount': 80.0,
'sid': 3},
# Third day of query range, should have last_row of 2
{'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-12', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-16', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-17', tz='UTC').to_datetime64(),
'amount': 70.0,
'sid': 3},
# After query range, should be excluded.
{'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-25', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-26', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-28', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-30', tz='UTC').to_datetime64(),
'amount': 60.0,
'sid': 6},
# Another action in query range, should have last_row of 3
{'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-15', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-16', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-18', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-20', tz='UTC').to_datetime64(),
'amount': 50.0,
'sid': 3},
# Last day of range. Should have last_row of 7
{'declared_date': Timestamp('2015-06-01', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-19', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-22', tz='UTC').to_datetime64(),
'ex_date': Timestamp('2015-06-22', tz='UTC').to_datetime64(),
'record_date': Timestamp('2015-06-23', tz='UTC').to_datetime64(),
'pay_date': Timestamp('2015-06-30', tz='UTC').to_datetime64(),
'amount': 40.0,
'sid': 3},
+17 -3
View File
@@ -27,6 +27,7 @@ from bcolz import (
from click import progressbar
from numpy import (
array,
int64,
float64,
floating,
full,
@@ -40,6 +41,7 @@ from pandas import (
DataFrame,
DatetimeIndex,
read_csv,
to_datetime,
Timestamp,
)
from six import (
@@ -647,6 +649,8 @@ class SQLiteAdjustmentWriter(object):
calendar = self._calendar
effective_dates = full(len(amounts), -1, dtype=int64)
for i, amount in enumerate(amounts):
sid = sids[i]
ex_date = ex_dates[i]
@@ -655,8 +659,11 @@ class SQLiteAdjustmentWriter(object):
try:
prev_close = daily_bar_reader.spot_price(
sid, div_adj_date, 'close')
ratio = 1.0 - amount / (prev_close)
ratios[i] = ratio
if prev_close != 0.0:
ratio = 1.0 - amount / prev_close
ratios[i] = ratio
# only assign effective_date when data is found
effective_dates[i] = div_adj_date.value
except NoDataOnDate:
logger.warn("Couldn't compute ratio for dividend %s" % {
'sid': sid,
@@ -665,7 +672,14 @@ class SQLiteAdjustmentWriter(object):
})
continue
effective_dates = ex_dates.astype('datetime64[s]').astype(uint32)
# Create a mask to filter out indices in the effective_date, sid, and
# ratio vectors for which a ratio was not calculable.
effective_mask = effective_dates != -1
effective_dates = effective_dates[effective_mask]
effective_dates = effective_dates.astype('datetime64[ns]').\
astype('datetime64[s]').astype(uint32)
sids = sids[effective_mask]
ratios = ratios[effective_mask]
return DataFrame({
'sid': sids,