Merge pull request #969 from quantopian/fix-numpy-warnings

Fix numpy warnings
This commit is contained in:
Richard Frank
2016-02-12 16:41:50 -05:00
30 changed files with 281 additions and 407 deletions
+11 -5
View File
@@ -4,22 +4,28 @@ cache:
directories:
- $HOME/.cache/pip
matrix:
fast_finish: true
include:
- python: 2.7
env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1
- python: 2.7
env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1
- python: 3.4
env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1
- python: 3.4
env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1
before_install:
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget https://repo.continuum.io/miniconda/Miniconda-3.7.0-Linux-x86_64.sh -O miniconda.sh; else wget https://repo.continuum.io/miniconda/Miniconda3-3.7.0-Linux-x86_64.sh -O miniconda.sh; fi
- chmod +x miniconda.sh
- ./miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- sed -i "s/numpy==.*/numpy==$NUMPY_VERSION/" etc/requirements.txt
- sed -i "s/pandas==.*/pandas==$PANDAS_VERSION/" etc/requirements.txt
- sed -i "s/scipy==.*/scipy==$SCIPY_VERSION/" etc/requirements.txt
install:
- conda create -n testenv --yes pip python=$TRAVIS_PYTHON_VERSION
- conda create -n testenv --yes -c quantopian pip python=$TRAVIS_PYTHON_VERSION numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION ta-lib=0.4.8
- source activate testenv
- conda install --yes -c quantopian numpy=$NUMPY_VERSION pandas=$PANDAS_VERSION scipy=$SCIPY_VERSION Cython=0.22.1 ta-lib=0.4.8
- conda remove --yes six pytz # remove these packages so our requirements can downgrade them
- pip install --upgrade pip coverage coveralls
- pip install -r etc/requirements.txt
- pip install -r etc/requirements_dev.txt
@@ -27,9 +33,9 @@ install:
- pip install -e .
before_script:
- pip freeze | sort
- flake8 zipline tests
script:
- nosetests --with-timer --exclude=^test_examples --with-coverage --cover-package=zipline --timer-top-n=15
- nosetests tests/
- flake8 zipline tests
after_success:
- coveralls
+2 -2
View File
@@ -24,7 +24,7 @@ six==1.9.0
# For fetching remote data
requests==2.9.1
Cython==0.22.1
Cython==0.23.4
# faster OrderedDict
cyordereddict==0.2.2
@@ -41,7 +41,7 @@ networkx==1.9.1
numexpr==2.4.3
# On disk storage format for pipeline data.
bcolz==0.10.0
bcolz==0.12.1
# Command line interface helper
click==4.0.0
+3 -1
View File
@@ -3,7 +3,9 @@ verbosity=2
detailed-errors=1
with-ignore-docstrings=1
with-timer=1
timer-filter=warning
timer-top-n=15
with-coverage=1
cover-package=zipline
[metadata]
description-file = README.rst
+63 -64
View File
@@ -33,43 +33,63 @@ from setuptools import (
import versioneer
class LazyCythonizingList(list):
cythonized = False
def lazy_cythonize(self):
if self.cythonized:
return
self.cythonized = True
from Cython.Build import cythonize
from numpy import get_include
self[:] = cythonize(
[
Extension(*ext_args, include_dirs=[get_include()])
for ext_args in self
]
class LazyBuildExtCommandClass(dict):
"""
Lazy command class that defers operations requiring Cython and numpy until
they've actually been downloaded and installed by setup_requires.
"""
def __contains__(self, key):
return (
key == 'build_ext'
or super(LazyBuildExtCommandClass, self).__contains__(key)
)
def __iter__(self):
self.lazy_cythonize()
return super(LazyCythonizingList, self).__iter__()
def __setitem__(self, key, value):
if key == 'build_ext':
raise AssertionError("build_ext overridden!")
super(LazyBuildExtCommandClass, self).__setitem__(key, value)
def __getitem__(self, num):
self.lazy_cythonize()
return super(LazyCythonizingList, self).__getitem__(num)
def __getitem__(self, key):
if key != 'build_ext':
return super(LazyBuildExtCommandClass, self).__getitem__(key)
from Cython.Distutils import build_ext as cython_build_ext
import numpy
# Cython_build_ext isn't a new-style class in Py2.
class build_ext(cython_build_ext, object):
"""
Custom build_ext command that lazily adds numpy's include_dir to
extensions.
"""
def build_extensions(self):
"""
Lazily append numpy's include directory to Extension includes.
This is done here rather than at module scope because setup.py
may be run before numpy has been installed, in which case
importing numpy and calling `numpy.get_include()` will fail.
"""
numpy_incl = numpy.get_include()
for ext in self.extensions:
ext.include_dirs.append(numpy_incl)
super(build_ext, self).build_extensions()
return build_ext
ext_modules = LazyCythonizingList([
('zipline.assets._assets', ['zipline/assets/_assets.pyx']),
('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']),
('zipline.lib._float64window', ['zipline/lib/_float64window.pyx']),
('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']),
('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']),
('zipline.lib.rank', ['zipline/lib/rank.pyx']),
('zipline.data._equities', ['zipline/data/_equities.pyx']),
('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']),
])
ext_modules = [
Extension('zipline.assets._assets', ['zipline/assets/_assets.pyx']),
Extension('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']),
Extension(
'zipline.lib._float64window', ['zipline/lib/_float64window.pyx']
),
Extension('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']),
Extension('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']),
Extension('zipline.lib.rank', ['zipline/lib/rank.pyx']),
Extension('zipline.data._equities', ['zipline/data/_equities.pyx']),
Extension('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']),
]
STR_TO_CMP = {
@@ -116,9 +136,8 @@ def _filter_requirements(lines_iter):
yield requirement
REQ_UPPER_BOUNDS = {
'numpy': '<1.10',
}
# We don't currently have any known upper bounds.
REQ_UPPER_BOUNDS = {}
def _with_bounds(req):
@@ -183,11 +202,12 @@ def extras_requires(conda_format=False):
}
def module_requirements(requirements_path, module_names):
def module_requirements(requirements_path, module_names, strict_bounds):
module_names = set(module_names)
found = set()
module_lines = []
for line in read_requirements(requirements_path, strict_bounds=True):
for line in read_requirements(requirements_path,
strict_bounds=strict_bounds):
match = REQ_PATTERN.match(line)
if match is None:
raise AssertionError("Could not parse requirement: '%s'" % line)
@@ -203,38 +223,12 @@ def module_requirements(requirements_path, module_names):
)
return module_lines
def pre_setup():
if not set(sys.argv) & {'install', 'develop', 'egg_info', 'bdist_wheel'}:
return
try:
import pip
if StrictVersion(pip.__version__) < StrictVersion('7.1.0'):
raise AssertionError(
"Zipline installation requires pip>=7.1.0, but your pip "
"version is {version}. \n"
"You can upgrade your pip with "
"'pip install --upgrade pip'.".format(
version=pip.__version__,
)
)
except ImportError:
raise AssertionError("Zipline installation requires pip")
required = ('Cython', 'numpy')
for line in module_requirements('etc/requirements.txt', required):
pip.main(['install', line])
pre_setup()
conda_build = os.path.basename(sys.argv[0]) == 'conda-build'
setup(
name='zipline',
version=versioneer.get_version(),
cmdclass=versioneer.get_cmdclass(),
cmdclass=LazyBuildExtCommandClass(versioneer.get_cmdclass()),
description='A backtester for financial algorithms.',
author='Quantopian Inc.',
author_email='opensource@quantopian.com',
@@ -258,5 +252,10 @@ setup(
],
install_requires=install_requires(conda_format=conda_build),
extras_require=extras_requires(conda_format=conda_build),
setup_requires=module_requirements(
'etc/requirements.txt',
('Cython', 'numpy'),
strict_bounds=False,
),
url="http://zipline.io",
)
+36 -34
View File
@@ -52,6 +52,19 @@ asset_infos = (
with_extra_sid = parameterized.expand(asset_infos)
def _utc_localize_index_level_0(df):
"""``tz_localize`` the first level of a multiindexed dataframe to utc.
Mutates df in place.
"""
idx = df.index
df.index = pd.MultiIndex.from_product(
(idx.levels[0].tz_localize('utc'), idx.levels[1]),
names=idx.names,
)
return df
class BlazeToPipelineTestCase(TestCase):
@classmethod
def setUpClass(cls):
@@ -315,7 +328,11 @@ class BlazeToPipelineTestCase(TestCase):
finder,
).run_pipeline(p, dates[0], dates[-1])
assert_frame_equal(result, expected, check_dtype=False)
assert_frame_equal(
result,
_utc_localize_index_level_0(expected),
check_dtype=False,
)
def test_custom_query_time_tz(self):
df = self.df.copy()
@@ -345,7 +362,7 @@ class BlazeToPipelineTestCase(TestCase):
expected = df.drop('asof_date', axis=1)
expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
'datetime64[ns]',
)
).dt.tz_localize('utc')
expected.ix[3:5, 'timestamp'] += timedelta(days=1)
expected.set_index(['timestamp', 'sid'], inplace=True)
expected.index = pd.MultiIndex.from_product((
@@ -615,41 +632,26 @@ class BlazeToPipelineTestCase(TestCase):
df['other'] = df.value + 1
fields = OrderedDict(self.macro_dshape.measure.fields)
fields['other'] = fields['value']
expr = bz.Data(df, name='expr', dshape=var * Record(fields))
loader = BlazeLoader()
ds = from_blaze(
expr,
loader=loader,
no_deltas_rule=no_deltas_rules.ignore,
)
p = Pipeline()
p.add(ds.value.latest, 'value')
p.add(ds.other.latest, 'other')
dates = self.dates
asset_info = asset_infos[0][0]
with tmp_asset_finder(equities=asset_info) as finder:
result = SimplePipelineEngine(
loader,
dates,
expected = pd.DataFrame(
np.array([[0, 1],
[1, 2],
[2, 3]]).repeat(3, axis=0),
index=pd.MultiIndex.from_product((
df.timestamp,
finder.retrieve_all(asset_info.index),
)),
columns=('value', 'other'),
).sort_index(axis=1)
self._test_id(
df,
var * Record(fields),
expected,
finder,
).run_pipeline(p, dates[0], dates[-1])
expected = pd.DataFrame(
np.array([[0, 1],
[1, 2],
[2, 3]]).repeat(3, axis=0),
index=pd.MultiIndex.from_product((
df.timestamp,
finder.retrieve_all(asset_info.index),
)),
columns=('value', 'other'),
).sort_index(axis=1)
assert_frame_equal(
result,
expected.sort_index(axis=1),
check_dtype=False,
)
('value', 'other'),
)
def test_id_take_last_in_group(self):
T = pd.Timestamp
@@ -804,7 +806,7 @@ class BlazeToPipelineTestCase(TestCase):
assert_frame_equal(
result,
expected_output,
_utc_localize_index_level_0(expected_output),
check_dtype=False,
)
+9 -3
View File
@@ -26,7 +26,7 @@ from zipline.pipeline.loaders.blaze import (
SID_FIELD_NAME,
TS_FIELD_NAME,
)
from zipline.utils.numpy_utils import make_datetime64D, np_NaT
from zipline.utils.numpy_utils import make_datetime64D, NaTD
from zipline.utils.test_utils import (
make_simple_equity_info,
tmp_asset_finder,
@@ -234,7 +234,7 @@ class EarningsCalendarLoaderTestCase(TestCase):
# Set NaTs to 0 temporarily because busday_count doesn't support NaT.
# We fill these entries with NaNs later.
whereNaT = raw_announce_dates == np_NaT
whereNaT = raw_announce_dates == NaTD
raw_announce_dates[whereNaT] = make_datetime64D(0)
# The abs call here makes it so that we can use this function to
@@ -260,7 +260,7 @@ class EarningsCalendarLoaderTestCase(TestCase):
'2014-01-10',
'2014-01-15',
'2014-01-20',
]),
], utc=True),
))
def test_compute_earnings(self, dates):
@@ -296,10 +296,12 @@ class EarningsCalendarLoaderTestCase(TestCase):
assert_series_equal(
computed_next.isnull(),
computed_next_busday_offset.isnull(),
check_names=False,
)
assert_series_equal(
computed_previous.isnull(),
computed_previous_busday_offset.isnull(),
check_names=False,
)
for sid in self.sids:
@@ -308,24 +310,28 @@ class EarningsCalendarLoaderTestCase(TestCase):
computed_next.xs(sid, level=1),
expected_next(sid),
sid,
check_names=False,
)
assert_series_equal(
computed_previous.xs(sid, level=1),
expected_previous(sid),
sid,
check_names=False,
)
assert_series_equal(
computed_next_busday_offset.xs(sid, level=1),
expected_next_busday_offset(sid),
sid,
check_names=False,
)
assert_series_equal(
computed_previous_busday_offset.xs(sid, level=1),
expected_previous_busday_offset(sid),
sid,
check_names=False,
)
+60 -47
View File
@@ -157,22 +157,23 @@ class ConstantInputTestCase(TestCase):
USEquityPricing.close: 3,
USEquityPricing.high: 4,
}
self.assets = [1, 2, 3]
self.asset_ids = [1, 2, 3]
self.dates = date_range('2014-01', '2014-03', freq='D', tz='UTC')
self.loader = ConstantLoader(
constants=self.constants,
dates=self.dates,
assets=self.assets,
assets=self.asset_ids,
)
self.asset_info = make_simple_equity_info(
self.assets,
self.asset_ids,
start_date=self.dates[0],
end_date=self.dates[-1],
)
environment = TradingEnvironment()
environment.write_data(equities_df=self.asset_info)
self.asset_finder = environment.asset_finder
self.assets = self.asset_finder.retrieve_all(self.asset_ids)
def test_bad_dates(self):
loader = self.loader
@@ -192,7 +193,7 @@ class ConstantInputTestCase(TestCase):
lambda column: loader, self.dates, self.asset_finder,
)
factor = AssetID()
asset = self.assets[0]
asset = self.asset_ids[0]
p = Pipeline(columns={'f': factor}, screen=factor <= asset)
# The crux of this is that when we run the pipeline for a single day
@@ -204,7 +205,7 @@ class ConstantInputTestCase(TestCase):
def test_screen(self):
loader = self.loader
finder = self.asset_finder
assets = array(self.assets)
asset_ids = array(self.asset_ids)
engine = SimplePipelineEngine(
lambda column: loader, self.dates, self.asset_finder,
)
@@ -212,11 +213,11 @@ class ConstantInputTestCase(TestCase):
dates = self.dates[10:10 + num_dates]
factor = AssetID()
for asset in assets:
p = Pipeline(columns={'f': factor}, screen=factor <= asset)
for asset_id in asset_ids:
p = Pipeline(columns={'f': factor}, screen=factor <= asset_id)
result = engine.run_pipeline(p, dates[0], dates[-1])
expected_sids = assets[assets <= asset]
expected_sids = asset_ids[asset_ids <= asset_id]
expected_assets = finder.retrieve_all(expected_sids)
expected_result = DataFrame(
index=MultiIndex.from_product([dates, expected_assets]),
@@ -228,7 +229,6 @@ class ConstantInputTestCase(TestCase):
def test_single_factor(self):
loader = self.loader
finder = self.asset_finder
assets = self.assets
engine = SimplePipelineEngine(
lambda column: loader, self.dates, self.asset_finder,
@@ -252,18 +252,17 @@ class ConstantInputTestCase(TestCase):
result = engine.run_pipeline(p, dates[0], dates[-1])
self.assertEqual(set(result.columns), {'f'})
assert_multi_index_is_product(
self, result.index, dates, finder.retrieve_all(assets)
self, result.index, dates, assets
)
check_arrays(
result['f'].unstack().values,
full(result_shape, expected_result),
full(result_shape, expected_result, dtype=float),
)
def test_multiple_rolling_factors(self):
loader = self.loader
finder = self.asset_finder
assets = self.assets
engine = SimplePipelineEngine(
lambda column: loader, self.dates, self.asset_finder,
@@ -289,22 +288,22 @@ class ConstantInputTestCase(TestCase):
self.assertEqual(set(results.columns), {'short', 'high', 'long'})
assert_multi_index_is_product(
self, results.index, dates, finder.retrieve_all(assets)
self, results.index, dates, assets
)
# row-wise sum over an array whose values are all (1 - 2)
check_arrays(
results['short'].unstack().values,
full(shape, -short_factor.window_length),
full(shape, -short_factor.window_length, dtype=float),
)
check_arrays(
results['long'].unstack().values,
full(shape, -long_factor.window_length),
full(shape, -long_factor.window_length, dtype=float),
)
# row-wise sum over an array whose values are all (1 - 3)
check_arrays(
results['high'].unstack().values,
full(shape, -2 * high_factor.window_length),
full(shape, -2 * high_factor.window_length, dtype=float),
)
def test_numeric_factor(self):
@@ -368,7 +367,7 @@ class ConstantInputTestCase(TestCase):
loader = ConstantLoader(
constants=constants,
dates=self.dates,
assets=self.assets,
assets=self.asset_ids,
)
engine = SimplePipelineEngine(
lambda column: loader, self.dates, self.asset_finder,
@@ -394,17 +393,23 @@ class ConstantInputTestCase(TestCase):
set(result.columns)
)
result_index = self.assets * len(dates_to_test)
result_index = self.asset_ids * len(dates_to_test)
result_shape = (len(result_index),)
check_arrays(
result['sumdiff'],
Series(index=result_index, data=full(result_shape, -3)),
Series(
index=result_index,
data=full(result_shape, -3, dtype=float),
),
)
for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
check_arrays(
result[name],
Series(index=result_index, data=full(result_shape, const)),
Series(
index=result_index,
data=full(result_shape, const, dtype=float),
),
)
def test_loader_given_multiple_columns(self):
@@ -427,12 +432,12 @@ class ConstantInputTestCase(TestCase):
Loader1DataSet2.col2: 4}
loader1 = RecordingConstantLoader(constants=constants1,
dates=self.dates,
assets=self.assets)
assets=self.asset_ids)
constants2 = {Loader2DataSet.col1: 5,
Loader2DataSet.col2: 6}
loader2 = RecordingConstantLoader(constants=constants2,
dates=self.dates,
assets=self.assets)
assets=self.asset_ids)
engine = SimplePipelineEngine(
lambda column:
@@ -471,19 +476,26 @@ class ConstantInputTestCase(TestCase):
for name, pipe_col in iteritems(columns)}
index = MultiIndex.from_product([self.dates[2:], self.assets])
def expected_for_col(col):
val = vals[col]
offset = columns[col].window_length - min_window
return concatenate(
[
full(offset * index.levshape[1], nan),
full(
(index.levshape[0] - offset) * index.levshape[1],
val,
float,
)
],
)
expected = DataFrame(
data={col:
concatenate((
full((columns[col].window_length - min_window)
* index.levshape[1],
nan),
full((index.levshape[0]
- (columns[col].window_length - min_window))
* index.levshape[1],
val)))
for col, val in iteritems(vals)},
data={col: expected_for_col(col) for col in vals},
index=index,
columns=columns)
columns=columns,
)
assert_frame_equal(result, expected)
@@ -504,7 +516,7 @@ class FrameInputTestCase(TestCase):
cls.env = TradingEnvironment()
day = cls.env.trading_day
cls.assets = Int64Index([1, 2, 3])
cls.asset_ids = [1, 2, 3]
cls.dates = date_range(
'2015-01-01',
'2015-01-31',
@@ -513,12 +525,13 @@ class FrameInputTestCase(TestCase):
)
asset_info = make_simple_equity_info(
cls.assets,
cls.asset_ids,
start_date=cls.dates[0],
end_date=cls.dates[-1],
)
cls.env.write_data(equities_df=asset_info)
cls.asset_finder = cls.env.asset_finder
cls.assets = cls.asset_finder.retrieve_all(cls.asset_ids)
@classmethod
def tearDownClass(cls):
@@ -533,7 +546,7 @@ class FrameInputTestCase(TestCase):
return DataFrame(data, columns=self.assets, index=self.dates)
def test_compute_with_adjustments(self):
dates, assets = self.dates, self.assets
dates, asset_ids = self.dates, self.asset_ids
low, high = USEquityPricing.low, USEquityPricing.high
apply_idxs = [3, 10, 16]
@@ -544,7 +557,7 @@ class FrameInputTestCase(TestCase):
[
dict(
kind=MULTIPLY,
sid=assets[1],
sid=asset_ids[1],
value=2.0,
start_date=None,
end_date=apply_date(0, offset=-1),
@@ -552,7 +565,7 @@ class FrameInputTestCase(TestCase):
),
dict(
kind=MULTIPLY,
sid=assets[1],
sid=asset_ids[1],
value=3.0,
start_date=None,
end_date=apply_date(1, offset=-1),
@@ -560,7 +573,7 @@ class FrameInputTestCase(TestCase):
),
dict(
kind=MULTIPLY,
sid=assets[1],
sid=asset_ids[1],
value=5.0,
start_date=None,
end_date=apply_date(2, offset=-1),
@@ -630,7 +643,7 @@ class SyntheticBcolzTestCase(TestCase):
asset_lifetime=8,
)
cls.last_asset_end = cls.asset_info['end_date'].max()
cls.all_assets = cls.asset_info.index
cls.all_asset_ids = cls.asset_info.index
cls.env.write_data(equities_df=cls.asset_info)
cls.finder = cls.env.asset_finder
@@ -646,7 +659,7 @@ class SyntheticBcolzTestCase(TestCase):
table = cls.writer.write(
cls.temp_dir.getpath('testdata.bcolz'),
cls.calendar,
cls.all_assets,
cls.all_asset_ids,
)
cls.pipeline_loader = USEquityPricingLoader(
@@ -698,7 +711,7 @@ class SyntheticBcolzTestCase(TestCase):
self.finder,
)
window_length = 5
assets = self.all_assets
asset_ids = self.all_asset_ids
dates = date_range(
self.first_asset_start + self.trading_day,
self.last_asset_end,
@@ -722,7 +735,7 @@ class SyntheticBcolzTestCase(TestCase):
# **previous** day's data.
expected_raw = rolling_mean(
self.writer.expected_values_2d(
dates - self.trading_day, assets, 'close',
dates - self.trading_day, asset_ids, 'close',
),
window_length,
min_periods=1,
@@ -732,7 +745,7 @@ class SyntheticBcolzTestCase(TestCase):
# Truncate off the extra rows needed to compute the SMAs.
expected_raw[window_length:],
index=dates_to_test, # dates_to_test is dates[window_length:]
columns=self.finder.retrieve_all(assets),
columns=self.finder.retrieve_all(asset_ids),
)
self.write_nans(expected)
result = results['sma'].unstack()
@@ -750,7 +763,7 @@ class SyntheticBcolzTestCase(TestCase):
self.finder,
)
window_length = 5
assets = self.all_assets
asset_ids = self.all_asset_ids
dates = date_range(
self.first_asset_start + self.trading_day,
self.last_asset_end,
@@ -772,9 +785,9 @@ class SyntheticBcolzTestCase(TestCase):
# We expect NaNs when the asset was undefined, otherwise 0 everywhere,
# since the input is always increasing.
expected = DataFrame(
data=zeros((len(dates_to_test), len(assets)), dtype=float),
data=zeros((len(dates_to_test), len(asset_ids)), dtype=float),
index=dates_to_test,
columns=self.finder.retrieve_all(assets),
columns=self.finder.retrieve_all(asset_ids),
)
self.write_nans(expected)
result = results['drawdown'].unstack()
+7 -3
View File
@@ -23,7 +23,11 @@ from zipline.pipeline.factors import (
RSI,
)
from zipline.utils.test_utils import check_allclose, check_arrays
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, np_NaT
from zipline.utils.numpy_utils import (
datetime64ns_dtype,
float64_dtype,
NaTns,
)
from .base import BasePipelineTestCase
@@ -309,7 +313,7 @@ class FactorTestCase(BasePipelineTestCase):
mask = eyemask if use_mask else nomask
if set_missing:
asfloat[:, 2] = nan
asdatetime[:, 2] = np_NaT
asdatetime[:, 2] = NaTns
float_result = masked_rankdata_2d(
data=asfloat,
@@ -321,7 +325,7 @@ class FactorTestCase(BasePipelineTestCase):
datetime_result = masked_rankdata_2d(
data=asdatetime,
mask=mask,
missing_value=np_NaT,
missing_value=NaTns,
method=method,
ascending=True,
)
+10 -10
View File
@@ -76,9 +76,9 @@ class NumericalExpressionTestCase(TestCase):
self.h = H()
self.d = DateFactor()
self.fake_raw_data = {
self.f: full((5, 5), 3),
self.g: full((5, 5), 2),
self.h: full((5, 5), 1),
self.f: full((5, 5), 3, float),
self.g: full((5, 5), 2, float),
self.h: full((5, 5), 1, float),
self.d: full((5, 5), 0, dtype='datetime64[ns]'),
}
self.mask = DataFrame(True, index=self.dates, columns=self.assets)
@@ -94,7 +94,7 @@ class NumericalExpressionTestCase(TestCase):
def check_constant_output(self, expr, expected):
self.assertFalse(isnan(expected))
return self.check_output(expr, full((5, 5), expected))
return self.check_output(expr, full((5, 5), expected, float))
def test_validate_good(self):
f = self.f
@@ -435,9 +435,9 @@ class NumericalExpressionTestCase(TestCase):
def test_comparisons(self):
f, g, h = self.f, self.g, self.h
self.fake_raw_data = {
f: arange(25).reshape(5, 5),
g: arange(25).reshape(5, 5) - eye(5),
h: full((5, 5), 5),
f: arange(25, dtype=float).reshape(5, 5),
g: arange(25, dtype=float).reshape(5, 5) - eye(5),
h: full((5, 5), 5, dtype=float),
}
f_data = self.fake_raw_data[f]
g_data = self.fake_raw_data[g]
@@ -479,9 +479,9 @@ class NumericalExpressionTestCase(TestCase):
)
self.fake_raw_data = {
f: arange(25).reshape(5, 5),
g: arange(25).reshape(5, 5) - eye(5),
h: full((5, 5), 5),
f: arange(25, dtype=float).reshape(5, 5),
g: arange(25, dtype=float).reshape(5, 5) - eye(5),
h: full((5, 5), 5, dtype=float),
custom_filter: custom_filter_mask,
}
+1 -1
View File
@@ -401,7 +401,7 @@ class PipelineAlgorithmTestCase(TestCase):
'ratio': array([], dtype=float),
'sid': array([], dtype=int),
},
index=DatetimeIndex([], tz='UTC'),
index=DatetimeIndex([]),
columns=['effective_date', 'ratio', 'sid'],
)
dividends = DataFrame({
+1
View File
@@ -40,6 +40,7 @@ class SomeFactor(Factor):
dtype = float64_dtype
window_length = 5
inputs = [SomeDataSet.foo, SomeDataSet.bar]
SomeFactorAlias = SomeFactor
+2 -5
View File
@@ -16,14 +16,11 @@
# This code is based on a unittest written by John Salvatier:
# https://github.com/pymc-devs/pymc/blob/pymc3/tests/test_examples.py
# Disable plotting
#
import glob
import imp
import matplotlib
from nose_parameterized import parameterized
import os
import runpy
from unittest import TestCase
from zipline.utils import parse_args, run_pipeline
@@ -45,7 +42,7 @@ class ExamplesTests(TestCase):
@parameterized.expand(((os.path.basename(f).replace('.', '_'), f) for f in
glob.glob(os.path.join(example_dir(), '*.py'))))
def test_example(self, name, example):
imp.load_source('__main__', os.path.basename(example), open(example))
runpy.run_path(example, run_name='__main__')
# Test algorithm as if scripts/run_algo.py is being used.
def test_example_run_pipline(self):
+2 -127
View File
@@ -41,131 +41,6 @@ from zipline.sources import RandomWalkSource, DataFrameSource
import zipline.utils.factory as factory
from zipline.utils.test_utils import subtest
# Cases are over the July 4th holiday, to ensure use of trading calendar.
# March 2013
# Su Mo Tu We Th Fr Sa
# 1 2
# 3 4 5 6 7 8 9
# 10 11 12 13 14 15 16
# 17 18 19 20 21 22 23
# 24 25 26 27 28 29 30
# 31
# April 2013
# Su Mo Tu We Th Fr Sa
# 1 2 3 4 5 6
# 7 8 9 10 11 12 13
# 14 15 16 17 18 19 20
# 21 22 23 24 25 26 27
# 28 29 30
#
# May 2013
# Su Mo Tu We Th Fr Sa
# 1 2 3 4
# 5 6 7 8 9 10 11
# 12 13 14 15 16 17 18
# 19 20 21 22 23 24 25
# 26 27 28 29 30 31
#
# June 2013
# Su Mo Tu We Th Fr Sa
# 1
# 2 3 4 5 6 7 8
# 9 10 11 12 13 14 15
# 16 17 18 19 20 21 22
# 23 24 25 26 27 28 29
# 30
# July 2013
# Su Mo Tu We Th Fr Sa
# 1 2 3 4 5 6
# 7 8 9 10 11 12 13
# 14 15 16 17 18 19 20
# 21 22 23 24 25 26 27
# 28 29 30 31
#
# Times to be converted via:
# pd.Timestamp('2013-07-05 9:31', tz='US/Eastern').tz_convert('UTC')},
INDEX_TEST_CASES_RAW = {
'week of daily data': {
'input': {'bar_count': 5,
'frequency': '1d',
'algo_dt': '2013-07-05 9:31AM'},
'expected': [
'2013-06-28 4:00PM',
'2013-07-01 4:00PM',
'2013-07-02 4:00PM',
'2013-07-03 1:00PM',
'2013-07-05 9:31AM',
]
},
'five minutes on july 5th open': {
'input': {'bar_count': 5,
'frequency': '1m',
'algo_dt': '2013-07-05 9:31AM'},
'expected': [
'2013-07-03 12:57PM',
'2013-07-03 12:58PM',
'2013-07-03 12:59PM',
'2013-07-03 1:00PM',
'2013-07-05 9:31AM',
]
},
}
def to_timestamp(dt_str):
return pd.Timestamp(dt_str, tz='US/Eastern').tz_convert('UTC')
def convert_cases(cases):
"""
Convert raw strings to values comparable with system data.
"""
cases = cases.copy()
for case in cases.values():
case['input']['algo_dt'] = to_timestamp(case['input']['algo_dt'])
case['expected'] = pd.DatetimeIndex([to_timestamp(dt_str) for dt_str
in case['expected']])
return cases
INDEX_TEST_CASES = convert_cases(INDEX_TEST_CASES_RAW)
def get_index_at_dt(case_input, env):
history_spec = history.HistorySpec(
case_input['bar_count'],
case_input['frequency'],
None,
False,
env=env,
data_frequency='minute',
)
return history.index_at_dt(history_spec, case_input['algo_dt'], env=env)
class TestHistoryIndex(TestCase):
@classmethod
def setUpClass(cls):
cls.environment = TradingEnvironment()
@classmethod
def tearDownClass(cls):
del cls.environment
@parameterized.expand(
[(name, case['input'], case['expected'])
for name, case in INDEX_TEST_CASES.items()]
)
def test_index_at_dt(self, name, case_input, expected):
history_index = get_index_at_dt(case_input, self.environment)
history_series = pd.Series(index=history_index)
expected_series = pd.Series(index=expected)
pd.util.testing.assert_series_equal(history_series, expected_series)
class TestHistoryContainer(TestCase):
@@ -473,9 +348,9 @@ def handle_data(context, data):
output = test_algo.run(source)
self.assertIsNotNone(output)
history_trace = test_algo.history_trace
df.columns = self.env.asset_finder.retrieve_all(df.columns)
for i, received in enumerate(history_trace[bar_count - 1:]):
for i, received in enumerate(test_algo.history_trace[bar_count - 1:]):
expected = df.iloc[i:i + bar_count]
assert_frame_equal(expected, received)
+1 -1
View File
@@ -266,7 +266,7 @@ class TestStatelessRules(RuleTestCase):
@subtest(minutes_for_days(), 'ms')
def test_Always(self, ms):
should_trigger = partial(Always().should_trigger, env=self.env)
self.assertTrue(all(map(partial(should_trigger, env=self.env), ms)))
self.assertTrue(all(map(should_trigger, ms)))
@subtest(minutes_for_days(), 'ms')
def test_Never(self, ms):
+16 -5
View File
@@ -1,5 +1,5 @@
#
# Copyright 2013 Quantopian, Inc.
# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from pandas.io.data import DataReader
import pytz
from six import iteritems
from six.moves.urllib_error import HTTPError
from . benchmarks import get_benchmark_returns
from . import treasuries, treasuries_can
@@ -239,8 +240,15 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day):
path=path,
)
data = get_benchmark_returns(symbol, first_date - trading_day, last_date)
data.to_csv(path)
try:
data = get_benchmark_returns(
symbol,
first_date - trading_day,
last_date,
)
data.to_csv(path)
except (OSError, IOError, HTTPError):
logger.exception('failed to cache the new benchmark returns')
if not has_data_for_dates(data, first_date, last_date):
logger.warn("Still don't have expected data after redownload!")
return data
@@ -306,8 +314,11 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now):
)
)
data = loader_module.get_treasury_data(first_date, last_date)
data.to_csv(path)
try:
data = loader_module.get_treasury_data(first_date, last_date)
data.to_csv(path)
except (OSError, IOError, HTTPError):
logger.exception('failed to cache treasury data')
if not has_data_for_dates(data, first_date, last_date):
logger.warn("Still don't have expected data after redownload!")
return data
+1 -1
View File
@@ -206,7 +206,7 @@ class BcolzDailyBarWriter(with_metaclass(ABCMeta)):
if column_name == 'id':
# We know what the content of this column is, so don't
# bother reading it.
columns['id'].append(full((nrows,), asset_id))
columns['id'].append(full((nrows,), asset_id, uint32))
continue
columns[column_name].append(
self.to_uint32(table[column_name][:], column_name)
+2 -2
View File
@@ -32,8 +32,8 @@ def ols_transform(data, sid1, sid2):
"""Computes regression coefficient (slope and intercept)
via Ordinary Least Squares between two SIDs.
"""
p0 = data.price[sid1]
p1 = sm.add_constant(data.price[sid2], prepend=True)
p0 = data.price[sid1].values
p1 = sm.add_constant(data.price[sid2].values, prepend=True)
slope, intercept = sm.OLS(p0, p1).fit().params
return slope, intercept
+5 -6
View File
@@ -32,14 +32,12 @@ Position Tracking
"""
from __future__ import division
from math import (
copysign,
floor,
)
from math import copysign
from copy import copy
import logbook
import numpy as np
import zipline.protocol as zp
from zipline.utils.serialization_utils import (
@@ -71,7 +69,8 @@ class Position(object):
# stock dividend
if dividend['payment_sid']:
out['payment_sid'] = dividend['payment_sid']
out['share_count'] = floor(self.amount * float(dividend['ratio']))
out['share_count'] = np.floor(self.amount
* float(dividend['ratio']))
# cash dividend
if dividend['net_amount']:
@@ -106,7 +105,7 @@ class Position(object):
raw_share_count = self.amount / float(ratio)
# e.g., 33
full_share_count = floor(raw_share_count)
full_share_count = np.floor(raw_share_count)
# e.g., 0.333
fractional_share_count = raw_share_count - full_share_count
+3 -2
View File
@@ -70,6 +70,7 @@ from pandas.tseries.tools import normalize_date
import zipline.finance.risk as risk
from . period import PerformancePeriod
from zipline.utils.pandas_utils import sort_values
from zipline.utils.serialization_utils import (
VERSION_LABEL
)
@@ -200,9 +201,9 @@ class PerformanceTracker(object):
)
self._dividend_count += len(new_dividends)
self.dividend_frame = pd.concat(
self.dividend_frame = sort_values(pd.concat(
[self.dividend_frame, new_dividends]
).sort(['pay_date', 'ex_date']).set_index('id', drop=False)
), ['pay_date', 'ex_date']).set_index('id', drop=False)
def initialize_dividends_from_other(self, other):
"""
+1 -1
View File
@@ -374,7 +374,7 @@ class TradingEnvironment(object):
def get_open_and_close(self, day):
index = self.open_and_closes.index.get_loc(day.date())
todays_minutes = self.open_and_closes.values[index]
todays_minutes = self.open_and_closes.iloc[index]
return todays_minutes[0], todays_minutes[1]
def market_minutes_for_day(self, stamp):
-4
View File
@@ -15,8 +15,6 @@
from . history import (
HistorySpec,
days_index_at_dt,
index_at_dt,
Frequency,
)
@@ -24,8 +22,6 @@ from . import history_container
__all__ = [
'HistorySpec',
'days_index_at_dt',
'index_at_dt',
'history_container',
'Frequency',
]
-57
View File
@@ -15,7 +15,6 @@
from __future__ import division
import numpy as np
import pandas as pd
import re
@@ -284,59 +283,3 @@ class HistorySpec(object):
def __repr__(self):
return ''.join([self.__class__.__name__, "('", self.key_str, "')"])
def days_index_at_dt(history_spec, algo_dt, env):
"""
Get the index of a frame to be used for a get_history call with daily
frequency.
"""
# Get the previous (bar_count - 1) days' worth of market closes.
day_delta = (history_spec.bar_count - 1) * history_spec.frequency.num
market_closes = env.open_close_window(
algo_dt,
day_delta,
offset=(-day_delta),
step=history_spec.frequency.num,
).market_close
if history_spec.frequency.data_frequency == 'daily':
market_closes = market_closes.apply(pd.tslib.normalize_date)
# Append the current algo_dt as the last index value.
# Using the 'rawer' numpy array values here because of a bottleneck
# that appeared when using DatetimeIndex
return np.append(market_closes.values, algo_dt)
def minutes_index_at_dt(history_spec, algo_dt, env):
"""
Get the index of a frame to be used for a get_history_call with minutely
frequency.
"""
# TODO: This is almost certainly going to be too slow for production.
return env.market_minute_window(
algo_dt,
history_spec.bar_count,
step=-1,
)[::-1]
def index_at_dt(history_spec, algo_dt, env):
"""
Returns index of a frame returned by get_history() with the given
history_spec and algo_dt.
The resulting index will have @history_spec.bar_count bars, increasing in
units of @history_spec.frequency, terminating at the given @algo_dt.
Note: The last bar of the returned frame represents an as-of-yet incomplete
time window, so the delta between the last and second-to-last bars is
usually always less than `@history_spec.frequency` for frequencies greater
than 1m.
"""
frequency = history_spec.frequency
if frequency.unit_str == 'd':
return days_index_at_dt(history_spec, algo_dt, env)
elif frequency.unit_str == 'm':
return minutes_index_at_dt(history_spec, algo_dt, env)
+3 -3
View File
@@ -5,7 +5,7 @@ announcements, acquisitions, dividends, etc.).
from numpy import newaxis
from zipline.pipeline.data.earnings import EarningsCalendar
from zipline.utils.numpy_utils import (
np_NaT,
NaTD,
busday_count_mask_NaT,
datetime64D_dtype,
float64_dtype,
@@ -48,7 +48,7 @@ class BusinessDaysUntilNextEarnings(Factor):
announce_dates = arrays[0].astype(datetime64D_dtype)
# Set masked values to NaT.
announce_dates[~mask] = np_NaT
announce_dates[~mask] = NaTD
# Convert row labels into a column vector for broadcasted comparison.
reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis]
@@ -84,7 +84,7 @@ class BusinessDaysSincePreviousEarnings(Factor):
announce_dates = arrays[0].astype(datetime64D_dtype)
# Set masked values to NaT.
announce_dates[~mask] = np_NaT
announce_dates[~mask] = NaTD
# Convert row labels into a column vector for broadcasted comparison.
reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis]
+7 -7
View File
@@ -2,12 +2,6 @@
Technical Analysis Factors
--------------------------
"""
from bottleneck import (
nanargmax,
nanmax,
nanmean,
nansum,
)
from numbers import Number
from numpy import (
abs,
@@ -31,6 +25,12 @@ from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.mixins import SingleInputMixin
from zipline.utils.control_flow import ignore_nanwarnings
from zipline.utils.input_validation import expect_types
from zipline.utils.math_utils import (
nanargmax,
nanmax,
nanmean,
nansum,
)
from .factor import CustomFactor
@@ -181,7 +181,7 @@ class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor):
Return weighting vector for an exponential moving statistic on `length`
rows with a decay rate of `decay_rate`.
"""
return full(length, decay_rate) ** arange(length + 1, 1, -1)
return full(length, decay_rate, float) ** arange(length + 1, 1, -1)
@classmethod
@expect_types(span=Number)
+5 -4
View File
@@ -174,6 +174,7 @@ from zipline.utils.input_validation import (
optionally,
)
from zipline.utils.numpy_utils import repeat_last_axis
from zipline.utils.pandas_utils import sort_values
from zipline.utils.preprocess import preprocess
@@ -626,10 +627,10 @@ def overwrite_novel_deltas(baseline, deltas, dates):
) <= 1
novel_deltas = deltas.loc[novel_idx]
non_novel_deltas = deltas.loc[~novel_idx]
return pd.concat(
return sort_values(pd.concat(
(baseline, novel_deltas),
ignore_index=True,
).sort(TS_FIELD_NAME), non_novel_deltas
), TS_FIELD_NAME), non_novel_deltas
def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
@@ -910,13 +911,13 @@ class BlazeLoader(dict):
This can return more data than needed. The in memory reindex will
handle this.
"""
return reduce(
return sort_values(reduce(
partial(pd.merge, on=added_query_fields, how='outer'),
(
odo(where(e, column), pd.DataFrame, **odo_kwargs)
for column in columns
),
).sort(TS_FIELD_NAME) # sort for the groupby later
), TS_FIELD_NAME) # sort for the groupby later
materialized_expr = collect_expr(expr)
materialized_deltas = (
+2 -1
View File
@@ -15,6 +15,7 @@ from pandas import (
)
from zipline.lib.adjusted_array import AdjustedArray
from zipline.lib.adjustment import make_adjustment_from_labels
from zipline.utils.pandas_utils import sort_values
from .base import PipelineLoader
ADJUSTMENT_COLUMNS = Index([
@@ -71,7 +72,7 @@ class DataFrameLoader(PipelineLoader):
else:
# Ensure that columns are in the correct order.
adjustments = adjustments.reindex_axis(ADJUSTMENT_COLUMNS, axis=1)
adjustments.sort(['apply_date', 'sid'], inplace=True)
sort_values(adjustments, ['apply_date', 'sid'], inplace=True)
self.adjustments = adjustments
self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
+4 -4
View File
@@ -5,7 +5,7 @@ import pandas as pd
from six import iteritems
from six.moves import zip
from zipline.utils.numpy_utils import np_NaT
from zipline.utils.numpy_utils import NaTns
def next_date_frame(dates, events_by_sid):
@@ -34,7 +34,7 @@ def next_date_frame(dates, events_by_sid):
previous_date_frame
"""
cols = {
equity: np.full_like(dates, np_NaT) for equity in events_by_sid
equity: np.full_like(dates, NaTns) for equity in events_by_sid
}
raw_dates = dates.values
for equity, event_dates in iteritems(events_by_sid):
@@ -50,7 +50,7 @@ def next_date_frame(dates, events_by_sid):
(knowledge_date <= raw_dates) &
(raw_dates <= event_date)
)
value_mask = (event_date <= data) | (data == np_NaT)
value_mask = (event_date <= data) | (data == NaTns)
data[date_mask & value_mask] = event_date
return pd.DataFrame(index=dates, data=cols)
@@ -82,7 +82,7 @@ def previous_date_frame(date_index, events_by_sid):
next_date_frame
"""
sids = list(events_by_sid)
out = np.full((len(date_index), len(sids)), np_NaT, dtype='datetime64[ns]')
out = np.full((len(date_index), len(sids)), NaTns, dtype='datetime64[ns]')
dn = date_index[-1].asm8
for col_idx, sid in enumerate(sids):
# events_by_sid[sid] is Series mapping knowledge_date to actual
+1 -2
View File
@@ -309,8 +309,7 @@ def create_test_panel_source(sim_params=None, env=None, source_type=None):
'arbitrary': arbitrary},
index=index)
if source_type:
source_types = np.full(len(index), source_type)
df['type'] = source_types
df['type'] = source_type
panel = pd.Panel.from_dict({0: df})
+15 -5
View File
@@ -22,11 +22,19 @@ datetime64ns_dtype = dtype('datetime64[ns]')
make_datetime64ns = flip(datetime64, 'ns')
make_datetime64D = flip(datetime64, 'D')
np_NaT = make_datetime64ns('NaT')
NaTmap = {
dtype('datetime64[%s]' % unit): datetime64('NaT', unit)
for unit in ('ns', 'us', 'ms', 's', 'm', 'D')
}
NaT_for_dtype = NaTmap.__getitem__
NaTns = NaT_for_dtype(datetime64ns_dtype)
NaTD = NaT_for_dtype(datetime64D_dtype)
_FILLVALUE_DEFAULTS = {
float64_dtype: nan,
datetime64ns_dtype: np_NaT,
datetime64ns_dtype: NaTns,
}
@@ -127,7 +135,9 @@ def repeat_last_axis(array, count):
_notNaT = make_datetime64D(0)
def busday_count_mask_NaT(begindates, enddates, out=None):
def busday_count_mask_NaT(begindates,
enddates,
out=None):
"""
Simple of numpy.busday_count that returns `float` arrays rather than int
arrays, and handles `NaT`s by returning `NaN`s where the inputs were `NaT`.
@@ -142,8 +152,8 @@ def busday_count_mask_NaT(begindates, enddates, out=None):
if out is None:
out = empty(broadcast(begindates, enddates).shape, dtype=float)
beginmask = (begindates == np_NaT)
endmask = (enddates == np_NaT)
beginmask = (begindates == NaTD)
endmask = (enddates == NaTD)
out = busday_count(
# Temporarily fill in non-NaT values.
+8
View File
@@ -1,6 +1,7 @@
"""
Utilities for working with pandas objects.
"""
import pandas as pd
def explode(df):
@@ -10,3 +11,10 @@ def explode(df):
(df.index, df.columns, df.values)
"""
return df.index, df.columns, df.values
try:
# pandas 0.16 compat
sort_values = pd.DataFrame.sort_values
except AttributeError:
sort_values = pd.DataFrame.sort