mirror of
https://github.com/wassname/catalyst.git
synced 2026-07-01 01:58:43 +08:00
Merge pull request #969 from quantopian/fix-numpy-warnings
Fix numpy warnings
This commit is contained in:
+11
-5
@@ -4,22 +4,28 @@ cache:
|
||||
directories:
|
||||
- $HOME/.cache/pip
|
||||
matrix:
|
||||
fast_finish: true
|
||||
include:
|
||||
- python: 2.7
|
||||
env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1
|
||||
- python: 2.7
|
||||
env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1
|
||||
- python: 3.4
|
||||
env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1
|
||||
- python: 3.4
|
||||
env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1
|
||||
|
||||
before_install:
|
||||
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget https://repo.continuum.io/miniconda/Miniconda-3.7.0-Linux-x86_64.sh -O miniconda.sh; else wget https://repo.continuum.io/miniconda/Miniconda3-3.7.0-Linux-x86_64.sh -O miniconda.sh; fi
|
||||
- chmod +x miniconda.sh
|
||||
- ./miniconda.sh -b -p $HOME/miniconda
|
||||
- export PATH="$HOME/miniconda/bin:$PATH"
|
||||
- sed -i "s/numpy==.*/numpy==$NUMPY_VERSION/" etc/requirements.txt
|
||||
- sed -i "s/pandas==.*/pandas==$PANDAS_VERSION/" etc/requirements.txt
|
||||
- sed -i "s/scipy==.*/scipy==$SCIPY_VERSION/" etc/requirements.txt
|
||||
install:
|
||||
- conda create -n testenv --yes pip python=$TRAVIS_PYTHON_VERSION
|
||||
- conda create -n testenv --yes -c quantopian pip python=$TRAVIS_PYTHON_VERSION numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION ta-lib=0.4.8
|
||||
- source activate testenv
|
||||
- conda install --yes -c quantopian numpy=$NUMPY_VERSION pandas=$PANDAS_VERSION scipy=$SCIPY_VERSION Cython=0.22.1 ta-lib=0.4.8
|
||||
- conda remove --yes six pytz # remove these packages so our requirements can downgrade them
|
||||
- pip install --upgrade pip coverage coveralls
|
||||
- pip install -r etc/requirements.txt
|
||||
- pip install -r etc/requirements_dev.txt
|
||||
@@ -27,9 +33,9 @@ install:
|
||||
- pip install -e .
|
||||
before_script:
|
||||
- pip freeze | sort
|
||||
- flake8 zipline tests
|
||||
script:
|
||||
- nosetests --with-timer --exclude=^test_examples --with-coverage --cover-package=zipline --timer-top-n=15
|
||||
- nosetests tests/
|
||||
- flake8 zipline tests
|
||||
after_success:
|
||||
- coveralls
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ six==1.9.0
|
||||
# For fetching remote data
|
||||
requests==2.9.1
|
||||
|
||||
Cython==0.22.1
|
||||
Cython==0.23.4
|
||||
|
||||
# faster OrderedDict
|
||||
cyordereddict==0.2.2
|
||||
@@ -41,7 +41,7 @@ networkx==1.9.1
|
||||
numexpr==2.4.3
|
||||
|
||||
# On disk storage format for pipeline data.
|
||||
bcolz==0.10.0
|
||||
bcolz==0.12.1
|
||||
|
||||
# Command line interface helper
|
||||
click==4.0.0
|
||||
|
||||
@@ -3,7 +3,9 @@ verbosity=2
|
||||
detailed-errors=1
|
||||
with-ignore-docstrings=1
|
||||
with-timer=1
|
||||
timer-filter=warning
|
||||
timer-top-n=15
|
||||
with-coverage=1
|
||||
cover-package=zipline
|
||||
|
||||
[metadata]
|
||||
description-file = README.rst
|
||||
|
||||
@@ -33,43 +33,63 @@ from setuptools import (
|
||||
import versioneer
|
||||
|
||||
|
||||
class LazyCythonizingList(list):
|
||||
cythonized = False
|
||||
|
||||
def lazy_cythonize(self):
|
||||
if self.cythonized:
|
||||
return
|
||||
self.cythonized = True
|
||||
|
||||
from Cython.Build import cythonize
|
||||
from numpy import get_include
|
||||
|
||||
self[:] = cythonize(
|
||||
[
|
||||
Extension(*ext_args, include_dirs=[get_include()])
|
||||
for ext_args in self
|
||||
]
|
||||
class LazyBuildExtCommandClass(dict):
|
||||
"""
|
||||
Lazy command class that defers operations requiring Cython and numpy until
|
||||
they've actually been downloaded and installed by setup_requires.
|
||||
"""
|
||||
def __contains__(self, key):
|
||||
return (
|
||||
key == 'build_ext'
|
||||
or super(LazyBuildExtCommandClass, self).__contains__(key)
|
||||
)
|
||||
|
||||
def __iter__(self):
|
||||
self.lazy_cythonize()
|
||||
return super(LazyCythonizingList, self).__iter__()
|
||||
def __setitem__(self, key, value):
|
||||
if key == 'build_ext':
|
||||
raise AssertionError("build_ext overridden!")
|
||||
super(LazyBuildExtCommandClass, self).__setitem__(key, value)
|
||||
|
||||
def __getitem__(self, num):
|
||||
self.lazy_cythonize()
|
||||
return super(LazyCythonizingList, self).__getitem__(num)
|
||||
def __getitem__(self, key):
|
||||
if key != 'build_ext':
|
||||
return super(LazyBuildExtCommandClass, self).__getitem__(key)
|
||||
|
||||
from Cython.Distutils import build_ext as cython_build_ext
|
||||
import numpy
|
||||
|
||||
# Cython_build_ext isn't a new-style class in Py2.
|
||||
class build_ext(cython_build_ext, object):
|
||||
"""
|
||||
Custom build_ext command that lazily adds numpy's include_dir to
|
||||
extensions.
|
||||
"""
|
||||
def build_extensions(self):
|
||||
"""
|
||||
Lazily append numpy's include directory to Extension includes.
|
||||
|
||||
This is done here rather than at module scope because setup.py
|
||||
may be run before numpy has been installed, in which case
|
||||
importing numpy and calling `numpy.get_include()` will fail.
|
||||
"""
|
||||
numpy_incl = numpy.get_include()
|
||||
for ext in self.extensions:
|
||||
ext.include_dirs.append(numpy_incl)
|
||||
|
||||
super(build_ext, self).build_extensions()
|
||||
return build_ext
|
||||
|
||||
|
||||
ext_modules = LazyCythonizingList([
|
||||
('zipline.assets._assets', ['zipline/assets/_assets.pyx']),
|
||||
('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']),
|
||||
('zipline.lib._float64window', ['zipline/lib/_float64window.pyx']),
|
||||
('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']),
|
||||
('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']),
|
||||
('zipline.lib.rank', ['zipline/lib/rank.pyx']),
|
||||
('zipline.data._equities', ['zipline/data/_equities.pyx']),
|
||||
('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']),
|
||||
])
|
||||
ext_modules = [
|
||||
Extension('zipline.assets._assets', ['zipline/assets/_assets.pyx']),
|
||||
Extension('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']),
|
||||
Extension(
|
||||
'zipline.lib._float64window', ['zipline/lib/_float64window.pyx']
|
||||
),
|
||||
Extension('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']),
|
||||
Extension('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']),
|
||||
Extension('zipline.lib.rank', ['zipline/lib/rank.pyx']),
|
||||
Extension('zipline.data._equities', ['zipline/data/_equities.pyx']),
|
||||
Extension('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']),
|
||||
]
|
||||
|
||||
|
||||
STR_TO_CMP = {
|
||||
@@ -116,9 +136,8 @@ def _filter_requirements(lines_iter):
|
||||
yield requirement
|
||||
|
||||
|
||||
REQ_UPPER_BOUNDS = {
|
||||
'numpy': '<1.10',
|
||||
}
|
||||
# We don't currently have any known upper bounds.
|
||||
REQ_UPPER_BOUNDS = {}
|
||||
|
||||
|
||||
def _with_bounds(req):
|
||||
@@ -183,11 +202,12 @@ def extras_requires(conda_format=False):
|
||||
}
|
||||
|
||||
|
||||
def module_requirements(requirements_path, module_names):
|
||||
def module_requirements(requirements_path, module_names, strict_bounds):
|
||||
module_names = set(module_names)
|
||||
found = set()
|
||||
module_lines = []
|
||||
for line in read_requirements(requirements_path, strict_bounds=True):
|
||||
for line in read_requirements(requirements_path,
|
||||
strict_bounds=strict_bounds):
|
||||
match = REQ_PATTERN.match(line)
|
||||
if match is None:
|
||||
raise AssertionError("Could not parse requirement: '%s'" % line)
|
||||
@@ -203,38 +223,12 @@ def module_requirements(requirements_path, module_names):
|
||||
)
|
||||
return module_lines
|
||||
|
||||
|
||||
def pre_setup():
|
||||
if not set(sys.argv) & {'install', 'develop', 'egg_info', 'bdist_wheel'}:
|
||||
return
|
||||
|
||||
try:
|
||||
import pip
|
||||
if StrictVersion(pip.__version__) < StrictVersion('7.1.0'):
|
||||
raise AssertionError(
|
||||
"Zipline installation requires pip>=7.1.0, but your pip "
|
||||
"version is {version}. \n"
|
||||
"You can upgrade your pip with "
|
||||
"'pip install --upgrade pip'.".format(
|
||||
version=pip.__version__,
|
||||
)
|
||||
)
|
||||
except ImportError:
|
||||
raise AssertionError("Zipline installation requires pip")
|
||||
|
||||
required = ('Cython', 'numpy')
|
||||
for line in module_requirements('etc/requirements.txt', required):
|
||||
pip.main(['install', line])
|
||||
|
||||
|
||||
pre_setup()
|
||||
|
||||
conda_build = os.path.basename(sys.argv[0]) == 'conda-build'
|
||||
|
||||
setup(
|
||||
name='zipline',
|
||||
version=versioneer.get_version(),
|
||||
cmdclass=versioneer.get_cmdclass(),
|
||||
cmdclass=LazyBuildExtCommandClass(versioneer.get_cmdclass()),
|
||||
description='A backtester for financial algorithms.',
|
||||
author='Quantopian Inc.',
|
||||
author_email='opensource@quantopian.com',
|
||||
@@ -258,5 +252,10 @@ setup(
|
||||
],
|
||||
install_requires=install_requires(conda_format=conda_build),
|
||||
extras_require=extras_requires(conda_format=conda_build),
|
||||
setup_requires=module_requirements(
|
||||
'etc/requirements.txt',
|
||||
('Cython', 'numpy'),
|
||||
strict_bounds=False,
|
||||
),
|
||||
url="http://zipline.io",
|
||||
)
|
||||
|
||||
@@ -52,6 +52,19 @@ asset_infos = (
|
||||
with_extra_sid = parameterized.expand(asset_infos)
|
||||
|
||||
|
||||
def _utc_localize_index_level_0(df):
|
||||
"""``tz_localize`` the first level of a multiindexed dataframe to utc.
|
||||
|
||||
Mutates df in place.
|
||||
"""
|
||||
idx = df.index
|
||||
df.index = pd.MultiIndex.from_product(
|
||||
(idx.levels[0].tz_localize('utc'), idx.levels[1]),
|
||||
names=idx.names,
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
class BlazeToPipelineTestCase(TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -315,7 +328,11 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
finder,
|
||||
).run_pipeline(p, dates[0], dates[-1])
|
||||
|
||||
assert_frame_equal(result, expected, check_dtype=False)
|
||||
assert_frame_equal(
|
||||
result,
|
||||
_utc_localize_index_level_0(expected),
|
||||
check_dtype=False,
|
||||
)
|
||||
|
||||
def test_custom_query_time_tz(self):
|
||||
df = self.df.copy()
|
||||
@@ -345,7 +362,7 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
expected = df.drop('asof_date', axis=1)
|
||||
expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
|
||||
'datetime64[ns]',
|
||||
)
|
||||
).dt.tz_localize('utc')
|
||||
expected.ix[3:5, 'timestamp'] += timedelta(days=1)
|
||||
expected.set_index(['timestamp', 'sid'], inplace=True)
|
||||
expected.index = pd.MultiIndex.from_product((
|
||||
@@ -615,41 +632,26 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
df['other'] = df.value + 1
|
||||
fields = OrderedDict(self.macro_dshape.measure.fields)
|
||||
fields['other'] = fields['value']
|
||||
expr = bz.Data(df, name='expr', dshape=var * Record(fields))
|
||||
loader = BlazeLoader()
|
||||
ds = from_blaze(
|
||||
expr,
|
||||
loader=loader,
|
||||
no_deltas_rule=no_deltas_rules.ignore,
|
||||
)
|
||||
p = Pipeline()
|
||||
p.add(ds.value.latest, 'value')
|
||||
p.add(ds.other.latest, 'other')
|
||||
dates = self.dates
|
||||
|
||||
asset_info = asset_infos[0][0]
|
||||
with tmp_asset_finder(equities=asset_info) as finder:
|
||||
result = SimplePipelineEngine(
|
||||
loader,
|
||||
dates,
|
||||
expected = pd.DataFrame(
|
||||
np.array([[0, 1],
|
||||
[1, 2],
|
||||
[2, 3]]).repeat(3, axis=0),
|
||||
index=pd.MultiIndex.from_product((
|
||||
df.timestamp,
|
||||
finder.retrieve_all(asset_info.index),
|
||||
)),
|
||||
columns=('value', 'other'),
|
||||
).sort_index(axis=1)
|
||||
self._test_id(
|
||||
df,
|
||||
var * Record(fields),
|
||||
expected,
|
||||
finder,
|
||||
).run_pipeline(p, dates[0], dates[-1])
|
||||
|
||||
expected = pd.DataFrame(
|
||||
np.array([[0, 1],
|
||||
[1, 2],
|
||||
[2, 3]]).repeat(3, axis=0),
|
||||
index=pd.MultiIndex.from_product((
|
||||
df.timestamp,
|
||||
finder.retrieve_all(asset_info.index),
|
||||
)),
|
||||
columns=('value', 'other'),
|
||||
).sort_index(axis=1)
|
||||
assert_frame_equal(
|
||||
result,
|
||||
expected.sort_index(axis=1),
|
||||
check_dtype=False,
|
||||
)
|
||||
('value', 'other'),
|
||||
)
|
||||
|
||||
def test_id_take_last_in_group(self):
|
||||
T = pd.Timestamp
|
||||
@@ -804,7 +806,7 @@ class BlazeToPipelineTestCase(TestCase):
|
||||
|
||||
assert_frame_equal(
|
||||
result,
|
||||
expected_output,
|
||||
_utc_localize_index_level_0(expected_output),
|
||||
check_dtype=False,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ from zipline.pipeline.loaders.blaze import (
|
||||
SID_FIELD_NAME,
|
||||
TS_FIELD_NAME,
|
||||
)
|
||||
from zipline.utils.numpy_utils import make_datetime64D, np_NaT
|
||||
from zipline.utils.numpy_utils import make_datetime64D, NaTD
|
||||
from zipline.utils.test_utils import (
|
||||
make_simple_equity_info,
|
||||
tmp_asset_finder,
|
||||
@@ -234,7 +234,7 @@ class EarningsCalendarLoaderTestCase(TestCase):
|
||||
|
||||
# Set NaTs to 0 temporarily because busday_count doesn't support NaT.
|
||||
# We fill these entries with NaNs later.
|
||||
whereNaT = raw_announce_dates == np_NaT
|
||||
whereNaT = raw_announce_dates == NaTD
|
||||
raw_announce_dates[whereNaT] = make_datetime64D(0)
|
||||
|
||||
# The abs call here makes it so that we can use this function to
|
||||
@@ -260,7 +260,7 @@ class EarningsCalendarLoaderTestCase(TestCase):
|
||||
'2014-01-10',
|
||||
'2014-01-15',
|
||||
'2014-01-20',
|
||||
]),
|
||||
], utc=True),
|
||||
))
|
||||
def test_compute_earnings(self, dates):
|
||||
|
||||
@@ -296,10 +296,12 @@ class EarningsCalendarLoaderTestCase(TestCase):
|
||||
assert_series_equal(
|
||||
computed_next.isnull(),
|
||||
computed_next_busday_offset.isnull(),
|
||||
check_names=False,
|
||||
)
|
||||
assert_series_equal(
|
||||
computed_previous.isnull(),
|
||||
computed_previous_busday_offset.isnull(),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
for sid in self.sids:
|
||||
@@ -308,24 +310,28 @@ class EarningsCalendarLoaderTestCase(TestCase):
|
||||
computed_next.xs(sid, level=1),
|
||||
expected_next(sid),
|
||||
sid,
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
assert_series_equal(
|
||||
computed_previous.xs(sid, level=1),
|
||||
expected_previous(sid),
|
||||
sid,
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
assert_series_equal(
|
||||
computed_next_busday_offset.xs(sid, level=1),
|
||||
expected_next_busday_offset(sid),
|
||||
sid,
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
assert_series_equal(
|
||||
computed_previous_busday_offset.xs(sid, level=1),
|
||||
expected_previous_busday_offset(sid),
|
||||
sid,
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -157,22 +157,23 @@ class ConstantInputTestCase(TestCase):
|
||||
USEquityPricing.close: 3,
|
||||
USEquityPricing.high: 4,
|
||||
}
|
||||
self.assets = [1, 2, 3]
|
||||
self.asset_ids = [1, 2, 3]
|
||||
self.dates = date_range('2014-01', '2014-03', freq='D', tz='UTC')
|
||||
self.loader = ConstantLoader(
|
||||
constants=self.constants,
|
||||
dates=self.dates,
|
||||
assets=self.assets,
|
||||
assets=self.asset_ids,
|
||||
)
|
||||
|
||||
self.asset_info = make_simple_equity_info(
|
||||
self.assets,
|
||||
self.asset_ids,
|
||||
start_date=self.dates[0],
|
||||
end_date=self.dates[-1],
|
||||
)
|
||||
environment = TradingEnvironment()
|
||||
environment.write_data(equities_df=self.asset_info)
|
||||
self.asset_finder = environment.asset_finder
|
||||
self.assets = self.asset_finder.retrieve_all(self.asset_ids)
|
||||
|
||||
def test_bad_dates(self):
|
||||
loader = self.loader
|
||||
@@ -192,7 +193,7 @@ class ConstantInputTestCase(TestCase):
|
||||
lambda column: loader, self.dates, self.asset_finder,
|
||||
)
|
||||
factor = AssetID()
|
||||
asset = self.assets[0]
|
||||
asset = self.asset_ids[0]
|
||||
p = Pipeline(columns={'f': factor}, screen=factor <= asset)
|
||||
|
||||
# The crux of this is that when we run the pipeline for a single day
|
||||
@@ -204,7 +205,7 @@ class ConstantInputTestCase(TestCase):
|
||||
def test_screen(self):
|
||||
loader = self.loader
|
||||
finder = self.asset_finder
|
||||
assets = array(self.assets)
|
||||
asset_ids = array(self.asset_ids)
|
||||
engine = SimplePipelineEngine(
|
||||
lambda column: loader, self.dates, self.asset_finder,
|
||||
)
|
||||
@@ -212,11 +213,11 @@ class ConstantInputTestCase(TestCase):
|
||||
dates = self.dates[10:10 + num_dates]
|
||||
|
||||
factor = AssetID()
|
||||
for asset in assets:
|
||||
p = Pipeline(columns={'f': factor}, screen=factor <= asset)
|
||||
for asset_id in asset_ids:
|
||||
p = Pipeline(columns={'f': factor}, screen=factor <= asset_id)
|
||||
result = engine.run_pipeline(p, dates[0], dates[-1])
|
||||
|
||||
expected_sids = assets[assets <= asset]
|
||||
expected_sids = asset_ids[asset_ids <= asset_id]
|
||||
expected_assets = finder.retrieve_all(expected_sids)
|
||||
expected_result = DataFrame(
|
||||
index=MultiIndex.from_product([dates, expected_assets]),
|
||||
@@ -228,7 +229,6 @@ class ConstantInputTestCase(TestCase):
|
||||
|
||||
def test_single_factor(self):
|
||||
loader = self.loader
|
||||
finder = self.asset_finder
|
||||
assets = self.assets
|
||||
engine = SimplePipelineEngine(
|
||||
lambda column: loader, self.dates, self.asset_finder,
|
||||
@@ -252,18 +252,17 @@ class ConstantInputTestCase(TestCase):
|
||||
result = engine.run_pipeline(p, dates[0], dates[-1])
|
||||
self.assertEqual(set(result.columns), {'f'})
|
||||
assert_multi_index_is_product(
|
||||
self, result.index, dates, finder.retrieve_all(assets)
|
||||
self, result.index, dates, assets
|
||||
)
|
||||
|
||||
check_arrays(
|
||||
result['f'].unstack().values,
|
||||
full(result_shape, expected_result),
|
||||
full(result_shape, expected_result, dtype=float),
|
||||
)
|
||||
|
||||
def test_multiple_rolling_factors(self):
|
||||
|
||||
loader = self.loader
|
||||
finder = self.asset_finder
|
||||
assets = self.assets
|
||||
engine = SimplePipelineEngine(
|
||||
lambda column: loader, self.dates, self.asset_finder,
|
||||
@@ -289,22 +288,22 @@ class ConstantInputTestCase(TestCase):
|
||||
|
||||
self.assertEqual(set(results.columns), {'short', 'high', 'long'})
|
||||
assert_multi_index_is_product(
|
||||
self, results.index, dates, finder.retrieve_all(assets)
|
||||
self, results.index, dates, assets
|
||||
)
|
||||
|
||||
# row-wise sum over an array whose values are all (1 - 2)
|
||||
check_arrays(
|
||||
results['short'].unstack().values,
|
||||
full(shape, -short_factor.window_length),
|
||||
full(shape, -short_factor.window_length, dtype=float),
|
||||
)
|
||||
check_arrays(
|
||||
results['long'].unstack().values,
|
||||
full(shape, -long_factor.window_length),
|
||||
full(shape, -long_factor.window_length, dtype=float),
|
||||
)
|
||||
# row-wise sum over an array whose values are all (1 - 3)
|
||||
check_arrays(
|
||||
results['high'].unstack().values,
|
||||
full(shape, -2 * high_factor.window_length),
|
||||
full(shape, -2 * high_factor.window_length, dtype=float),
|
||||
)
|
||||
|
||||
def test_numeric_factor(self):
|
||||
@@ -368,7 +367,7 @@ class ConstantInputTestCase(TestCase):
|
||||
loader = ConstantLoader(
|
||||
constants=constants,
|
||||
dates=self.dates,
|
||||
assets=self.assets,
|
||||
assets=self.asset_ids,
|
||||
)
|
||||
engine = SimplePipelineEngine(
|
||||
lambda column: loader, self.dates, self.asset_finder,
|
||||
@@ -394,17 +393,23 @@ class ConstantInputTestCase(TestCase):
|
||||
set(result.columns)
|
||||
)
|
||||
|
||||
result_index = self.assets * len(dates_to_test)
|
||||
result_index = self.asset_ids * len(dates_to_test)
|
||||
result_shape = (len(result_index),)
|
||||
check_arrays(
|
||||
result['sumdiff'],
|
||||
Series(index=result_index, data=full(result_shape, -3)),
|
||||
Series(
|
||||
index=result_index,
|
||||
data=full(result_shape, -3, dtype=float),
|
||||
),
|
||||
)
|
||||
|
||||
for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
|
||||
check_arrays(
|
||||
result[name],
|
||||
Series(index=result_index, data=full(result_shape, const)),
|
||||
Series(
|
||||
index=result_index,
|
||||
data=full(result_shape, const, dtype=float),
|
||||
),
|
||||
)
|
||||
|
||||
def test_loader_given_multiple_columns(self):
|
||||
@@ -427,12 +432,12 @@ class ConstantInputTestCase(TestCase):
|
||||
Loader1DataSet2.col2: 4}
|
||||
loader1 = RecordingConstantLoader(constants=constants1,
|
||||
dates=self.dates,
|
||||
assets=self.assets)
|
||||
assets=self.asset_ids)
|
||||
constants2 = {Loader2DataSet.col1: 5,
|
||||
Loader2DataSet.col2: 6}
|
||||
loader2 = RecordingConstantLoader(constants=constants2,
|
||||
dates=self.dates,
|
||||
assets=self.assets)
|
||||
assets=self.asset_ids)
|
||||
|
||||
engine = SimplePipelineEngine(
|
||||
lambda column:
|
||||
@@ -471,19 +476,26 @@ class ConstantInputTestCase(TestCase):
|
||||
for name, pipe_col in iteritems(columns)}
|
||||
|
||||
index = MultiIndex.from_product([self.dates[2:], self.assets])
|
||||
|
||||
def expected_for_col(col):
|
||||
val = vals[col]
|
||||
offset = columns[col].window_length - min_window
|
||||
return concatenate(
|
||||
[
|
||||
full(offset * index.levshape[1], nan),
|
||||
full(
|
||||
(index.levshape[0] - offset) * index.levshape[1],
|
||||
val,
|
||||
float,
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
expected = DataFrame(
|
||||
data={col:
|
||||
concatenate((
|
||||
full((columns[col].window_length - min_window)
|
||||
* index.levshape[1],
|
||||
nan),
|
||||
full((index.levshape[0]
|
||||
- (columns[col].window_length - min_window))
|
||||
* index.levshape[1],
|
||||
val)))
|
||||
for col, val in iteritems(vals)},
|
||||
data={col: expected_for_col(col) for col in vals},
|
||||
index=index,
|
||||
columns=columns)
|
||||
columns=columns,
|
||||
)
|
||||
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
@@ -504,7 +516,7 @@ class FrameInputTestCase(TestCase):
|
||||
cls.env = TradingEnvironment()
|
||||
day = cls.env.trading_day
|
||||
|
||||
cls.assets = Int64Index([1, 2, 3])
|
||||
cls.asset_ids = [1, 2, 3]
|
||||
cls.dates = date_range(
|
||||
'2015-01-01',
|
||||
'2015-01-31',
|
||||
@@ -513,12 +525,13 @@ class FrameInputTestCase(TestCase):
|
||||
)
|
||||
|
||||
asset_info = make_simple_equity_info(
|
||||
cls.assets,
|
||||
cls.asset_ids,
|
||||
start_date=cls.dates[0],
|
||||
end_date=cls.dates[-1],
|
||||
)
|
||||
cls.env.write_data(equities_df=asset_info)
|
||||
cls.asset_finder = cls.env.asset_finder
|
||||
cls.assets = cls.asset_finder.retrieve_all(cls.asset_ids)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@@ -533,7 +546,7 @@ class FrameInputTestCase(TestCase):
|
||||
return DataFrame(data, columns=self.assets, index=self.dates)
|
||||
|
||||
def test_compute_with_adjustments(self):
|
||||
dates, assets = self.dates, self.assets
|
||||
dates, asset_ids = self.dates, self.asset_ids
|
||||
low, high = USEquityPricing.low, USEquityPricing.high
|
||||
apply_idxs = [3, 10, 16]
|
||||
|
||||
@@ -544,7 +557,7 @@ class FrameInputTestCase(TestCase):
|
||||
[
|
||||
dict(
|
||||
kind=MULTIPLY,
|
||||
sid=assets[1],
|
||||
sid=asset_ids[1],
|
||||
value=2.0,
|
||||
start_date=None,
|
||||
end_date=apply_date(0, offset=-1),
|
||||
@@ -552,7 +565,7 @@ class FrameInputTestCase(TestCase):
|
||||
),
|
||||
dict(
|
||||
kind=MULTIPLY,
|
||||
sid=assets[1],
|
||||
sid=asset_ids[1],
|
||||
value=3.0,
|
||||
start_date=None,
|
||||
end_date=apply_date(1, offset=-1),
|
||||
@@ -560,7 +573,7 @@ class FrameInputTestCase(TestCase):
|
||||
),
|
||||
dict(
|
||||
kind=MULTIPLY,
|
||||
sid=assets[1],
|
||||
sid=asset_ids[1],
|
||||
value=5.0,
|
||||
start_date=None,
|
||||
end_date=apply_date(2, offset=-1),
|
||||
@@ -630,7 +643,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
asset_lifetime=8,
|
||||
)
|
||||
cls.last_asset_end = cls.asset_info['end_date'].max()
|
||||
cls.all_assets = cls.asset_info.index
|
||||
cls.all_asset_ids = cls.asset_info.index
|
||||
|
||||
cls.env.write_data(equities_df=cls.asset_info)
|
||||
cls.finder = cls.env.asset_finder
|
||||
@@ -646,7 +659,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
table = cls.writer.write(
|
||||
cls.temp_dir.getpath('testdata.bcolz'),
|
||||
cls.calendar,
|
||||
cls.all_assets,
|
||||
cls.all_asset_ids,
|
||||
)
|
||||
|
||||
cls.pipeline_loader = USEquityPricingLoader(
|
||||
@@ -698,7 +711,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
self.finder,
|
||||
)
|
||||
window_length = 5
|
||||
assets = self.all_assets
|
||||
asset_ids = self.all_asset_ids
|
||||
dates = date_range(
|
||||
self.first_asset_start + self.trading_day,
|
||||
self.last_asset_end,
|
||||
@@ -722,7 +735,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
# **previous** day's data.
|
||||
expected_raw = rolling_mean(
|
||||
self.writer.expected_values_2d(
|
||||
dates - self.trading_day, assets, 'close',
|
||||
dates - self.trading_day, asset_ids, 'close',
|
||||
),
|
||||
window_length,
|
||||
min_periods=1,
|
||||
@@ -732,7 +745,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
# Truncate off the extra rows needed to compute the SMAs.
|
||||
expected_raw[window_length:],
|
||||
index=dates_to_test, # dates_to_test is dates[window_length:]
|
||||
columns=self.finder.retrieve_all(assets),
|
||||
columns=self.finder.retrieve_all(asset_ids),
|
||||
)
|
||||
self.write_nans(expected)
|
||||
result = results['sma'].unstack()
|
||||
@@ -750,7 +763,7 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
self.finder,
|
||||
)
|
||||
window_length = 5
|
||||
assets = self.all_assets
|
||||
asset_ids = self.all_asset_ids
|
||||
dates = date_range(
|
||||
self.first_asset_start + self.trading_day,
|
||||
self.last_asset_end,
|
||||
@@ -772,9 +785,9 @@ class SyntheticBcolzTestCase(TestCase):
|
||||
# We expect NaNs when the asset was undefined, otherwise 0 everywhere,
|
||||
# since the input is always increasing.
|
||||
expected = DataFrame(
|
||||
data=zeros((len(dates_to_test), len(assets)), dtype=float),
|
||||
data=zeros((len(dates_to_test), len(asset_ids)), dtype=float),
|
||||
index=dates_to_test,
|
||||
columns=self.finder.retrieve_all(assets),
|
||||
columns=self.finder.retrieve_all(asset_ids),
|
||||
)
|
||||
self.write_nans(expected)
|
||||
result = results['drawdown'].unstack()
|
||||
|
||||
@@ -23,7 +23,11 @@ from zipline.pipeline.factors import (
|
||||
RSI,
|
||||
)
|
||||
from zipline.utils.test_utils import check_allclose, check_arrays
|
||||
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, np_NaT
|
||||
from zipline.utils.numpy_utils import (
|
||||
datetime64ns_dtype,
|
||||
float64_dtype,
|
||||
NaTns,
|
||||
)
|
||||
|
||||
from .base import BasePipelineTestCase
|
||||
|
||||
@@ -309,7 +313,7 @@ class FactorTestCase(BasePipelineTestCase):
|
||||
mask = eyemask if use_mask else nomask
|
||||
if set_missing:
|
||||
asfloat[:, 2] = nan
|
||||
asdatetime[:, 2] = np_NaT
|
||||
asdatetime[:, 2] = NaTns
|
||||
|
||||
float_result = masked_rankdata_2d(
|
||||
data=asfloat,
|
||||
@@ -321,7 +325,7 @@ class FactorTestCase(BasePipelineTestCase):
|
||||
datetime_result = masked_rankdata_2d(
|
||||
data=asdatetime,
|
||||
mask=mask,
|
||||
missing_value=np_NaT,
|
||||
missing_value=NaTns,
|
||||
method=method,
|
||||
ascending=True,
|
||||
)
|
||||
|
||||
@@ -76,9 +76,9 @@ class NumericalExpressionTestCase(TestCase):
|
||||
self.h = H()
|
||||
self.d = DateFactor()
|
||||
self.fake_raw_data = {
|
||||
self.f: full((5, 5), 3),
|
||||
self.g: full((5, 5), 2),
|
||||
self.h: full((5, 5), 1),
|
||||
self.f: full((5, 5), 3, float),
|
||||
self.g: full((5, 5), 2, float),
|
||||
self.h: full((5, 5), 1, float),
|
||||
self.d: full((5, 5), 0, dtype='datetime64[ns]'),
|
||||
}
|
||||
self.mask = DataFrame(True, index=self.dates, columns=self.assets)
|
||||
@@ -94,7 +94,7 @@ class NumericalExpressionTestCase(TestCase):
|
||||
|
||||
def check_constant_output(self, expr, expected):
|
||||
self.assertFalse(isnan(expected))
|
||||
return self.check_output(expr, full((5, 5), expected))
|
||||
return self.check_output(expr, full((5, 5), expected, float))
|
||||
|
||||
def test_validate_good(self):
|
||||
f = self.f
|
||||
@@ -435,9 +435,9 @@ class NumericalExpressionTestCase(TestCase):
|
||||
def test_comparisons(self):
|
||||
f, g, h = self.f, self.g, self.h
|
||||
self.fake_raw_data = {
|
||||
f: arange(25).reshape(5, 5),
|
||||
g: arange(25).reshape(5, 5) - eye(5),
|
||||
h: full((5, 5), 5),
|
||||
f: arange(25, dtype=float).reshape(5, 5),
|
||||
g: arange(25, dtype=float).reshape(5, 5) - eye(5),
|
||||
h: full((5, 5), 5, dtype=float),
|
||||
}
|
||||
f_data = self.fake_raw_data[f]
|
||||
g_data = self.fake_raw_data[g]
|
||||
@@ -479,9 +479,9 @@ class NumericalExpressionTestCase(TestCase):
|
||||
)
|
||||
|
||||
self.fake_raw_data = {
|
||||
f: arange(25).reshape(5, 5),
|
||||
g: arange(25).reshape(5, 5) - eye(5),
|
||||
h: full((5, 5), 5),
|
||||
f: arange(25, dtype=float).reshape(5, 5),
|
||||
g: arange(25, dtype=float).reshape(5, 5) - eye(5),
|
||||
h: full((5, 5), 5, dtype=float),
|
||||
custom_filter: custom_filter_mask,
|
||||
}
|
||||
|
||||
|
||||
@@ -401,7 +401,7 @@ class PipelineAlgorithmTestCase(TestCase):
|
||||
'ratio': array([], dtype=float),
|
||||
'sid': array([], dtype=int),
|
||||
},
|
||||
index=DatetimeIndex([], tz='UTC'),
|
||||
index=DatetimeIndex([]),
|
||||
columns=['effective_date', 'ratio', 'sid'],
|
||||
)
|
||||
dividends = DataFrame({
|
||||
|
||||
@@ -40,6 +40,7 @@ class SomeFactor(Factor):
|
||||
dtype = float64_dtype
|
||||
window_length = 5
|
||||
inputs = [SomeDataSet.foo, SomeDataSet.bar]
|
||||
|
||||
SomeFactorAlias = SomeFactor
|
||||
|
||||
|
||||
|
||||
@@ -16,14 +16,11 @@
|
||||
# This code is based on a unittest written by John Salvatier:
|
||||
# https://github.com/pymc-devs/pymc/blob/pymc3/tests/test_examples.py
|
||||
|
||||
# Disable plotting
|
||||
#
|
||||
|
||||
import glob
|
||||
import imp
|
||||
import matplotlib
|
||||
from nose_parameterized import parameterized
|
||||
import os
|
||||
import runpy
|
||||
from unittest import TestCase
|
||||
|
||||
from zipline.utils import parse_args, run_pipeline
|
||||
@@ -45,7 +42,7 @@ class ExamplesTests(TestCase):
|
||||
@parameterized.expand(((os.path.basename(f).replace('.', '_'), f) for f in
|
||||
glob.glob(os.path.join(example_dir(), '*.py'))))
|
||||
def test_example(self, name, example):
|
||||
imp.load_source('__main__', os.path.basename(example), open(example))
|
||||
runpy.run_path(example, run_name='__main__')
|
||||
|
||||
# Test algorithm as if scripts/run_algo.py is being used.
|
||||
def test_example_run_pipline(self):
|
||||
|
||||
+2
-127
@@ -41,131 +41,6 @@ from zipline.sources import RandomWalkSource, DataFrameSource
|
||||
import zipline.utils.factory as factory
|
||||
from zipline.utils.test_utils import subtest
|
||||
|
||||
# Cases are over the July 4th holiday, to ensure use of trading calendar.
|
||||
|
||||
# March 2013
|
||||
# Su Mo Tu We Th Fr Sa
|
||||
# 1 2
|
||||
# 3 4 5 6 7 8 9
|
||||
# 10 11 12 13 14 15 16
|
||||
# 17 18 19 20 21 22 23
|
||||
# 24 25 26 27 28 29 30
|
||||
# 31
|
||||
# April 2013
|
||||
# Su Mo Tu We Th Fr Sa
|
||||
# 1 2 3 4 5 6
|
||||
# 7 8 9 10 11 12 13
|
||||
# 14 15 16 17 18 19 20
|
||||
# 21 22 23 24 25 26 27
|
||||
# 28 29 30
|
||||
#
|
||||
# May 2013
|
||||
# Su Mo Tu We Th Fr Sa
|
||||
# 1 2 3 4
|
||||
# 5 6 7 8 9 10 11
|
||||
# 12 13 14 15 16 17 18
|
||||
# 19 20 21 22 23 24 25
|
||||
# 26 27 28 29 30 31
|
||||
#
|
||||
# June 2013
|
||||
# Su Mo Tu We Th Fr Sa
|
||||
# 1
|
||||
# 2 3 4 5 6 7 8
|
||||
# 9 10 11 12 13 14 15
|
||||
# 16 17 18 19 20 21 22
|
||||
# 23 24 25 26 27 28 29
|
||||
# 30
|
||||
# July 2013
|
||||
# Su Mo Tu We Th Fr Sa
|
||||
# 1 2 3 4 5 6
|
||||
# 7 8 9 10 11 12 13
|
||||
# 14 15 16 17 18 19 20
|
||||
# 21 22 23 24 25 26 27
|
||||
# 28 29 30 31
|
||||
#
|
||||
# Times to be converted via:
|
||||
# pd.Timestamp('2013-07-05 9:31', tz='US/Eastern').tz_convert('UTC')},
|
||||
|
||||
INDEX_TEST_CASES_RAW = {
|
||||
'week of daily data': {
|
||||
'input': {'bar_count': 5,
|
||||
'frequency': '1d',
|
||||
'algo_dt': '2013-07-05 9:31AM'},
|
||||
'expected': [
|
||||
'2013-06-28 4:00PM',
|
||||
'2013-07-01 4:00PM',
|
||||
'2013-07-02 4:00PM',
|
||||
'2013-07-03 1:00PM',
|
||||
'2013-07-05 9:31AM',
|
||||
]
|
||||
},
|
||||
'five minutes on july 5th open': {
|
||||
'input': {'bar_count': 5,
|
||||
'frequency': '1m',
|
||||
'algo_dt': '2013-07-05 9:31AM'},
|
||||
'expected': [
|
||||
'2013-07-03 12:57PM',
|
||||
'2013-07-03 12:58PM',
|
||||
'2013-07-03 12:59PM',
|
||||
'2013-07-03 1:00PM',
|
||||
'2013-07-05 9:31AM',
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def to_timestamp(dt_str):
|
||||
return pd.Timestamp(dt_str, tz='US/Eastern').tz_convert('UTC')
|
||||
|
||||
|
||||
def convert_cases(cases):
|
||||
"""
|
||||
Convert raw strings to values comparable with system data.
|
||||
"""
|
||||
cases = cases.copy()
|
||||
for case in cases.values():
|
||||
case['input']['algo_dt'] = to_timestamp(case['input']['algo_dt'])
|
||||
case['expected'] = pd.DatetimeIndex([to_timestamp(dt_str) for dt_str
|
||||
in case['expected']])
|
||||
return cases
|
||||
|
||||
INDEX_TEST_CASES = convert_cases(INDEX_TEST_CASES_RAW)
|
||||
|
||||
|
||||
def get_index_at_dt(case_input, env):
|
||||
history_spec = history.HistorySpec(
|
||||
case_input['bar_count'],
|
||||
case_input['frequency'],
|
||||
None,
|
||||
False,
|
||||
env=env,
|
||||
data_frequency='minute',
|
||||
)
|
||||
return history.index_at_dt(history_spec, case_input['algo_dt'], env=env)
|
||||
|
||||
|
||||
class TestHistoryIndex(TestCase):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.environment = TradingEnvironment()
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
del cls.environment
|
||||
|
||||
@parameterized.expand(
|
||||
[(name, case['input'], case['expected'])
|
||||
for name, case in INDEX_TEST_CASES.items()]
|
||||
)
|
||||
def test_index_at_dt(self, name, case_input, expected):
|
||||
history_index = get_index_at_dt(case_input, self.environment)
|
||||
|
||||
history_series = pd.Series(index=history_index)
|
||||
expected_series = pd.Series(index=expected)
|
||||
|
||||
pd.util.testing.assert_series_equal(history_series, expected_series)
|
||||
|
||||
|
||||
class TestHistoryContainer(TestCase):
|
||||
|
||||
@@ -473,9 +348,9 @@ def handle_data(context, data):
|
||||
output = test_algo.run(source)
|
||||
self.assertIsNotNone(output)
|
||||
|
||||
history_trace = test_algo.history_trace
|
||||
df.columns = self.env.asset_finder.retrieve_all(df.columns)
|
||||
|
||||
for i, received in enumerate(history_trace[bar_count - 1:]):
|
||||
for i, received in enumerate(test_algo.history_trace[bar_count - 1:]):
|
||||
expected = df.iloc[i:i + bar_count]
|
||||
assert_frame_equal(expected, received)
|
||||
|
||||
|
||||
@@ -266,7 +266,7 @@ class TestStatelessRules(RuleTestCase):
|
||||
@subtest(minutes_for_days(), 'ms')
|
||||
def test_Always(self, ms):
|
||||
should_trigger = partial(Always().should_trigger, env=self.env)
|
||||
self.assertTrue(all(map(partial(should_trigger, env=self.env), ms)))
|
||||
self.assertTrue(all(map(should_trigger, ms)))
|
||||
|
||||
@subtest(minutes_for_days(), 'ms')
|
||||
def test_Never(self, ms):
|
||||
|
||||
+16
-5
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright 2013 Quantopian, Inc.
|
||||
# Copyright 2016 Quantopian, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -22,6 +22,7 @@ from pandas.io.data import DataReader
|
||||
import pytz
|
||||
|
||||
from six import iteritems
|
||||
from six.moves.urllib_error import HTTPError
|
||||
|
||||
from . benchmarks import get_benchmark_returns
|
||||
from . import treasuries, treasuries_can
|
||||
@@ -239,8 +240,15 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day):
|
||||
path=path,
|
||||
)
|
||||
|
||||
data = get_benchmark_returns(symbol, first_date - trading_day, last_date)
|
||||
data.to_csv(path)
|
||||
try:
|
||||
data = get_benchmark_returns(
|
||||
symbol,
|
||||
first_date - trading_day,
|
||||
last_date,
|
||||
)
|
||||
data.to_csv(path)
|
||||
except (OSError, IOError, HTTPError):
|
||||
logger.exception('failed to cache the new benchmark returns')
|
||||
if not has_data_for_dates(data, first_date, last_date):
|
||||
logger.warn("Still don't have expected data after redownload!")
|
||||
return data
|
||||
@@ -306,8 +314,11 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now):
|
||||
)
|
||||
)
|
||||
|
||||
data = loader_module.get_treasury_data(first_date, last_date)
|
||||
data.to_csv(path)
|
||||
try:
|
||||
data = loader_module.get_treasury_data(first_date, last_date)
|
||||
data.to_csv(path)
|
||||
except (OSError, IOError, HTTPError):
|
||||
logger.exception('failed to cache treasury data')
|
||||
if not has_data_for_dates(data, first_date, last_date):
|
||||
logger.warn("Still don't have expected data after redownload!")
|
||||
return data
|
||||
|
||||
@@ -206,7 +206,7 @@ class BcolzDailyBarWriter(with_metaclass(ABCMeta)):
|
||||
if column_name == 'id':
|
||||
# We know what the content of this column is, so don't
|
||||
# bother reading it.
|
||||
columns['id'].append(full((nrows,), asset_id))
|
||||
columns['id'].append(full((nrows,), asset_id, uint32))
|
||||
continue
|
||||
columns[column_name].append(
|
||||
self.to_uint32(table[column_name][:], column_name)
|
||||
|
||||
@@ -32,8 +32,8 @@ def ols_transform(data, sid1, sid2):
|
||||
"""Computes regression coefficient (slope and intercept)
|
||||
via Ordinary Least Squares between two SIDs.
|
||||
"""
|
||||
p0 = data.price[sid1]
|
||||
p1 = sm.add_constant(data.price[sid2], prepend=True)
|
||||
p0 = data.price[sid1].values
|
||||
p1 = sm.add_constant(data.price[sid2].values, prepend=True)
|
||||
slope, intercept = sm.OLS(p0, p1).fit().params
|
||||
|
||||
return slope, intercept
|
||||
|
||||
@@ -32,14 +32,12 @@ Position Tracking
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
from math import (
|
||||
copysign,
|
||||
floor,
|
||||
)
|
||||
from math import copysign
|
||||
|
||||
from copy import copy
|
||||
|
||||
import logbook
|
||||
import numpy as np
|
||||
import zipline.protocol as zp
|
||||
|
||||
from zipline.utils.serialization_utils import (
|
||||
@@ -71,7 +69,8 @@ class Position(object):
|
||||
# stock dividend
|
||||
if dividend['payment_sid']:
|
||||
out['payment_sid'] = dividend['payment_sid']
|
||||
out['share_count'] = floor(self.amount * float(dividend['ratio']))
|
||||
out['share_count'] = np.floor(self.amount
|
||||
* float(dividend['ratio']))
|
||||
|
||||
# cash dividend
|
||||
if dividend['net_amount']:
|
||||
@@ -106,7 +105,7 @@ class Position(object):
|
||||
raw_share_count = self.amount / float(ratio)
|
||||
|
||||
# e.g., 33
|
||||
full_share_count = floor(raw_share_count)
|
||||
full_share_count = np.floor(raw_share_count)
|
||||
|
||||
# e.g., 0.333
|
||||
fractional_share_count = raw_share_count - full_share_count
|
||||
|
||||
@@ -70,6 +70,7 @@ from pandas.tseries.tools import normalize_date
|
||||
import zipline.finance.risk as risk
|
||||
from . period import PerformancePeriod
|
||||
|
||||
from zipline.utils.pandas_utils import sort_values
|
||||
from zipline.utils.serialization_utils import (
|
||||
VERSION_LABEL
|
||||
)
|
||||
@@ -200,9 +201,9 @@ class PerformanceTracker(object):
|
||||
)
|
||||
self._dividend_count += len(new_dividends)
|
||||
|
||||
self.dividend_frame = pd.concat(
|
||||
self.dividend_frame = sort_values(pd.concat(
|
||||
[self.dividend_frame, new_dividends]
|
||||
).sort(['pay_date', 'ex_date']).set_index('id', drop=False)
|
||||
), ['pay_date', 'ex_date']).set_index('id', drop=False)
|
||||
|
||||
def initialize_dividends_from_other(self, other):
|
||||
"""
|
||||
|
||||
@@ -374,7 +374,7 @@ class TradingEnvironment(object):
|
||||
|
||||
def get_open_and_close(self, day):
|
||||
index = self.open_and_closes.index.get_loc(day.date())
|
||||
todays_minutes = self.open_and_closes.values[index]
|
||||
todays_minutes = self.open_and_closes.iloc[index]
|
||||
return todays_minutes[0], todays_minutes[1]
|
||||
|
||||
def market_minutes_for_day(self, stamp):
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
|
||||
from . history import (
|
||||
HistorySpec,
|
||||
days_index_at_dt,
|
||||
index_at_dt,
|
||||
Frequency,
|
||||
)
|
||||
|
||||
@@ -24,8 +22,6 @@ from . import history_container
|
||||
|
||||
__all__ = [
|
||||
'HistorySpec',
|
||||
'days_index_at_dt',
|
||||
'index_at_dt',
|
||||
'history_container',
|
||||
'Frequency',
|
||||
]
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
from __future__ import division
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
@@ -284,59 +283,3 @@ class HistorySpec(object):
|
||||
|
||||
def __repr__(self):
|
||||
return ''.join([self.__class__.__name__, "('", self.key_str, "')"])
|
||||
|
||||
|
||||
def days_index_at_dt(history_spec, algo_dt, env):
|
||||
"""
|
||||
Get the index of a frame to be used for a get_history call with daily
|
||||
frequency.
|
||||
"""
|
||||
# Get the previous (bar_count - 1) days' worth of market closes.
|
||||
day_delta = (history_spec.bar_count - 1) * history_spec.frequency.num
|
||||
market_closes = env.open_close_window(
|
||||
algo_dt,
|
||||
day_delta,
|
||||
offset=(-day_delta),
|
||||
step=history_spec.frequency.num,
|
||||
).market_close
|
||||
|
||||
if history_spec.frequency.data_frequency == 'daily':
|
||||
market_closes = market_closes.apply(pd.tslib.normalize_date)
|
||||
|
||||
# Append the current algo_dt as the last index value.
|
||||
# Using the 'rawer' numpy array values here because of a bottleneck
|
||||
# that appeared when using DatetimeIndex
|
||||
return np.append(market_closes.values, algo_dt)
|
||||
|
||||
|
||||
def minutes_index_at_dt(history_spec, algo_dt, env):
|
||||
"""
|
||||
Get the index of a frame to be used for a get_history_call with minutely
|
||||
frequency.
|
||||
"""
|
||||
# TODO: This is almost certainly going to be too slow for production.
|
||||
return env.market_minute_window(
|
||||
algo_dt,
|
||||
history_spec.bar_count,
|
||||
step=-1,
|
||||
)[::-1]
|
||||
|
||||
|
||||
def index_at_dt(history_spec, algo_dt, env):
|
||||
"""
|
||||
Returns index of a frame returned by get_history() with the given
|
||||
history_spec and algo_dt.
|
||||
|
||||
The resulting index will have @history_spec.bar_count bars, increasing in
|
||||
units of @history_spec.frequency, terminating at the given @algo_dt.
|
||||
|
||||
Note: The last bar of the returned frame represents an as-of-yet incomplete
|
||||
time window, so the delta between the last and second-to-last bars is
|
||||
usually always less than `@history_spec.frequency` for frequencies greater
|
||||
than 1m.
|
||||
"""
|
||||
frequency = history_spec.frequency
|
||||
if frequency.unit_str == 'd':
|
||||
return days_index_at_dt(history_spec, algo_dt, env)
|
||||
elif frequency.unit_str == 'm':
|
||||
return minutes_index_at_dt(history_spec, algo_dt, env)
|
||||
|
||||
@@ -5,7 +5,7 @@ announcements, acquisitions, dividends, etc.).
|
||||
from numpy import newaxis
|
||||
from zipline.pipeline.data.earnings import EarningsCalendar
|
||||
from zipline.utils.numpy_utils import (
|
||||
np_NaT,
|
||||
NaTD,
|
||||
busday_count_mask_NaT,
|
||||
datetime64D_dtype,
|
||||
float64_dtype,
|
||||
@@ -48,7 +48,7 @@ class BusinessDaysUntilNextEarnings(Factor):
|
||||
announce_dates = arrays[0].astype(datetime64D_dtype)
|
||||
|
||||
# Set masked values to NaT.
|
||||
announce_dates[~mask] = np_NaT
|
||||
announce_dates[~mask] = NaTD
|
||||
|
||||
# Convert row labels into a column vector for broadcasted comparison.
|
||||
reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis]
|
||||
@@ -84,7 +84,7 @@ class BusinessDaysSincePreviousEarnings(Factor):
|
||||
announce_dates = arrays[0].astype(datetime64D_dtype)
|
||||
|
||||
# Set masked values to NaT.
|
||||
announce_dates[~mask] = np_NaT
|
||||
announce_dates[~mask] = NaTD
|
||||
|
||||
# Convert row labels into a column vector for broadcasted comparison.
|
||||
reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis]
|
||||
|
||||
@@ -2,12 +2,6 @@
|
||||
Technical Analysis Factors
|
||||
--------------------------
|
||||
"""
|
||||
from bottleneck import (
|
||||
nanargmax,
|
||||
nanmax,
|
||||
nanmean,
|
||||
nansum,
|
||||
)
|
||||
from numbers import Number
|
||||
from numpy import (
|
||||
abs,
|
||||
@@ -31,6 +25,12 @@ from zipline.pipeline.data import USEquityPricing
|
||||
from zipline.pipeline.mixins import SingleInputMixin
|
||||
from zipline.utils.control_flow import ignore_nanwarnings
|
||||
from zipline.utils.input_validation import expect_types
|
||||
from zipline.utils.math_utils import (
|
||||
nanargmax,
|
||||
nanmax,
|
||||
nanmean,
|
||||
nansum,
|
||||
)
|
||||
from .factor import CustomFactor
|
||||
|
||||
|
||||
@@ -181,7 +181,7 @@ class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor):
|
||||
Return weighting vector for an exponential moving statistic on `length`
|
||||
rows with a decay rate of `decay_rate`.
|
||||
"""
|
||||
return full(length, decay_rate) ** arange(length + 1, 1, -1)
|
||||
return full(length, decay_rate, float) ** arange(length + 1, 1, -1)
|
||||
|
||||
@classmethod
|
||||
@expect_types(span=Number)
|
||||
|
||||
@@ -174,6 +174,7 @@ from zipline.utils.input_validation import (
|
||||
optionally,
|
||||
)
|
||||
from zipline.utils.numpy_utils import repeat_last_axis
|
||||
from zipline.utils.pandas_utils import sort_values
|
||||
from zipline.utils.preprocess import preprocess
|
||||
|
||||
|
||||
@@ -626,10 +627,10 @@ def overwrite_novel_deltas(baseline, deltas, dates):
|
||||
) <= 1
|
||||
novel_deltas = deltas.loc[novel_idx]
|
||||
non_novel_deltas = deltas.loc[~novel_idx]
|
||||
return pd.concat(
|
||||
return sort_values(pd.concat(
|
||||
(baseline, novel_deltas),
|
||||
ignore_index=True,
|
||||
).sort(TS_FIELD_NAME), non_novel_deltas
|
||||
), TS_FIELD_NAME), non_novel_deltas
|
||||
|
||||
|
||||
def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
|
||||
@@ -910,13 +911,13 @@ class BlazeLoader(dict):
|
||||
This can return more data than needed. The in memory reindex will
|
||||
handle this.
|
||||
"""
|
||||
return reduce(
|
||||
return sort_values(reduce(
|
||||
partial(pd.merge, on=added_query_fields, how='outer'),
|
||||
(
|
||||
odo(where(e, column), pd.DataFrame, **odo_kwargs)
|
||||
for column in columns
|
||||
),
|
||||
).sort(TS_FIELD_NAME) # sort for the groupby later
|
||||
), TS_FIELD_NAME) # sort for the groupby later
|
||||
|
||||
materialized_expr = collect_expr(expr)
|
||||
materialized_deltas = (
|
||||
|
||||
@@ -15,6 +15,7 @@ from pandas import (
|
||||
)
|
||||
from zipline.lib.adjusted_array import AdjustedArray
|
||||
from zipline.lib.adjustment import make_adjustment_from_labels
|
||||
from zipline.utils.pandas_utils import sort_values
|
||||
from .base import PipelineLoader
|
||||
|
||||
ADJUSTMENT_COLUMNS = Index([
|
||||
@@ -71,7 +72,7 @@ class DataFrameLoader(PipelineLoader):
|
||||
else:
|
||||
# Ensure that columns are in the correct order.
|
||||
adjustments = adjustments.reindex_axis(ADJUSTMENT_COLUMNS, axis=1)
|
||||
adjustments.sort(['apply_date', 'sid'], inplace=True)
|
||||
sort_values(adjustments, ['apply_date', 'sid'], inplace=True)
|
||||
|
||||
self.adjustments = adjustments
|
||||
self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
|
||||
|
||||
@@ -5,7 +5,7 @@ import pandas as pd
|
||||
from six import iteritems
|
||||
from six.moves import zip
|
||||
|
||||
from zipline.utils.numpy_utils import np_NaT
|
||||
from zipline.utils.numpy_utils import NaTns
|
||||
|
||||
|
||||
def next_date_frame(dates, events_by_sid):
|
||||
@@ -34,7 +34,7 @@ def next_date_frame(dates, events_by_sid):
|
||||
previous_date_frame
|
||||
"""
|
||||
cols = {
|
||||
equity: np.full_like(dates, np_NaT) for equity in events_by_sid
|
||||
equity: np.full_like(dates, NaTns) for equity in events_by_sid
|
||||
}
|
||||
raw_dates = dates.values
|
||||
for equity, event_dates in iteritems(events_by_sid):
|
||||
@@ -50,7 +50,7 @@ def next_date_frame(dates, events_by_sid):
|
||||
(knowledge_date <= raw_dates) &
|
||||
(raw_dates <= event_date)
|
||||
)
|
||||
value_mask = (event_date <= data) | (data == np_NaT)
|
||||
value_mask = (event_date <= data) | (data == NaTns)
|
||||
data[date_mask & value_mask] = event_date
|
||||
|
||||
return pd.DataFrame(index=dates, data=cols)
|
||||
@@ -82,7 +82,7 @@ def previous_date_frame(date_index, events_by_sid):
|
||||
next_date_frame
|
||||
"""
|
||||
sids = list(events_by_sid)
|
||||
out = np.full((len(date_index), len(sids)), np_NaT, dtype='datetime64[ns]')
|
||||
out = np.full((len(date_index), len(sids)), NaTns, dtype='datetime64[ns]')
|
||||
dn = date_index[-1].asm8
|
||||
for col_idx, sid in enumerate(sids):
|
||||
# events_by_sid[sid] is Series mapping knowledge_date to actual
|
||||
|
||||
@@ -309,8 +309,7 @@ def create_test_panel_source(sim_params=None, env=None, source_type=None):
|
||||
'arbitrary': arbitrary},
|
||||
index=index)
|
||||
if source_type:
|
||||
source_types = np.full(len(index), source_type)
|
||||
df['type'] = source_types
|
||||
df['type'] = source_type
|
||||
|
||||
panel = pd.Panel.from_dict({0: df})
|
||||
|
||||
|
||||
@@ -22,11 +22,19 @@ datetime64ns_dtype = dtype('datetime64[ns]')
|
||||
|
||||
make_datetime64ns = flip(datetime64, 'ns')
|
||||
make_datetime64D = flip(datetime64, 'D')
|
||||
np_NaT = make_datetime64ns('NaT')
|
||||
|
||||
NaTmap = {
|
||||
dtype('datetime64[%s]' % unit): datetime64('NaT', unit)
|
||||
for unit in ('ns', 'us', 'ms', 's', 'm', 'D')
|
||||
}
|
||||
NaT_for_dtype = NaTmap.__getitem__
|
||||
NaTns = NaT_for_dtype(datetime64ns_dtype)
|
||||
NaTD = NaT_for_dtype(datetime64D_dtype)
|
||||
|
||||
|
||||
_FILLVALUE_DEFAULTS = {
|
||||
float64_dtype: nan,
|
||||
datetime64ns_dtype: np_NaT,
|
||||
datetime64ns_dtype: NaTns,
|
||||
}
|
||||
|
||||
|
||||
@@ -127,7 +135,9 @@ def repeat_last_axis(array, count):
|
||||
_notNaT = make_datetime64D(0)
|
||||
|
||||
|
||||
def busday_count_mask_NaT(begindates, enddates, out=None):
|
||||
def busday_count_mask_NaT(begindates,
|
||||
enddates,
|
||||
out=None):
|
||||
"""
|
||||
Simple of numpy.busday_count that returns `float` arrays rather than int
|
||||
arrays, and handles `NaT`s by returning `NaN`s where the inputs were `NaT`.
|
||||
@@ -142,8 +152,8 @@ def busday_count_mask_NaT(begindates, enddates, out=None):
|
||||
if out is None:
|
||||
out = empty(broadcast(begindates, enddates).shape, dtype=float)
|
||||
|
||||
beginmask = (begindates == np_NaT)
|
||||
endmask = (enddates == np_NaT)
|
||||
beginmask = (begindates == NaTD)
|
||||
endmask = (enddates == NaTD)
|
||||
|
||||
out = busday_count(
|
||||
# Temporarily fill in non-NaT values.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Utilities for working with pandas objects.
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def explode(df):
|
||||
@@ -10,3 +11,10 @@ def explode(df):
|
||||
(df.index, df.columns, df.values)
|
||||
"""
|
||||
return df.index, df.columns, df.values
|
||||
|
||||
|
||||
try:
|
||||
# pandas 0.16 compat
|
||||
sort_values = pd.DataFrame.sort_values
|
||||
except AttributeError:
|
||||
sort_values = pd.DataFrame.sort
|
||||
|
||||
Reference in New Issue
Block a user