diff --git a/.travis.yml b/.travis.yml index a478357b..ae663f0f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,22 +4,28 @@ cache: directories: - $HOME/.cache/pip matrix: + fast_finish: true include: - python: 2.7 env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1 + - python: 2.7 + env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1 - python: 3.4 env: PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1 + - python: 3.4 + env: PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.2 SCIPY_VERSION=0.16.1 before_install: - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget https://repo.continuum.io/miniconda/Miniconda-3.7.0-Linux-x86_64.sh -O miniconda.sh; else wget https://repo.continuum.io/miniconda/Miniconda3-3.7.0-Linux-x86_64.sh -O miniconda.sh; fi - chmod +x miniconda.sh - ./miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" + - sed -i "s/numpy==.*/numpy==$NUMPY_VERSION/" etc/requirements.txt + - sed -i "s/pandas==.*/pandas==$PANDAS_VERSION/" etc/requirements.txt + - sed -i "s/scipy==.*/scipy==$SCIPY_VERSION/" etc/requirements.txt install: - - conda create -n testenv --yes pip python=$TRAVIS_PYTHON_VERSION + - conda create -n testenv --yes -c quantopian pip python=$TRAVIS_PYTHON_VERSION numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION ta-lib=0.4.8 - source activate testenv - - conda install --yes -c quantopian numpy=$NUMPY_VERSION pandas=$PANDAS_VERSION scipy=$SCIPY_VERSION Cython=0.22.1 ta-lib=0.4.8 - - conda remove --yes six pytz # remove these packages so our requirements can downgrade them - pip install --upgrade pip coverage coveralls - pip install -r etc/requirements.txt - pip install -r etc/requirements_dev.txt @@ -27,9 +33,9 @@ install: - pip install -e . before_script: - pip freeze | sort - - flake8 zipline tests script: - - nosetests --with-timer --exclude=^test_examples --with-coverage --cover-package=zipline --timer-top-n=15 + - nosetests tests/ + - flake8 zipline tests after_success: - coveralls diff --git a/etc/requirements.txt b/etc/requirements.txt index 3f9f0456..ccf6359e 100644 --- a/etc/requirements.txt +++ b/etc/requirements.txt @@ -24,7 +24,7 @@ six==1.9.0 # For fetching remote data requests==2.9.1 -Cython==0.22.1 +Cython==0.23.4 # faster OrderedDict cyordereddict==0.2.2 @@ -41,7 +41,7 @@ networkx==1.9.1 numexpr==2.4.3 # On disk storage format for pipeline data. -bcolz==0.10.0 +bcolz==0.12.1 # Command line interface helper click==4.0.0 diff --git a/setup.cfg b/setup.cfg index 68171ee4..5ae21162 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,9 @@ verbosity=2 detailed-errors=1 with-ignore-docstrings=1 with-timer=1 -timer-filter=warning +timer-top-n=15 +with-coverage=1 +cover-package=zipline [metadata] description-file = README.rst diff --git a/setup.py b/setup.py index da09b60c..d09f6170 100644 --- a/setup.py +++ b/setup.py @@ -33,43 +33,63 @@ from setuptools import ( import versioneer -class LazyCythonizingList(list): - cythonized = False - - def lazy_cythonize(self): - if self.cythonized: - return - self.cythonized = True - - from Cython.Build import cythonize - from numpy import get_include - - self[:] = cythonize( - [ - Extension(*ext_args, include_dirs=[get_include()]) - for ext_args in self - ] +class LazyBuildExtCommandClass(dict): + """ + Lazy command class that defers operations requiring Cython and numpy until + they've actually been downloaded and installed by setup_requires. + """ + def __contains__(self, key): + return ( + key == 'build_ext' + or super(LazyBuildExtCommandClass, self).__contains__(key) ) - def __iter__(self): - self.lazy_cythonize() - return super(LazyCythonizingList, self).__iter__() + def __setitem__(self, key, value): + if key == 'build_ext': + raise AssertionError("build_ext overridden!") + super(LazyBuildExtCommandClass, self).__setitem__(key, value) - def __getitem__(self, num): - self.lazy_cythonize() - return super(LazyCythonizingList, self).__getitem__(num) + def __getitem__(self, key): + if key != 'build_ext': + return super(LazyBuildExtCommandClass, self).__getitem__(key) + + from Cython.Distutils import build_ext as cython_build_ext + import numpy + + # Cython_build_ext isn't a new-style class in Py2. + class build_ext(cython_build_ext, object): + """ + Custom build_ext command that lazily adds numpy's include_dir to + extensions. + """ + def build_extensions(self): + """ + Lazily append numpy's include directory to Extension includes. + + This is done here rather than at module scope because setup.py + may be run before numpy has been installed, in which case + importing numpy and calling `numpy.get_include()` will fail. + """ + numpy_incl = numpy.get_include() + for ext in self.extensions: + ext.include_dirs.append(numpy_incl) + + super(build_ext, self).build_extensions() + return build_ext -ext_modules = LazyCythonizingList([ - ('zipline.assets._assets', ['zipline/assets/_assets.pyx']), - ('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']), - ('zipline.lib._float64window', ['zipline/lib/_float64window.pyx']), - ('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']), - ('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']), - ('zipline.lib.rank', ['zipline/lib/rank.pyx']), - ('zipline.data._equities', ['zipline/data/_equities.pyx']), - ('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']), -]) +ext_modules = [ + Extension('zipline.assets._assets', ['zipline/assets/_assets.pyx']), + Extension('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']), + Extension( + 'zipline.lib._float64window', ['zipline/lib/_float64window.pyx'] + ), + Extension('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']), + Extension('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']), + Extension('zipline.lib.rank', ['zipline/lib/rank.pyx']), + Extension('zipline.data._equities', ['zipline/data/_equities.pyx']), + Extension('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']), +] STR_TO_CMP = { @@ -116,9 +136,8 @@ def _filter_requirements(lines_iter): yield requirement -REQ_UPPER_BOUNDS = { - 'numpy': '<1.10', -} +# We don't currently have any known upper bounds. +REQ_UPPER_BOUNDS = {} def _with_bounds(req): @@ -183,11 +202,12 @@ def extras_requires(conda_format=False): } -def module_requirements(requirements_path, module_names): +def module_requirements(requirements_path, module_names, strict_bounds): module_names = set(module_names) found = set() module_lines = [] - for line in read_requirements(requirements_path, strict_bounds=True): + for line in read_requirements(requirements_path, + strict_bounds=strict_bounds): match = REQ_PATTERN.match(line) if match is None: raise AssertionError("Could not parse requirement: '%s'" % line) @@ -203,38 +223,12 @@ def module_requirements(requirements_path, module_names): ) return module_lines - -def pre_setup(): - if not set(sys.argv) & {'install', 'develop', 'egg_info', 'bdist_wheel'}: - return - - try: - import pip - if StrictVersion(pip.__version__) < StrictVersion('7.1.0'): - raise AssertionError( - "Zipline installation requires pip>=7.1.0, but your pip " - "version is {version}. \n" - "You can upgrade your pip with " - "'pip install --upgrade pip'.".format( - version=pip.__version__, - ) - ) - except ImportError: - raise AssertionError("Zipline installation requires pip") - - required = ('Cython', 'numpy') - for line in module_requirements('etc/requirements.txt', required): - pip.main(['install', line]) - - -pre_setup() - conda_build = os.path.basename(sys.argv[0]) == 'conda-build' setup( name='zipline', version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), + cmdclass=LazyBuildExtCommandClass(versioneer.get_cmdclass()), description='A backtester for financial algorithms.', author='Quantopian Inc.', author_email='opensource@quantopian.com', @@ -258,5 +252,10 @@ setup( ], install_requires=install_requires(conda_format=conda_build), extras_require=extras_requires(conda_format=conda_build), + setup_requires=module_requirements( + 'etc/requirements.txt', + ('Cython', 'numpy'), + strict_bounds=False, + ), url="http://zipline.io", ) diff --git a/tests/pipeline/test_blaze.py b/tests/pipeline/test_blaze.py index 0cbd8d84..202342e0 100644 --- a/tests/pipeline/test_blaze.py +++ b/tests/pipeline/test_blaze.py @@ -52,6 +52,19 @@ asset_infos = ( with_extra_sid = parameterized.expand(asset_infos) +def _utc_localize_index_level_0(df): + """``tz_localize`` the first level of a multiindexed dataframe to utc. + + Mutates df in place. + """ + idx = df.index + df.index = pd.MultiIndex.from_product( + (idx.levels[0].tz_localize('utc'), idx.levels[1]), + names=idx.names, + ) + return df + + class BlazeToPipelineTestCase(TestCase): @classmethod def setUpClass(cls): @@ -315,7 +328,11 @@ class BlazeToPipelineTestCase(TestCase): finder, ).run_pipeline(p, dates[0], dates[-1]) - assert_frame_equal(result, expected, check_dtype=False) + assert_frame_equal( + result, + _utc_localize_index_level_0(expected), + check_dtype=False, + ) def test_custom_query_time_tz(self): df = self.df.copy() @@ -345,7 +362,7 @@ class BlazeToPipelineTestCase(TestCase): expected = df.drop('asof_date', axis=1) expected['timestamp'] = expected['timestamp'].dt.normalize().astype( 'datetime64[ns]', - ) + ).dt.tz_localize('utc') expected.ix[3:5, 'timestamp'] += timedelta(days=1) expected.set_index(['timestamp', 'sid'], inplace=True) expected.index = pd.MultiIndex.from_product(( @@ -615,41 +632,26 @@ class BlazeToPipelineTestCase(TestCase): df['other'] = df.value + 1 fields = OrderedDict(self.macro_dshape.measure.fields) fields['other'] = fields['value'] - expr = bz.Data(df, name='expr', dshape=var * Record(fields)) - loader = BlazeLoader() - ds = from_blaze( - expr, - loader=loader, - no_deltas_rule=no_deltas_rules.ignore, - ) - p = Pipeline() - p.add(ds.value.latest, 'value') - p.add(ds.other.latest, 'other') - dates = self.dates asset_info = asset_infos[0][0] with tmp_asset_finder(equities=asset_info) as finder: - result = SimplePipelineEngine( - loader, - dates, + expected = pd.DataFrame( + np.array([[0, 1], + [1, 2], + [2, 3]]).repeat(3, axis=0), + index=pd.MultiIndex.from_product(( + df.timestamp, + finder.retrieve_all(asset_info.index), + )), + columns=('value', 'other'), + ).sort_index(axis=1) + self._test_id( + df, + var * Record(fields), + expected, finder, - ).run_pipeline(p, dates[0], dates[-1]) - - expected = pd.DataFrame( - np.array([[0, 1], - [1, 2], - [2, 3]]).repeat(3, axis=0), - index=pd.MultiIndex.from_product(( - df.timestamp, - finder.retrieve_all(asset_info.index), - )), - columns=('value', 'other'), - ).sort_index(axis=1) - assert_frame_equal( - result, - expected.sort_index(axis=1), - check_dtype=False, - ) + ('value', 'other'), + ) def test_id_take_last_in_group(self): T = pd.Timestamp @@ -804,7 +806,7 @@ class BlazeToPipelineTestCase(TestCase): assert_frame_equal( result, - expected_output, + _utc_localize_index_level_0(expected_output), check_dtype=False, ) diff --git a/tests/pipeline/test_earnings.py b/tests/pipeline/test_earnings.py index 8c52986f..57b72420 100644 --- a/tests/pipeline/test_earnings.py +++ b/tests/pipeline/test_earnings.py @@ -26,7 +26,7 @@ from zipline.pipeline.loaders.blaze import ( SID_FIELD_NAME, TS_FIELD_NAME, ) -from zipline.utils.numpy_utils import make_datetime64D, np_NaT +from zipline.utils.numpy_utils import make_datetime64D, NaTD from zipline.utils.test_utils import ( make_simple_equity_info, tmp_asset_finder, @@ -234,7 +234,7 @@ class EarningsCalendarLoaderTestCase(TestCase): # Set NaTs to 0 temporarily because busday_count doesn't support NaT. # We fill these entries with NaNs later. - whereNaT = raw_announce_dates == np_NaT + whereNaT = raw_announce_dates == NaTD raw_announce_dates[whereNaT] = make_datetime64D(0) # The abs call here makes it so that we can use this function to @@ -260,7 +260,7 @@ class EarningsCalendarLoaderTestCase(TestCase): '2014-01-10', '2014-01-15', '2014-01-20', - ]), + ], utc=True), )) def test_compute_earnings(self, dates): @@ -296,10 +296,12 @@ class EarningsCalendarLoaderTestCase(TestCase): assert_series_equal( computed_next.isnull(), computed_next_busday_offset.isnull(), + check_names=False, ) assert_series_equal( computed_previous.isnull(), computed_previous_busday_offset.isnull(), + check_names=False, ) for sid in self.sids: @@ -308,24 +310,28 @@ class EarningsCalendarLoaderTestCase(TestCase): computed_next.xs(sid, level=1), expected_next(sid), sid, + check_names=False, ) assert_series_equal( computed_previous.xs(sid, level=1), expected_previous(sid), sid, + check_names=False, ) assert_series_equal( computed_next_busday_offset.xs(sid, level=1), expected_next_busday_offset(sid), sid, + check_names=False, ) assert_series_equal( computed_previous_busday_offset.xs(sid, level=1), expected_previous_busday_offset(sid), sid, + check_names=False, ) diff --git a/tests/pipeline/test_engine.py b/tests/pipeline/test_engine.py index 438b5dbb..4dc6e606 100644 --- a/tests/pipeline/test_engine.py +++ b/tests/pipeline/test_engine.py @@ -157,22 +157,23 @@ class ConstantInputTestCase(TestCase): USEquityPricing.close: 3, USEquityPricing.high: 4, } - self.assets = [1, 2, 3] + self.asset_ids = [1, 2, 3] self.dates = date_range('2014-01', '2014-03', freq='D', tz='UTC') self.loader = ConstantLoader( constants=self.constants, dates=self.dates, - assets=self.assets, + assets=self.asset_ids, ) self.asset_info = make_simple_equity_info( - self.assets, + self.asset_ids, start_date=self.dates[0], end_date=self.dates[-1], ) environment = TradingEnvironment() environment.write_data(equities_df=self.asset_info) self.asset_finder = environment.asset_finder + self.assets = self.asset_finder.retrieve_all(self.asset_ids) def test_bad_dates(self): loader = self.loader @@ -192,7 +193,7 @@ class ConstantInputTestCase(TestCase): lambda column: loader, self.dates, self.asset_finder, ) factor = AssetID() - asset = self.assets[0] + asset = self.asset_ids[0] p = Pipeline(columns={'f': factor}, screen=factor <= asset) # The crux of this is that when we run the pipeline for a single day @@ -204,7 +205,7 @@ class ConstantInputTestCase(TestCase): def test_screen(self): loader = self.loader finder = self.asset_finder - assets = array(self.assets) + asset_ids = array(self.asset_ids) engine = SimplePipelineEngine( lambda column: loader, self.dates, self.asset_finder, ) @@ -212,11 +213,11 @@ class ConstantInputTestCase(TestCase): dates = self.dates[10:10 + num_dates] factor = AssetID() - for asset in assets: - p = Pipeline(columns={'f': factor}, screen=factor <= asset) + for asset_id in asset_ids: + p = Pipeline(columns={'f': factor}, screen=factor <= asset_id) result = engine.run_pipeline(p, dates[0], dates[-1]) - expected_sids = assets[assets <= asset] + expected_sids = asset_ids[asset_ids <= asset_id] expected_assets = finder.retrieve_all(expected_sids) expected_result = DataFrame( index=MultiIndex.from_product([dates, expected_assets]), @@ -228,7 +229,6 @@ class ConstantInputTestCase(TestCase): def test_single_factor(self): loader = self.loader - finder = self.asset_finder assets = self.assets engine = SimplePipelineEngine( lambda column: loader, self.dates, self.asset_finder, @@ -252,18 +252,17 @@ class ConstantInputTestCase(TestCase): result = engine.run_pipeline(p, dates[0], dates[-1]) self.assertEqual(set(result.columns), {'f'}) assert_multi_index_is_product( - self, result.index, dates, finder.retrieve_all(assets) + self, result.index, dates, assets ) check_arrays( result['f'].unstack().values, - full(result_shape, expected_result), + full(result_shape, expected_result, dtype=float), ) def test_multiple_rolling_factors(self): loader = self.loader - finder = self.asset_finder assets = self.assets engine = SimplePipelineEngine( lambda column: loader, self.dates, self.asset_finder, @@ -289,22 +288,22 @@ class ConstantInputTestCase(TestCase): self.assertEqual(set(results.columns), {'short', 'high', 'long'}) assert_multi_index_is_product( - self, results.index, dates, finder.retrieve_all(assets) + self, results.index, dates, assets ) # row-wise sum over an array whose values are all (1 - 2) check_arrays( results['short'].unstack().values, - full(shape, -short_factor.window_length), + full(shape, -short_factor.window_length, dtype=float), ) check_arrays( results['long'].unstack().values, - full(shape, -long_factor.window_length), + full(shape, -long_factor.window_length, dtype=float), ) # row-wise sum over an array whose values are all (1 - 3) check_arrays( results['high'].unstack().values, - full(shape, -2 * high_factor.window_length), + full(shape, -2 * high_factor.window_length, dtype=float), ) def test_numeric_factor(self): @@ -368,7 +367,7 @@ class ConstantInputTestCase(TestCase): loader = ConstantLoader( constants=constants, dates=self.dates, - assets=self.assets, + assets=self.asset_ids, ) engine = SimplePipelineEngine( lambda column: loader, self.dates, self.asset_finder, @@ -394,17 +393,23 @@ class ConstantInputTestCase(TestCase): set(result.columns) ) - result_index = self.assets * len(dates_to_test) + result_index = self.asset_ids * len(dates_to_test) result_shape = (len(result_index),) check_arrays( result['sumdiff'], - Series(index=result_index, data=full(result_shape, -3)), + Series( + index=result_index, + data=full(result_shape, -3, dtype=float), + ), ) for name, const in [('open', 1), ('close', 2), ('volume', 3)]: check_arrays( result[name], - Series(index=result_index, data=full(result_shape, const)), + Series( + index=result_index, + data=full(result_shape, const, dtype=float), + ), ) def test_loader_given_multiple_columns(self): @@ -427,12 +432,12 @@ class ConstantInputTestCase(TestCase): Loader1DataSet2.col2: 4} loader1 = RecordingConstantLoader(constants=constants1, dates=self.dates, - assets=self.assets) + assets=self.asset_ids) constants2 = {Loader2DataSet.col1: 5, Loader2DataSet.col2: 6} loader2 = RecordingConstantLoader(constants=constants2, dates=self.dates, - assets=self.assets) + assets=self.asset_ids) engine = SimplePipelineEngine( lambda column: @@ -471,19 +476,26 @@ class ConstantInputTestCase(TestCase): for name, pipe_col in iteritems(columns)} index = MultiIndex.from_product([self.dates[2:], self.assets]) + + def expected_for_col(col): + val = vals[col] + offset = columns[col].window_length - min_window + return concatenate( + [ + full(offset * index.levshape[1], nan), + full( + (index.levshape[0] - offset) * index.levshape[1], + val, + float, + ) + ], + ) + expected = DataFrame( - data={col: - concatenate(( - full((columns[col].window_length - min_window) - * index.levshape[1], - nan), - full((index.levshape[0] - - (columns[col].window_length - min_window)) - * index.levshape[1], - val))) - for col, val in iteritems(vals)}, + data={col: expected_for_col(col) for col in vals}, index=index, - columns=columns) + columns=columns, + ) assert_frame_equal(result, expected) @@ -504,7 +516,7 @@ class FrameInputTestCase(TestCase): cls.env = TradingEnvironment() day = cls.env.trading_day - cls.assets = Int64Index([1, 2, 3]) + cls.asset_ids = [1, 2, 3] cls.dates = date_range( '2015-01-01', '2015-01-31', @@ -513,12 +525,13 @@ class FrameInputTestCase(TestCase): ) asset_info = make_simple_equity_info( - cls.assets, + cls.asset_ids, start_date=cls.dates[0], end_date=cls.dates[-1], ) cls.env.write_data(equities_df=asset_info) cls.asset_finder = cls.env.asset_finder + cls.assets = cls.asset_finder.retrieve_all(cls.asset_ids) @classmethod def tearDownClass(cls): @@ -533,7 +546,7 @@ class FrameInputTestCase(TestCase): return DataFrame(data, columns=self.assets, index=self.dates) def test_compute_with_adjustments(self): - dates, assets = self.dates, self.assets + dates, asset_ids = self.dates, self.asset_ids low, high = USEquityPricing.low, USEquityPricing.high apply_idxs = [3, 10, 16] @@ -544,7 +557,7 @@ class FrameInputTestCase(TestCase): [ dict( kind=MULTIPLY, - sid=assets[1], + sid=asset_ids[1], value=2.0, start_date=None, end_date=apply_date(0, offset=-1), @@ -552,7 +565,7 @@ class FrameInputTestCase(TestCase): ), dict( kind=MULTIPLY, - sid=assets[1], + sid=asset_ids[1], value=3.0, start_date=None, end_date=apply_date(1, offset=-1), @@ -560,7 +573,7 @@ class FrameInputTestCase(TestCase): ), dict( kind=MULTIPLY, - sid=assets[1], + sid=asset_ids[1], value=5.0, start_date=None, end_date=apply_date(2, offset=-1), @@ -630,7 +643,7 @@ class SyntheticBcolzTestCase(TestCase): asset_lifetime=8, ) cls.last_asset_end = cls.asset_info['end_date'].max() - cls.all_assets = cls.asset_info.index + cls.all_asset_ids = cls.asset_info.index cls.env.write_data(equities_df=cls.asset_info) cls.finder = cls.env.asset_finder @@ -646,7 +659,7 @@ class SyntheticBcolzTestCase(TestCase): table = cls.writer.write( cls.temp_dir.getpath('testdata.bcolz'), cls.calendar, - cls.all_assets, + cls.all_asset_ids, ) cls.pipeline_loader = USEquityPricingLoader( @@ -698,7 +711,7 @@ class SyntheticBcolzTestCase(TestCase): self.finder, ) window_length = 5 - assets = self.all_assets + asset_ids = self.all_asset_ids dates = date_range( self.first_asset_start + self.trading_day, self.last_asset_end, @@ -722,7 +735,7 @@ class SyntheticBcolzTestCase(TestCase): # **previous** day's data. expected_raw = rolling_mean( self.writer.expected_values_2d( - dates - self.trading_day, assets, 'close', + dates - self.trading_day, asset_ids, 'close', ), window_length, min_periods=1, @@ -732,7 +745,7 @@ class SyntheticBcolzTestCase(TestCase): # Truncate off the extra rows needed to compute the SMAs. expected_raw[window_length:], index=dates_to_test, # dates_to_test is dates[window_length:] - columns=self.finder.retrieve_all(assets), + columns=self.finder.retrieve_all(asset_ids), ) self.write_nans(expected) result = results['sma'].unstack() @@ -750,7 +763,7 @@ class SyntheticBcolzTestCase(TestCase): self.finder, ) window_length = 5 - assets = self.all_assets + asset_ids = self.all_asset_ids dates = date_range( self.first_asset_start + self.trading_day, self.last_asset_end, @@ -772,9 +785,9 @@ class SyntheticBcolzTestCase(TestCase): # We expect NaNs when the asset was undefined, otherwise 0 everywhere, # since the input is always increasing. expected = DataFrame( - data=zeros((len(dates_to_test), len(assets)), dtype=float), + data=zeros((len(dates_to_test), len(asset_ids)), dtype=float), index=dates_to_test, - columns=self.finder.retrieve_all(assets), + columns=self.finder.retrieve_all(asset_ids), ) self.write_nans(expected) result = results['drawdown'].unstack() diff --git a/tests/pipeline/test_factor.py b/tests/pipeline/test_factor.py index 22a2242e..9e480b63 100644 --- a/tests/pipeline/test_factor.py +++ b/tests/pipeline/test_factor.py @@ -23,7 +23,11 @@ from zipline.pipeline.factors import ( RSI, ) from zipline.utils.test_utils import check_allclose, check_arrays -from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, np_NaT +from zipline.utils.numpy_utils import ( + datetime64ns_dtype, + float64_dtype, + NaTns, +) from .base import BasePipelineTestCase @@ -309,7 +313,7 @@ class FactorTestCase(BasePipelineTestCase): mask = eyemask if use_mask else nomask if set_missing: asfloat[:, 2] = nan - asdatetime[:, 2] = np_NaT + asdatetime[:, 2] = NaTns float_result = masked_rankdata_2d( data=asfloat, @@ -321,7 +325,7 @@ class FactorTestCase(BasePipelineTestCase): datetime_result = masked_rankdata_2d( data=asdatetime, mask=mask, - missing_value=np_NaT, + missing_value=NaTns, method=method, ascending=True, ) diff --git a/tests/pipeline/test_numerical_expression.py b/tests/pipeline/test_numerical_expression.py index 1026a762..0315d656 100644 --- a/tests/pipeline/test_numerical_expression.py +++ b/tests/pipeline/test_numerical_expression.py @@ -76,9 +76,9 @@ class NumericalExpressionTestCase(TestCase): self.h = H() self.d = DateFactor() self.fake_raw_data = { - self.f: full((5, 5), 3), - self.g: full((5, 5), 2), - self.h: full((5, 5), 1), + self.f: full((5, 5), 3, float), + self.g: full((5, 5), 2, float), + self.h: full((5, 5), 1, float), self.d: full((5, 5), 0, dtype='datetime64[ns]'), } self.mask = DataFrame(True, index=self.dates, columns=self.assets) @@ -94,7 +94,7 @@ class NumericalExpressionTestCase(TestCase): def check_constant_output(self, expr, expected): self.assertFalse(isnan(expected)) - return self.check_output(expr, full((5, 5), expected)) + return self.check_output(expr, full((5, 5), expected, float)) def test_validate_good(self): f = self.f @@ -435,9 +435,9 @@ class NumericalExpressionTestCase(TestCase): def test_comparisons(self): f, g, h = self.f, self.g, self.h self.fake_raw_data = { - f: arange(25).reshape(5, 5), - g: arange(25).reshape(5, 5) - eye(5), - h: full((5, 5), 5), + f: arange(25, dtype=float).reshape(5, 5), + g: arange(25, dtype=float).reshape(5, 5) - eye(5), + h: full((5, 5), 5, dtype=float), } f_data = self.fake_raw_data[f] g_data = self.fake_raw_data[g] @@ -479,9 +479,9 @@ class NumericalExpressionTestCase(TestCase): ) self.fake_raw_data = { - f: arange(25).reshape(5, 5), - g: arange(25).reshape(5, 5) - eye(5), - h: full((5, 5), 5), + f: arange(25, dtype=float).reshape(5, 5), + g: arange(25, dtype=float).reshape(5, 5) - eye(5), + h: full((5, 5), 5, dtype=float), custom_filter: custom_filter_mask, } diff --git a/tests/pipeline/test_pipeline_algo.py b/tests/pipeline/test_pipeline_algo.py index e2eb6605..eba5a254 100644 --- a/tests/pipeline/test_pipeline_algo.py +++ b/tests/pipeline/test_pipeline_algo.py @@ -401,7 +401,7 @@ class PipelineAlgorithmTestCase(TestCase): 'ratio': array([], dtype=float), 'sid': array([], dtype=int), }, - index=DatetimeIndex([], tz='UTC'), + index=DatetimeIndex([]), columns=['effective_date', 'ratio', 'sid'], ) dividends = DataFrame({ diff --git a/tests/pipeline/test_term.py b/tests/pipeline/test_term.py index f689bc33..c657811a 100644 --- a/tests/pipeline/test_term.py +++ b/tests/pipeline/test_term.py @@ -40,6 +40,7 @@ class SomeFactor(Factor): dtype = float64_dtype window_length = 5 inputs = [SomeDataSet.foo, SomeDataSet.bar] + SomeFactorAlias = SomeFactor diff --git a/tests/test_examples.py b/tests/test_examples.py index 5c93547b..04dc2ce3 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -16,14 +16,11 @@ # This code is based on a unittest written by John Salvatier: # https://github.com/pymc-devs/pymc/blob/pymc3/tests/test_examples.py -# Disable plotting -# - import glob -import imp import matplotlib from nose_parameterized import parameterized import os +import runpy from unittest import TestCase from zipline.utils import parse_args, run_pipeline @@ -45,7 +42,7 @@ class ExamplesTests(TestCase): @parameterized.expand(((os.path.basename(f).replace('.', '_'), f) for f in glob.glob(os.path.join(example_dir(), '*.py')))) def test_example(self, name, example): - imp.load_source('__main__', os.path.basename(example), open(example)) + runpy.run_path(example, run_name='__main__') # Test algorithm as if scripts/run_algo.py is being used. def test_example_run_pipline(self): diff --git a/tests/test_history.py b/tests/test_history.py index 2e8473e5..9179d3c7 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -41,131 +41,6 @@ from zipline.sources import RandomWalkSource, DataFrameSource import zipline.utils.factory as factory from zipline.utils.test_utils import subtest -# Cases are over the July 4th holiday, to ensure use of trading calendar. - -# March 2013 -# Su Mo Tu We Th Fr Sa -# 1 2 -# 3 4 5 6 7 8 9 -# 10 11 12 13 14 15 16 -# 17 18 19 20 21 22 23 -# 24 25 26 27 28 29 30 -# 31 -# April 2013 -# Su Mo Tu We Th Fr Sa -# 1 2 3 4 5 6 -# 7 8 9 10 11 12 13 -# 14 15 16 17 18 19 20 -# 21 22 23 24 25 26 27 -# 28 29 30 -# -# May 2013 -# Su Mo Tu We Th Fr Sa -# 1 2 3 4 -# 5 6 7 8 9 10 11 -# 12 13 14 15 16 17 18 -# 19 20 21 22 23 24 25 -# 26 27 28 29 30 31 -# -# June 2013 -# Su Mo Tu We Th Fr Sa -# 1 -# 2 3 4 5 6 7 8 -# 9 10 11 12 13 14 15 -# 16 17 18 19 20 21 22 -# 23 24 25 26 27 28 29 -# 30 -# July 2013 -# Su Mo Tu We Th Fr Sa -# 1 2 3 4 5 6 -# 7 8 9 10 11 12 13 -# 14 15 16 17 18 19 20 -# 21 22 23 24 25 26 27 -# 28 29 30 31 -# -# Times to be converted via: -# pd.Timestamp('2013-07-05 9:31', tz='US/Eastern').tz_convert('UTC')}, - -INDEX_TEST_CASES_RAW = { - 'week of daily data': { - 'input': {'bar_count': 5, - 'frequency': '1d', - 'algo_dt': '2013-07-05 9:31AM'}, - 'expected': [ - '2013-06-28 4:00PM', - '2013-07-01 4:00PM', - '2013-07-02 4:00PM', - '2013-07-03 1:00PM', - '2013-07-05 9:31AM', - ] - }, - 'five minutes on july 5th open': { - 'input': {'bar_count': 5, - 'frequency': '1m', - 'algo_dt': '2013-07-05 9:31AM'}, - 'expected': [ - '2013-07-03 12:57PM', - '2013-07-03 12:58PM', - '2013-07-03 12:59PM', - '2013-07-03 1:00PM', - '2013-07-05 9:31AM', - ] - }, -} - - -def to_timestamp(dt_str): - return pd.Timestamp(dt_str, tz='US/Eastern').tz_convert('UTC') - - -def convert_cases(cases): - """ - Convert raw strings to values comparable with system data. - """ - cases = cases.copy() - for case in cases.values(): - case['input']['algo_dt'] = to_timestamp(case['input']['algo_dt']) - case['expected'] = pd.DatetimeIndex([to_timestamp(dt_str) for dt_str - in case['expected']]) - return cases - -INDEX_TEST_CASES = convert_cases(INDEX_TEST_CASES_RAW) - - -def get_index_at_dt(case_input, env): - history_spec = history.HistorySpec( - case_input['bar_count'], - case_input['frequency'], - None, - False, - env=env, - data_frequency='minute', - ) - return history.index_at_dt(history_spec, case_input['algo_dt'], env=env) - - -class TestHistoryIndex(TestCase): - - @classmethod - def setUpClass(cls): - cls.environment = TradingEnvironment() - - @classmethod - def tearDownClass(cls): - del cls.environment - - @parameterized.expand( - [(name, case['input'], case['expected']) - for name, case in INDEX_TEST_CASES.items()] - ) - def test_index_at_dt(self, name, case_input, expected): - history_index = get_index_at_dt(case_input, self.environment) - - history_series = pd.Series(index=history_index) - expected_series = pd.Series(index=expected) - - pd.util.testing.assert_series_equal(history_series, expected_series) - class TestHistoryContainer(TestCase): @@ -473,9 +348,9 @@ def handle_data(context, data): output = test_algo.run(source) self.assertIsNotNone(output) - history_trace = test_algo.history_trace + df.columns = self.env.asset_finder.retrieve_all(df.columns) - for i, received in enumerate(history_trace[bar_count - 1:]): + for i, received in enumerate(test_algo.history_trace[bar_count - 1:]): expected = df.iloc[i:i + bar_count] assert_frame_equal(expected, received) diff --git a/tests/utils/test_events.py b/tests/utils/test_events.py index b97820d5..4c5c5b3c 100644 --- a/tests/utils/test_events.py +++ b/tests/utils/test_events.py @@ -266,7 +266,7 @@ class TestStatelessRules(RuleTestCase): @subtest(minutes_for_days(), 'ms') def test_Always(self, ms): should_trigger = partial(Always().should_trigger, env=self.env) - self.assertTrue(all(map(partial(should_trigger, env=self.env), ms))) + self.assertTrue(all(map(should_trigger, ms))) @subtest(minutes_for_days(), 'ms') def test_Never(self, ms): diff --git a/zipline/data/loader.py b/zipline/data/loader.py index b12ae551..d48bccb1 100644 --- a/zipline/data/loader.py +++ b/zipline/data/loader.py @@ -1,5 +1,5 @@ # -# Copyright 2013 Quantopian, Inc. +# Copyright 2016 Quantopian, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ from pandas.io.data import DataReader import pytz from six import iteritems +from six.moves.urllib_error import HTTPError from . benchmarks import get_benchmark_returns from . import treasuries, treasuries_can @@ -239,8 +240,15 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day): path=path, ) - data = get_benchmark_returns(symbol, first_date - trading_day, last_date) - data.to_csv(path) + try: + data = get_benchmark_returns( + symbol, + first_date - trading_day, + last_date, + ) + data.to_csv(path) + except (OSError, IOError, HTTPError): + logger.exception('failed to cache the new benchmark returns') if not has_data_for_dates(data, first_date, last_date): logger.warn("Still don't have expected data after redownload!") return data @@ -306,8 +314,11 @@ def ensure_treasury_data(bm_symbol, first_date, last_date, now): ) ) - data = loader_module.get_treasury_data(first_date, last_date) - data.to_csv(path) + try: + data = loader_module.get_treasury_data(first_date, last_date) + data.to_csv(path) + except (OSError, IOError, HTTPError): + logger.exception('failed to cache treasury data') if not has_data_for_dates(data, first_date, last_date): logger.warn("Still don't have expected data after redownload!") return data diff --git a/zipline/data/us_equity_pricing.py b/zipline/data/us_equity_pricing.py index dcd9b135..6b15f013 100644 --- a/zipline/data/us_equity_pricing.py +++ b/zipline/data/us_equity_pricing.py @@ -206,7 +206,7 @@ class BcolzDailyBarWriter(with_metaclass(ABCMeta)): if column_name == 'id': # We know what the content of this column is, so don't # bother reading it. - columns['id'].append(full((nrows,), asset_id)) + columns['id'].append(full((nrows,), asset_id, uint32)) continue columns[column_name].append( self.to_uint32(table[column_name][:], column_name) diff --git a/zipline/examples/pairtrade.py b/zipline/examples/pairtrade.py index d04e8e58..f7fa36c0 100755 --- a/zipline/examples/pairtrade.py +++ b/zipline/examples/pairtrade.py @@ -32,8 +32,8 @@ def ols_transform(data, sid1, sid2): """Computes regression coefficient (slope and intercept) via Ordinary Least Squares between two SIDs. """ - p0 = data.price[sid1] - p1 = sm.add_constant(data.price[sid2], prepend=True) + p0 = data.price[sid1].values + p1 = sm.add_constant(data.price[sid2].values, prepend=True) slope, intercept = sm.OLS(p0, p1).fit().params return slope, intercept diff --git a/zipline/finance/performance/position.py b/zipline/finance/performance/position.py index 155d019b..d8acec2c 100644 --- a/zipline/finance/performance/position.py +++ b/zipline/finance/performance/position.py @@ -32,14 +32,12 @@ Position Tracking """ from __future__ import division -from math import ( - copysign, - floor, -) +from math import copysign from copy import copy import logbook +import numpy as np import zipline.protocol as zp from zipline.utils.serialization_utils import ( @@ -71,7 +69,8 @@ class Position(object): # stock dividend if dividend['payment_sid']: out['payment_sid'] = dividend['payment_sid'] - out['share_count'] = floor(self.amount * float(dividend['ratio'])) + out['share_count'] = np.floor(self.amount + * float(dividend['ratio'])) # cash dividend if dividend['net_amount']: @@ -106,7 +105,7 @@ class Position(object): raw_share_count = self.amount / float(ratio) # e.g., 33 - full_share_count = floor(raw_share_count) + full_share_count = np.floor(raw_share_count) # e.g., 0.333 fractional_share_count = raw_share_count - full_share_count diff --git a/zipline/finance/performance/tracker.py b/zipline/finance/performance/tracker.py index 49a1dede..a084bdfc 100644 --- a/zipline/finance/performance/tracker.py +++ b/zipline/finance/performance/tracker.py @@ -70,6 +70,7 @@ from pandas.tseries.tools import normalize_date import zipline.finance.risk as risk from . period import PerformancePeriod +from zipline.utils.pandas_utils import sort_values from zipline.utils.serialization_utils import ( VERSION_LABEL ) @@ -200,9 +201,9 @@ class PerformanceTracker(object): ) self._dividend_count += len(new_dividends) - self.dividend_frame = pd.concat( + self.dividend_frame = sort_values(pd.concat( [self.dividend_frame, new_dividends] - ).sort(['pay_date', 'ex_date']).set_index('id', drop=False) + ), ['pay_date', 'ex_date']).set_index('id', drop=False) def initialize_dividends_from_other(self, other): """ diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 9f64db97..dd436fdc 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -374,7 +374,7 @@ class TradingEnvironment(object): def get_open_and_close(self, day): index = self.open_and_closes.index.get_loc(day.date()) - todays_minutes = self.open_and_closes.values[index] + todays_minutes = self.open_and_closes.iloc[index] return todays_minutes[0], todays_minutes[1] def market_minutes_for_day(self, stamp): diff --git a/zipline/history/__init__.py b/zipline/history/__init__.py index d9a8a1cc..9efe333b 100644 --- a/zipline/history/__init__.py +++ b/zipline/history/__init__.py @@ -15,8 +15,6 @@ from . history import ( HistorySpec, - days_index_at_dt, - index_at_dt, Frequency, ) @@ -24,8 +22,6 @@ from . import history_container __all__ = [ 'HistorySpec', - 'days_index_at_dt', - 'index_at_dt', 'history_container', 'Frequency', ] diff --git a/zipline/history/history.py b/zipline/history/history.py index 8244aa01..f83f22ac 100644 --- a/zipline/history/history.py +++ b/zipline/history/history.py @@ -15,7 +15,6 @@ from __future__ import division -import numpy as np import pandas as pd import re @@ -284,59 +283,3 @@ class HistorySpec(object): def __repr__(self): return ''.join([self.__class__.__name__, "('", self.key_str, "')"]) - - -def days_index_at_dt(history_spec, algo_dt, env): - """ - Get the index of a frame to be used for a get_history call with daily - frequency. - """ - # Get the previous (bar_count - 1) days' worth of market closes. - day_delta = (history_spec.bar_count - 1) * history_spec.frequency.num - market_closes = env.open_close_window( - algo_dt, - day_delta, - offset=(-day_delta), - step=history_spec.frequency.num, - ).market_close - - if history_spec.frequency.data_frequency == 'daily': - market_closes = market_closes.apply(pd.tslib.normalize_date) - - # Append the current algo_dt as the last index value. - # Using the 'rawer' numpy array values here because of a bottleneck - # that appeared when using DatetimeIndex - return np.append(market_closes.values, algo_dt) - - -def minutes_index_at_dt(history_spec, algo_dt, env): - """ - Get the index of a frame to be used for a get_history_call with minutely - frequency. - """ - # TODO: This is almost certainly going to be too slow for production. - return env.market_minute_window( - algo_dt, - history_spec.bar_count, - step=-1, - )[::-1] - - -def index_at_dt(history_spec, algo_dt, env): - """ - Returns index of a frame returned by get_history() with the given - history_spec and algo_dt. - - The resulting index will have @history_spec.bar_count bars, increasing in - units of @history_spec.frequency, terminating at the given @algo_dt. - - Note: The last bar of the returned frame represents an as-of-yet incomplete - time window, so the delta between the last and second-to-last bars is - usually always less than `@history_spec.frequency` for frequencies greater - than 1m. - """ - frequency = history_spec.frequency - if frequency.unit_str == 'd': - return days_index_at_dt(history_spec, algo_dt, env) - elif frequency.unit_str == 'm': - return minutes_index_at_dt(history_spec, algo_dt, env) diff --git a/zipline/pipeline/factors/events.py b/zipline/pipeline/factors/events.py index 42db1fbf..2491efc8 100644 --- a/zipline/pipeline/factors/events.py +++ b/zipline/pipeline/factors/events.py @@ -5,7 +5,7 @@ announcements, acquisitions, dividends, etc.). from numpy import newaxis from zipline.pipeline.data.earnings import EarningsCalendar from zipline.utils.numpy_utils import ( - np_NaT, + NaTD, busday_count_mask_NaT, datetime64D_dtype, float64_dtype, @@ -48,7 +48,7 @@ class BusinessDaysUntilNextEarnings(Factor): announce_dates = arrays[0].astype(datetime64D_dtype) # Set masked values to NaT. - announce_dates[~mask] = np_NaT + announce_dates[~mask] = NaTD # Convert row labels into a column vector for broadcasted comparison. reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis] @@ -84,7 +84,7 @@ class BusinessDaysSincePreviousEarnings(Factor): announce_dates = arrays[0].astype(datetime64D_dtype) # Set masked values to NaT. - announce_dates[~mask] = np_NaT + announce_dates[~mask] = NaTD # Convert row labels into a column vector for broadcasted comparison. reference_dates = dates.values.astype(datetime64D_dtype)[:, newaxis] diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py index fe08b9c0..3eb7eda2 100644 --- a/zipline/pipeline/factors/technical.py +++ b/zipline/pipeline/factors/technical.py @@ -2,12 +2,6 @@ Technical Analysis Factors -------------------------- """ -from bottleneck import ( - nanargmax, - nanmax, - nanmean, - nansum, -) from numbers import Number from numpy import ( abs, @@ -31,6 +25,12 @@ from zipline.pipeline.data import USEquityPricing from zipline.pipeline.mixins import SingleInputMixin from zipline.utils.control_flow import ignore_nanwarnings from zipline.utils.input_validation import expect_types +from zipline.utils.math_utils import ( + nanargmax, + nanmax, + nanmean, + nansum, +) from .factor import CustomFactor @@ -181,7 +181,7 @@ class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor): Return weighting vector for an exponential moving statistic on `length` rows with a decay rate of `decay_rate`. """ - return full(length, decay_rate) ** arange(length + 1, 1, -1) + return full(length, decay_rate, float) ** arange(length + 1, 1, -1) @classmethod @expect_types(span=Number) diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py index 7b8b71a8..859adcec 100644 --- a/zipline/pipeline/loaders/blaze/core.py +++ b/zipline/pipeline/loaders/blaze/core.py @@ -174,6 +174,7 @@ from zipline.utils.input_validation import ( optionally, ) from zipline.utils.numpy_utils import repeat_last_axis +from zipline.utils.pandas_utils import sort_values from zipline.utils.preprocess import preprocess @@ -626,10 +627,10 @@ def overwrite_novel_deltas(baseline, deltas, dates): ) <= 1 novel_deltas = deltas.loc[novel_idx] non_novel_deltas = deltas.loc[~novel_idx] - return pd.concat( + return sort_values(pd.concat( (baseline, novel_deltas), ignore_index=True, - ).sort(TS_FIELD_NAME), non_novel_deltas + ), TS_FIELD_NAME), non_novel_deltas def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value): @@ -910,13 +911,13 @@ class BlazeLoader(dict): This can return more data than needed. The in memory reindex will handle this. """ - return reduce( + return sort_values(reduce( partial(pd.merge, on=added_query_fields, how='outer'), ( odo(where(e, column), pd.DataFrame, **odo_kwargs) for column in columns ), - ).sort(TS_FIELD_NAME) # sort for the groupby later + ), TS_FIELD_NAME) # sort for the groupby later materialized_expr = collect_expr(expr) materialized_deltas = ( diff --git a/zipline/pipeline/loaders/frame.py b/zipline/pipeline/loaders/frame.py index 396fb6d0..4466ed04 100644 --- a/zipline/pipeline/loaders/frame.py +++ b/zipline/pipeline/loaders/frame.py @@ -15,6 +15,7 @@ from pandas import ( ) from zipline.lib.adjusted_array import AdjustedArray from zipline.lib.adjustment import make_adjustment_from_labels +from zipline.utils.pandas_utils import sort_values from .base import PipelineLoader ADJUSTMENT_COLUMNS = Index([ @@ -71,7 +72,7 @@ class DataFrameLoader(PipelineLoader): else: # Ensure that columns are in the correct order. adjustments = adjustments.reindex_axis(ADJUSTMENT_COLUMNS, axis=1) - adjustments.sort(['apply_date', 'sid'], inplace=True) + sort_values(adjustments, ['apply_date', 'sid'], inplace=True) self.adjustments = adjustments self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date) diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py index e5682461..7f9448e7 100644 --- a/zipline/pipeline/loaders/utils.py +++ b/zipline/pipeline/loaders/utils.py @@ -5,7 +5,7 @@ import pandas as pd from six import iteritems from six.moves import zip -from zipline.utils.numpy_utils import np_NaT +from zipline.utils.numpy_utils import NaTns def next_date_frame(dates, events_by_sid): @@ -34,7 +34,7 @@ def next_date_frame(dates, events_by_sid): previous_date_frame """ cols = { - equity: np.full_like(dates, np_NaT) for equity in events_by_sid + equity: np.full_like(dates, NaTns) for equity in events_by_sid } raw_dates = dates.values for equity, event_dates in iteritems(events_by_sid): @@ -50,7 +50,7 @@ def next_date_frame(dates, events_by_sid): (knowledge_date <= raw_dates) & (raw_dates <= event_date) ) - value_mask = (event_date <= data) | (data == np_NaT) + value_mask = (event_date <= data) | (data == NaTns) data[date_mask & value_mask] = event_date return pd.DataFrame(index=dates, data=cols) @@ -82,7 +82,7 @@ def previous_date_frame(date_index, events_by_sid): next_date_frame """ sids = list(events_by_sid) - out = np.full((len(date_index), len(sids)), np_NaT, dtype='datetime64[ns]') + out = np.full((len(date_index), len(sids)), NaTns, dtype='datetime64[ns]') dn = date_index[-1].asm8 for col_idx, sid in enumerate(sids): # events_by_sid[sid] is Series mapping knowledge_date to actual diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index 0c88a625..dfbcde8b 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -309,8 +309,7 @@ def create_test_panel_source(sim_params=None, env=None, source_type=None): 'arbitrary': arbitrary}, index=index) if source_type: - source_types = np.full(len(index), source_type) - df['type'] = source_types + df['type'] = source_type panel = pd.Panel.from_dict({0: df}) diff --git a/zipline/utils/numpy_utils.py b/zipline/utils/numpy_utils.py index 74fed9ae..aba56b26 100644 --- a/zipline/utils/numpy_utils.py +++ b/zipline/utils/numpy_utils.py @@ -22,11 +22,19 @@ datetime64ns_dtype = dtype('datetime64[ns]') make_datetime64ns = flip(datetime64, 'ns') make_datetime64D = flip(datetime64, 'D') -np_NaT = make_datetime64ns('NaT') + +NaTmap = { + dtype('datetime64[%s]' % unit): datetime64('NaT', unit) + for unit in ('ns', 'us', 'ms', 's', 'm', 'D') +} +NaT_for_dtype = NaTmap.__getitem__ +NaTns = NaT_for_dtype(datetime64ns_dtype) +NaTD = NaT_for_dtype(datetime64D_dtype) + _FILLVALUE_DEFAULTS = { float64_dtype: nan, - datetime64ns_dtype: np_NaT, + datetime64ns_dtype: NaTns, } @@ -127,7 +135,9 @@ def repeat_last_axis(array, count): _notNaT = make_datetime64D(0) -def busday_count_mask_NaT(begindates, enddates, out=None): +def busday_count_mask_NaT(begindates, + enddates, + out=None): """ Simple of numpy.busday_count that returns `float` arrays rather than int arrays, and handles `NaT`s by returning `NaN`s where the inputs were `NaT`. @@ -142,8 +152,8 @@ def busday_count_mask_NaT(begindates, enddates, out=None): if out is None: out = empty(broadcast(begindates, enddates).shape, dtype=float) - beginmask = (begindates == np_NaT) - endmask = (enddates == np_NaT) + beginmask = (begindates == NaTD) + endmask = (enddates == NaTD) out = busday_count( # Temporarily fill in non-NaT values. diff --git a/zipline/utils/pandas_utils.py b/zipline/utils/pandas_utils.py index e488ce84..2dd282e9 100644 --- a/zipline/utils/pandas_utils.py +++ b/zipline/utils/pandas_utils.py @@ -1,6 +1,7 @@ """ Utilities for working with pandas objects. """ +import pandas as pd def explode(df): @@ -10,3 +11,10 @@ def explode(df): (df.index, df.columns, df.values) """ return df.index, df.columns, df.values + + +try: + # pandas 0.16 compat + sort_values = pd.DataFrame.sort_values +except AttributeError: + sort_values = pd.DataFrame.sort