mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 05:23:38 +08:00
Merge pull request #1339 from quantopian/latest-numpy-pandas
Latest numpy and pandas
This commit is contained in:
@@ -55,6 +55,9 @@ nosetests.xml
|
||||
# Built documentation
|
||||
docs/_build/*
|
||||
|
||||
# Un-tarred example data input. We should only commit the tarball.
|
||||
tests/resources/example_data/*
|
||||
|
||||
# database of vbench
|
||||
benchmarks.db
|
||||
|
||||
|
||||
+2
-6
@@ -9,8 +9,7 @@ env:
|
||||
# ANACONDA_TOKEN with api:write
|
||||
- secure: "RJJjjQloUjCSkhII93QM+YAsr6YYq7hPFvlbnT07ogn1NeUPsVCyJ97oiZfKtPgdbd24hdQP/CHfB0HgTTES8n996tN3QWc4hZj0e10kFyIlas9qnkrRYRR1jxGShBDXrLdx/tGh8z1qnnCm1fy+fDhAF7Zerouwy4EA2YEzxdE="
|
||||
matrix:
|
||||
- PANDAS_VERSION=0.16.1 NUMPY_VERSION=1.9.2 SCIPY_VERSION=0.15.1
|
||||
- PANDAS_VERSION=0.17.1 NUMPY_VERSION=1.10.4 SCIPY_VERSION=0.16.1
|
||||
- NUMPY_VERSION=1.11.1 SCIPY_VERSION=0.17.1
|
||||
cache:
|
||||
directories:
|
||||
- $HOME/.cache/.pip/
|
||||
@@ -20,14 +19,11 @@ before_install:
|
||||
- chmod +x miniconda.sh
|
||||
- ./miniconda.sh -b -p $HOME/miniconda
|
||||
- export PATH="$HOME/miniconda/bin:$PATH"
|
||||
- sed -i "s/numpy==.*/numpy==$NUMPY_VERSION/" etc/requirements.txt
|
||||
- sed -i "s/pandas==.*/pandas==$PANDAS_VERSION/" etc/requirements.txt
|
||||
- sed -i "s/scipy==.*/scipy==$SCIPY_VERSION/" etc/requirements.txt
|
||||
install:
|
||||
- conda install conda=4.1.11 conda-build=1.21.11 anaconda-client=1.5.1 --yes
|
||||
|
||||
- TALIB_VERSION=$(cat ./etc/requirements_talib.txt | sed "s/TA-Lib==\(.*\)/\1/")
|
||||
- conda create -n testenv --yes -c quantopian pip python=$TRAVIS_PYTHON_VERSION numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION libgfortran=1.0 ta-lib=$TALIB_VERSION
|
||||
- conda create -n testenv --yes -c quantopian pip python=$TRAVIS_PYTHON_VERSION numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION libgfortran=3.0 ta-lib=$TALIB_VERSION
|
||||
- source activate testenv
|
||||
- IFS='.' read -r -a NPY_VERSION_ARR <<< "$NUMPY_VERSION"
|
||||
- CONDA_NPY=${NPY_VERSION_ARR[0]}${NPY_VERSION_ARR[1]}
|
||||
|
||||
+6
-18
@@ -15,27 +15,15 @@ environment:
|
||||
matrix:
|
||||
- PYTHON_VERSION: "2.7"
|
||||
PYTHON_ARCH: "64"
|
||||
PANDAS_VERSION: "0.16.1"
|
||||
NUMPY_VERSION: "1.9.2"
|
||||
SCIPY_VERSION: "0.15.1"
|
||||
PANDAS_VERSION: "0.18.1"
|
||||
NUMPY_VERSION: "1.11.1"
|
||||
SCIPY_VERSION: "0.17.1"
|
||||
|
||||
- PYTHON_VERSION: "3.4"
|
||||
PYTHON_ARCH: "64"
|
||||
PANDAS_VERSION: "0.16.1"
|
||||
NUMPY_VERSION: "1.9.2"
|
||||
SCIPY_VERSION: "0.15.1"
|
||||
|
||||
- PYTHON_VERSION: "2.7"
|
||||
PYTHON_ARCH: "64"
|
||||
PANDAS_VERSION: "0.17.1"
|
||||
NUMPY_VERSION: "1.10.4"
|
||||
SCIPY_VERSION: "0.16.1"
|
||||
|
||||
- PYTHON_VERSION: "3.4"
|
||||
PYTHON_ARCH: "64"
|
||||
PANDAS_VERSION: "0.17.1"
|
||||
NUMPY_VERSION: "1.10.4"
|
||||
SCIPY_VERSION: "0.16.1"
|
||||
PANDAS_VERSION: "0.18.1"
|
||||
NUMPY_VERSION: "1.11.1"
|
||||
SCIPY_VERSION: "0.17.1"
|
||||
|
||||
# We always use a 64-bit machine, but can build x86 distributions
|
||||
# with the PYTHON_ARCH variable (which is used by CMD_IN_ENV).
|
||||
|
||||
@@ -8,13 +8,13 @@ Logbook==0.12.5
|
||||
|
||||
# Scientific Libraries
|
||||
|
||||
pytz==2015.4
|
||||
numpy==1.9.2
|
||||
pytz==2016.4
|
||||
numpy==1.11.1
|
||||
|
||||
# scipy and pandas are required for statsmodels,
|
||||
# statsmodels in turn is required for some pandas packages
|
||||
scipy==0.15.1
|
||||
pandas==0.16.1
|
||||
scipy==0.17.1
|
||||
pandas==0.18.1
|
||||
pandas-datareader==0.2.1
|
||||
# Needed for parts of pandas.stats
|
||||
patsy==0.4.0
|
||||
@@ -38,6 +38,7 @@ contextlib2==0.4.0
|
||||
|
||||
# networkx requires decorator
|
||||
decorator==4.0.0
|
||||
|
||||
# Graph algorithms used by zipline.pipeline
|
||||
networkx==1.9.1
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
-e git://github.com/quantopian/datashape.git@bf06a41dc0908baf7c324aeacadba8820468ee78#egg=datashape-dev
|
||||
-e git://github.com/quantopian/odo.git@9e16310b5f2c3f05162145200db7e7908f0a866e#egg=odo-dev
|
||||
-e git://github.com/quantopian/blaze.git@7b8f70c15279c7ebacf2ac2f5f74acf08b300d71#egg=blaze-dev
|
||||
-e git://github.com/quantopian/odo.git@da7f26d87702f5d293763e8ed54c7e25fd3af386#egg=odo-dev
|
||||
-e git://github.com/quantopian/blaze.git@929afbd3466df7efa0cf720b74dba223e0b3ac4a#egg=blaze-dev
|
||||
|
||||
@@ -148,7 +148,7 @@ def _filter_requirements(lines_iter, filter_names=None,
|
||||
|
||||
REQ_UPPER_BOUNDS = {
|
||||
'bcolz': '<1',
|
||||
'pandas': '<0.18',
|
||||
'pandas': '<0.19',
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -590,7 +590,7 @@ class BcolzMinuteBarTestCase(WithTradingCalendars,
|
||||
'high': full(9, nan),
|
||||
'low': full(9, nan),
|
||||
'close': full(9, nan),
|
||||
'volume': full(9, 0),
|
||||
'volume': full(9, 0.0),
|
||||
},
|
||||
index=[minutes])
|
||||
self.writer.write_sid(sid, data)
|
||||
@@ -633,7 +633,7 @@ class BcolzMinuteBarTestCase(WithTradingCalendars,
|
||||
view(float64),
|
||||
'close': ((0b11111111111 << 52) + arange(31, 40, dtype=int64)).
|
||||
view(float64),
|
||||
'volume': full(9, 0),
|
||||
'volume': full(9, 0.0),
|
||||
},
|
||||
index=[minutes])
|
||||
self.writer.write_sid(sid, data)
|
||||
|
||||
@@ -16,6 +16,7 @@ import datetime
|
||||
from inspect import isabstract
|
||||
import random
|
||||
from unittest import TestCase
|
||||
import warnings
|
||||
|
||||
from nose_parameterized import parameterized
|
||||
import pandas as pd
|
||||
@@ -439,6 +440,38 @@ class StatelessRulesTests(RuleTestCase):
|
||||
self.assertIs(composed.second, rule2)
|
||||
self.assertFalse(any(map(should_trigger, minute)))
|
||||
|
||||
@parameterized.expand([
|
||||
('month_start', NthTradingDayOfMonth),
|
||||
('month_end', NDaysBeforeLastTradingDayOfMonth),
|
||||
('week_start', NthTradingDayOfWeek),
|
||||
('week_end', NthTradingDayOfWeek),
|
||||
])
|
||||
def test_pass_float_to_day_of_period_rule(self, name, rule_type):
|
||||
with warnings.catch_warnings(record=True) as raised_warnings:
|
||||
warnings.simplefilter('always')
|
||||
rule_type(n=3) # Shouldn't trigger a warning.
|
||||
rule_type(n=3.0) # Should trigger a warning about float coercion.
|
||||
|
||||
self.assertEqual(len(raised_warnings), 1)
|
||||
|
||||
# We only implicitly convert from float to int when there's no loss of
|
||||
# precision.
|
||||
with self.assertRaises(TypeError):
|
||||
rule_type(3.1)
|
||||
|
||||
def test_invalid_offsets(self):
|
||||
with self.assertRaises(ValueError):
|
||||
NthTradingDayOfWeek(5)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
NthTradingDayOfWeek(-1)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
NthTradingDayOfMonth(-1)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
NthTradingDayOfMonth(24)
|
||||
|
||||
|
||||
class StatefulRulesTests(RuleTestCase):
|
||||
CALENDAR_STRING = "NYSE"
|
||||
|
||||
@@ -784,6 +784,7 @@ class BlazeToPipelineTestCase(WithAssetFinder, ZiplineTestCase):
|
||||
expr,
|
||||
loader=loader,
|
||||
no_deltas_rule='ignore',
|
||||
no_checkpoints_rule='ignore',
|
||||
missing_values=self.missing_values,
|
||||
)
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from zipline.lib.labelarray import LabelArray
|
||||
from zipline.pipeline import Pipeline
|
||||
from zipline.pipeline.data.testing import TestingDataSet as TDS
|
||||
from zipline.testing import chrange, temp_pipeline_engine
|
||||
from zipline.utils.pandas_utils import ignore_pandas_nan_categorical_warning
|
||||
|
||||
|
||||
class LatestTestCase(TestCase):
|
||||
@@ -71,6 +72,8 @@ class LatestTestCase(TestCase):
|
||||
dates_to_test[-1],
|
||||
)
|
||||
for column in columns:
|
||||
col_result = result[column.name].unstack()
|
||||
with ignore_pandas_nan_categorical_warning():
|
||||
col_result = result[column.name].unstack()
|
||||
|
||||
expected_col_result = self.expected_latest(column, cal_slice)
|
||||
assert_frame_equal(col_result, expected_col_result)
|
||||
|
||||
@@ -25,12 +25,8 @@ from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
date_range,
|
||||
ewma,
|
||||
ewmstd,
|
||||
Int64Index,
|
||||
MultiIndex,
|
||||
rolling_apply,
|
||||
rolling_mean,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
@@ -1008,15 +1004,17 @@ class SyntheticBcolzTestCase(WithAdjustmentReader,
|
||||
# Shift back the raw inputs by a trading day because we expect our
|
||||
# computed results to be computed using values anchored on the
|
||||
# **previous** day's data.
|
||||
expected_raw = rolling_mean(
|
||||
expected_raw = DataFrame(
|
||||
expected_bar_values_2d(
|
||||
dates - self.trading_calendar.day,
|
||||
self.equity_info,
|
||||
'close',
|
||||
),
|
||||
).rolling(
|
||||
window_length,
|
||||
min_periods=1,
|
||||
)
|
||||
).mean(
|
||||
).values
|
||||
|
||||
expected = DataFrame(
|
||||
# Truncate off the extra rows needed to compute the SMAs.
|
||||
@@ -1122,19 +1120,31 @@ class ParameterizedFactorTestCase(WithTradingEnvironment, ZiplineTestCase):
|
||||
def expected_ewma(self, window_length, decay_rate):
|
||||
alpha = 1 - decay_rate
|
||||
span = (2 / alpha) - 1
|
||||
return rolling_apply(
|
||||
self.raw_data,
|
||||
window_length,
|
||||
lambda window: ewma(window, span=span)[-1],
|
||||
|
||||
# XXX: This is a comically inefficient way to compute a windowed EWMA.
|
||||
# Don't use it outside of testing. We're using rolling-apply of an
|
||||
# ewma (which is itself a rolling-window function) because we only want
|
||||
# to look at ``window_length`` rows at a time.
|
||||
return self.raw_data.rolling(window_length).apply(
|
||||
lambda subarray: (DataFrame(subarray)
|
||||
.ewm(span=span)
|
||||
.mean()
|
||||
.values[-1])
|
||||
)[window_length:]
|
||||
|
||||
def expected_ewmstd(self, window_length, decay_rate):
|
||||
alpha = 1 - decay_rate
|
||||
span = (2 / alpha) - 1
|
||||
return rolling_apply(
|
||||
self.raw_data,
|
||||
window_length,
|
||||
lambda window: ewmstd(window, span=span)[-1],
|
||||
|
||||
# XXX: This is a comically inefficient way to compute a windowed
|
||||
# EWMSTD. Don't use it outside of testing. We're using rolling-apply
|
||||
# of an ewmstd (which is itself a rolling-window function) because we
|
||||
# only want to look at ``window_length`` rows at a time.
|
||||
return self.raw_data.rolling(window_length).apply(
|
||||
lambda subarray: (DataFrame(subarray)
|
||||
.ewm(span=span)
|
||||
.std()
|
||||
.values[-1])
|
||||
)[window_length:]
|
||||
|
||||
@parameterized.expand([
|
||||
@@ -1259,7 +1269,7 @@ class ParameterizedFactorTestCase(WithTradingEnvironment, ZiplineTestCase):
|
||||
expected_1 = (self.raw_data[5:] ** 2) * 2
|
||||
assert_frame_equal(results['dv1'].unstack(), expected_1)
|
||||
|
||||
expected_5 = rolling_mean((self.raw_data ** 2) * 2, window=5)[5:]
|
||||
expected_5 = ((self.raw_data ** 2) * 2).rolling(5).mean()[5:]
|
||||
assert_frame_equal(results['dv5'].unstack(), expected_5)
|
||||
|
||||
# The following two use USEquityPricing.open and .volume as inputs.
|
||||
@@ -1269,9 +1279,11 @@ class ParameterizedFactorTestCase(WithTradingEnvironment, ZiplineTestCase):
|
||||
* self.raw_data[5:] * 2).fillna(0)
|
||||
assert_frame_equal(results['dv1_nan'].unstack(), expected_1_nan)
|
||||
|
||||
expected_5_nan = rolling_mean((self.raw_data_with_nans
|
||||
* self.raw_data * 2).fillna(0),
|
||||
window=5)[5:]
|
||||
expected_5_nan = ((self.raw_data_with_nans * self.raw_data * 2)
|
||||
.fillna(0)
|
||||
.rolling(5).mean()
|
||||
[5:])
|
||||
|
||||
assert_frame_equal(results['dv5_nan'].unstack(), expected_5_nan)
|
||||
|
||||
|
||||
|
||||
@@ -153,7 +153,7 @@ class EventIndexerTestCase(ZiplineTestCase):
|
||||
@classmethod
|
||||
def init_class_fixtures(cls):
|
||||
super(EventIndexerTestCase, cls).init_class_fixtures()
|
||||
cls.events = make_events(add_nulls=False).sort('event_date')
|
||||
cls.events = make_events(add_nulls=False).sort_values('event_date')
|
||||
cls.events.reset_index(inplace=True)
|
||||
|
||||
def test_previous_event_indexer(self):
|
||||
@@ -354,7 +354,7 @@ class EventsLoaderTestCase(WithAssetFinder,
|
||||
# from pandas won't be tz_localized.
|
||||
dates = self.trading_days.tz_localize(None)
|
||||
|
||||
for asset, asset_result in results.iterkv():
|
||||
for asset, asset_result in results.iteritems():
|
||||
relevant_events = events[events.sid == asset.sid]
|
||||
self.assertEqual(len(relevant_events), 2)
|
||||
|
||||
@@ -397,7 +397,7 @@ class EventsLoaderTestCase(WithAssetFinder,
|
||||
# Remove timezone info from trading days, since the outputs
|
||||
# from pandas won't be tz_localized.
|
||||
dates = self.trading_days.tz_localize(None)
|
||||
for asset, asset_result in results.iterkv():
|
||||
for asset, asset_result in results.iteritems():
|
||||
relevant_events = events[events.sid == asset.sid]
|
||||
self.assertEqual(len(relevant_events), 2)
|
||||
|
||||
|
||||
@@ -346,7 +346,7 @@ class PipelineAlgorithmTestCase(WithBcolzEquityDailyBarReaderFromCSVs,
|
||||
AAPL = 1
|
||||
MSFT = 2
|
||||
BRK_A = 3
|
||||
assets = ASSET_FINDER_EQUITY_SIDS = AAPL, MSFT, BRK_A
|
||||
ASSET_FINDER_EQUITY_SIDS = AAPL, MSFT, BRK_A
|
||||
ASSET_FINDER_EQUITY_SYMBOLS = 'AAPL', 'MSFT', 'BRK_A'
|
||||
START_DATE = Timestamp('2014')
|
||||
END_DATE = Timestamp('2015')
|
||||
@@ -403,6 +403,9 @@ class PipelineAlgorithmTestCase(WithBcolzEquityDailyBarReaderFromCSVs,
|
||||
)
|
||||
cls.dates = cls.raw_data[cls.AAPL].index.tz_localize('UTC')
|
||||
cls.AAPL_split_date = Timestamp("2014-06-09", tz='UTC')
|
||||
cls.assets = cls.asset_finder.retrieve_all(
|
||||
cls.ASSET_FINDER_EQUITY_SIDS
|
||||
)
|
||||
|
||||
def compute_expected_vwaps(self, window_lengths):
|
||||
AAPL, MSFT, BRK_A = self.AAPL, self.MSFT, self.BRK_A
|
||||
@@ -488,7 +491,7 @@ class PipelineAlgorithmTestCase(WithBcolzEquityDailyBarReaderFromCSVs,
|
||||
(False,),
|
||||
])
|
||||
def test_handle_adjustment(self, set_screen):
|
||||
AAPL, MSFT, BRK_A = assets = self.AAPL, self.MSFT, self.BRK_A
|
||||
AAPL, MSFT, BRK_A = assets = self.assets
|
||||
|
||||
window_lengths = [1, 2, 5, 10]
|
||||
vwaps = self.compute_expected_vwaps(window_lengths)
|
||||
|
||||
@@ -158,17 +158,17 @@ class TestFastStochasticOscillator(ZiplineTestCase):
|
||||
fso = FastStochasticOscillator()
|
||||
|
||||
today = pd.Timestamp('2015')
|
||||
assets = np.arange(3, dtype=np.float)
|
||||
out = np.empty(shape=(3,), dtype=np.float)
|
||||
assets = np.arange(3, dtype=np.float64)
|
||||
out = np.empty(shape=(3,), dtype=np.float64)
|
||||
|
||||
highs = np.full((50, 3), 3)
|
||||
lows = np.full((50, 3), 2)
|
||||
closes = np.full((50, 3), 4)
|
||||
highs = np.full((50, 3), 3, dtype=np.float64)
|
||||
lows = np.full((50, 3), 2, dtype=np.float64)
|
||||
closes = np.full((50, 3), 4, dtype=np.float64)
|
||||
|
||||
fso.compute(today, assets, out, closes, lows, highs)
|
||||
|
||||
# Expected %K
|
||||
assert_equal(out, np.full((3,), 200))
|
||||
assert_equal(out, np.full((3,), 200, dtype=np.float64))
|
||||
|
||||
@parameter_space(seed=range(5))
|
||||
def test_fso_expected_with_talib(self, seed):
|
||||
@@ -381,7 +381,7 @@ class TestLinearWeightedMovingAverage(ZiplineTestCase):
|
||||
today = pd.Timestamp('2015')
|
||||
assets = np.arange(5, dtype=np.int64)
|
||||
|
||||
data = np.arange(50, dtype=float).reshape((10, 5))
|
||||
data = np.arange(50, dtype=np.float64).reshape((10, 5))
|
||||
out = np.zeros(data.shape[1])
|
||||
|
||||
wma2.compute(today, assets, out, data)
|
||||
@@ -397,9 +397,9 @@ class TestTrueRange(ZiplineTestCase):
|
||||
assets = np.arange(3, dtype=np.int64)
|
||||
out = np.empty(3, dtype=np.float64)
|
||||
|
||||
highs = np.full((2, 3), 3)
|
||||
lows = np.full((2, 3), 2)
|
||||
closes = np.full((2, 3), 1)
|
||||
highs = np.full((2, 3), 3.)
|
||||
lows = np.full((2, 3), 2.)
|
||||
closes = np.full((2, 3), 1.)
|
||||
|
||||
tr.compute(today, assets, out, highs, lows, closes)
|
||||
assert_equal(out, np.full((3,), 2))
|
||||
assert_equal(out, np.full((3,), 2.))
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
-1
File diff suppressed because one or more lines are too long
-1
@@ -1 +0,0 @@
|
||||
{"names": ["open", "high", "low", "close", "volume", "day", "id"]}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{}
|
||||
BIN
Binary file not shown.
-1
@@ -1 +0,0 @@
|
||||
{"nbytes": 98584, "shape": [24646], "cbytes": 131072}
|
||||
-1
@@ -1 +0,0 @@
|
||||
{"chunklen": 32768, "dtype": "uint32", "expectedlen": 24646, "dflt": 0, "cparams": {"shuffle": true, "clevel": 5}}
|
||||
-1
File diff suppressed because one or more lines are too long
@@ -167,8 +167,12 @@ def main(ctx, rebuild_input):
|
||||
shutil.rmtree(expected_perf_path)
|
||||
shutil.copytree(new_perf_path, expected_perf_path)
|
||||
|
||||
with tarfile.open(example_path, 'w|gz') as tar:
|
||||
tar.add(d.getpath('example_data'), 'example_data')
|
||||
# Clear out all the temporary new perf so it doesn't get added to the
|
||||
# tarball.
|
||||
shutil.rmtree(d.getpath('example_data/new_perf/'))
|
||||
|
||||
with tarfile.open(example_path, 'w|gz') as tar:
|
||||
tar.add(d.getpath('example_data'), 'example_data')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -32,7 +32,7 @@ from testfixtures import TempDirectory
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytz
|
||||
from pandas.io.common import PerformanceWarning
|
||||
from pandas.core.common import PerformanceWarning
|
||||
|
||||
from zipline import run_algorithm
|
||||
from zipline import TradingAlgorithm
|
||||
@@ -3476,7 +3476,7 @@ class TestOrderCancelation(WithDataPortal,
|
||||
'high': minutes_arr + 2,
|
||||
'low': minutes_arr - 1,
|
||||
'close': minutes_arr,
|
||||
'volume': np.full(minutes_count, 1),
|
||||
'volume': np.full(minutes_count, 1.0),
|
||||
},
|
||||
index=asset_minutes,
|
||||
)
|
||||
@@ -3485,11 +3485,11 @@ class TestOrderCancelation(WithDataPortal,
|
||||
def make_equity_daily_bar_data(cls):
|
||||
yield 1, pd.DataFrame(
|
||||
{
|
||||
'open': np.full(3, 1),
|
||||
'high': np.full(3, 1),
|
||||
'low': np.full(3, 1),
|
||||
'close': np.full(3, 1),
|
||||
'volume': np.full(3, 1),
|
||||
'open': np.full(3, 1, dtype=np.float64),
|
||||
'high': np.full(3, 1, dtype=np.float64),
|
||||
'low': np.full(3, 1, dtype=np.float64),
|
||||
'close': np.full(3, 1, dtype=np.float64),
|
||||
'volume': np.full(3, 1, dtype=np.float64),
|
||||
},
|
||||
index=cls.sim_params.sessions,
|
||||
)
|
||||
|
||||
@@ -3,7 +3,7 @@ import warnings
|
||||
from mock import patch
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.io.common import PerformanceWarning
|
||||
from pandas.core.common import PerformanceWarning
|
||||
|
||||
from zipline import TradingAlgorithm
|
||||
from zipline.finance.trading import SimulationParameters
|
||||
|
||||
+2
-24
@@ -297,7 +297,6 @@ def handle_data(context, data):
|
||||
|
||||
@parameterized.expand([("unspecified", ""),
|
||||
("none", "usecols=None"),
|
||||
("empty", "usecols=[]"),
|
||||
("without date", "usecols=['Value']"),
|
||||
("with date", "usecols=('Value', 'Date')")])
|
||||
def test_usecols(self, testname, usecols):
|
||||
@@ -325,30 +324,9 @@ def initialize(context):
|
||||
context.stocks = [sid(3766), sid(25317)]
|
||||
|
||||
def handle_data(context, data):
|
||||
if {should_have_data}:
|
||||
try:
|
||||
data.current("urban", "cpi")
|
||||
except (KeyError, ValueError):
|
||||
assert False
|
||||
else:
|
||||
try:
|
||||
data.current("urban", "cpi")
|
||||
except (KeyError, ValueError):
|
||||
assert True
|
||||
data.current("urban", "cpi")
|
||||
"""
|
||||
|
||||
results = self.run_algo(
|
||||
code.format(
|
||||
usecols=usecols,
|
||||
should_have_data=testname in [
|
||||
'none',
|
||||
'unspecified',
|
||||
'without date',
|
||||
'with date',
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
results = self.run_algo(code.format(usecols=usecols))
|
||||
# 251 trading days in 2006
|
||||
self.assertEqual(len(results), 251)
|
||||
|
||||
|
||||
+29
-15
@@ -1,6 +1,7 @@
|
||||
from itertools import product
|
||||
from operator import eq, ne
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from zipline.lib.labelarray import LabelArray
|
||||
from zipline.testing import check_arrays, parameter_space, ZiplineTestCase
|
||||
@@ -234,22 +235,35 @@ class LabelArrayTestCase(ZiplineTestCase):
|
||||
l = LabelArray(self.strs, '')
|
||||
ints = np.arange(len(l))
|
||||
|
||||
for func in all_ufuncs():
|
||||
# Different ufuncs vary between returning NotImplemented and
|
||||
# raising a TypeError when provided with unknown dtypes.
|
||||
# This is a bit unfortunate, but still better than silently
|
||||
# accepting an int array.
|
||||
try:
|
||||
if func.nin == 1:
|
||||
ret = func(l)
|
||||
elif func.nin == 2:
|
||||
ret = func(l, ints)
|
||||
with warnings.catch_warnings():
|
||||
# Some ufuncs return NotImplemented, but warn that they will fail
|
||||
# in the future. Both outcomes are fine, so ignore the warnings.
|
||||
warnings.filterwarnings(
|
||||
'ignore',
|
||||
message="unorderable dtypes.*",
|
||||
category=DeprecationWarning,
|
||||
)
|
||||
warnings.filterwarnings(
|
||||
'ignore',
|
||||
message="elementwise comparison failed.*",
|
||||
category=FutureWarning,
|
||||
)
|
||||
for func in all_ufuncs():
|
||||
# Different ufuncs vary between returning NotImplemented and
|
||||
# raising a TypeError when provided with unknown dtypes.
|
||||
# This is a bit unfortunate, but still better than silently
|
||||
# accepting an int array.
|
||||
try:
|
||||
if func.nin == 1:
|
||||
ret = func(l)
|
||||
elif func.nin == 2:
|
||||
ret = func(l, ints)
|
||||
else:
|
||||
self.fail("Who added a ternary ufunc !?!")
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
self.fail("Who added a ternary ufunc !?!")
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
self.assertIs(ret, NotImplemented)
|
||||
self.assertIs(ret, NotImplemented)
|
||||
|
||||
@parameter_space(
|
||||
__fail_fast=True,
|
||||
|
||||
@@ -819,8 +819,9 @@ class TradingAlgorithm(object):
|
||||
else:
|
||||
self.risk_report = perf
|
||||
|
||||
daily_dts = [np.datetime64(perf['period_close'], utc=True)
|
||||
for perf in daily_perfs]
|
||||
daily_dts = pd.DatetimeIndex(
|
||||
[p['period_close'] for p in daily_perfs], tz='UTC'
|
||||
)
|
||||
daily_stats = pd.DataFrame(daily_perfs, index=daily_dts)
|
||||
|
||||
return daily_stats
|
||||
|
||||
@@ -189,7 +189,7 @@ def _generate_output_dataframe(data_subset, defaults):
|
||||
|
||||
|
||||
def _check_asset_group(group):
|
||||
row = group.sort('end_date').iloc[-1]
|
||||
row = group.sort_values('end_date').iloc[-1]
|
||||
row.start_date = group.start_date.min()
|
||||
row.end_date = group.end_date.max()
|
||||
row.drop(list(symbol_columns), inplace=True)
|
||||
|
||||
@@ -224,7 +224,7 @@ def make_future_info(first_sid,
|
||||
'exchange': "TEST",
|
||||
'exchange_full': 'TEST FULL',
|
||||
})
|
||||
return pd.DataFrame.from_records(contracts, index='sid').convert_objects()
|
||||
return pd.DataFrame.from_records(contracts, index='sid')
|
||||
|
||||
|
||||
def make_commodity_future_info(first_sid,
|
||||
|
||||
@@ -33,6 +33,8 @@ from zipline.lib.adjustment import Float64Multiply
|
||||
from zipline.assets.asset_writer import (
|
||||
SQLITE_MAX_VARIABLE_NUMBER as SQLITE_MAX_IN_STATEMENT,
|
||||
)
|
||||
from zipline.utils.pandas_utils import timedelta_to_integral_seconds
|
||||
|
||||
|
||||
_SID_QUERY_TEMPLATE = """
|
||||
SELECT DISTINCT sid FROM {0}
|
||||
@@ -170,8 +172,8 @@ cpdef load_adjustments_from_sqlite(object adjustments_db, # sqlite3.Connection
|
||||
index.
|
||||
"""
|
||||
|
||||
cdef int start_date = int((dates[0] - EPOCH).total_seconds())
|
||||
cdef int end_date = int((dates[-1] - EPOCH).total_seconds())
|
||||
cdef int start_date = timedelta_to_integral_seconds(dates[0] - EPOCH)
|
||||
cdef int end_date = timedelta_to_integral_seconds(dates[-1] - EPOCH)
|
||||
|
||||
cdef set split_sids = _get_split_sids(
|
||||
adjustments_db,
|
||||
|
||||
@@ -119,7 +119,7 @@ def fetch_symbol_metadata_frame(api_key,
|
||||
'name': 'asset_name',
|
||||
'oldest_available_date': 'start_date',
|
||||
'newest_available_date': 'end_date',
|
||||
}).sort('symbol')
|
||||
}).sort_values('symbol')
|
||||
|
||||
data = data[~data.symbol.isin(excluded_symbols)]
|
||||
# cut out all the other stuff in the name column
|
||||
|
||||
+49
-29
@@ -44,6 +44,7 @@ from zipline.utils.math_utils import (
|
||||
nanstd
|
||||
)
|
||||
from zipline.utils.memoize import remember_last, weak_lru_cache
|
||||
from zipline.utils.pandas_utils import timedelta_to_integral_minutes
|
||||
from zipline.errors import (
|
||||
NoTradeDataAvailableTooEarly,
|
||||
NoTradeDataAvailableTooLate,
|
||||
@@ -1106,44 +1107,63 @@ class DataPortal(object):
|
||||
else:
|
||||
return [assets] if isinstance(assets, Asset) else []
|
||||
|
||||
# cache size picked somewhat loosely. this code exists purely to
|
||||
# handle deprecated API.
|
||||
@weak_lru_cache(20)
|
||||
def _get_minute_count_for_transform(self, ending_minute, days_count):
|
||||
# cache size picked somewhat loosely. this code exists purely to
|
||||
# handle deprecated API.
|
||||
# This function works in three steps.
|
||||
# Step 1. Count the minutes from ``ending_minute`` to the start of its
|
||||
# session.
|
||||
# Step 2. Count the minutes from the prior ``days_count - 1`` sessions.
|
||||
# Step 3. Return the sum of the results from steps (1) and (2).
|
||||
|
||||
# bars is the number of days desired. we have to translate that
|
||||
# into the number of minutes we want.
|
||||
# we get all the minutes for the last (bars - 1) days, then add
|
||||
# all the minutes so far today. the +2 is to account for ignoring
|
||||
# today, and the previous day, in doing the math.
|
||||
session_for_minute = self.trading_calendar.minute_to_session_label(
|
||||
ending_minute
|
||||
)
|
||||
previous_session = self.trading_calendar.previous_session_label(
|
||||
session_for_minute
|
||||
# Example (NYSE Calendar)
|
||||
# ending_minute = 2015-12-28 9:40 AM US/Eastern
|
||||
# days_count = 3
|
||||
# Step 1. Calculate that there are 10 minutes in the ending session.
|
||||
# Step 2. Calculate that there are 390 + 210 = 600 minutes in the prior
|
||||
# two sessions. (Prior sessions are 2015-12-23 and 2015-12-24.)
|
||||
# 2015-12-24 is a half day.
|
||||
# Step 3. Return 600 + 10 = 610.
|
||||
|
||||
cal = self.trading_calendar
|
||||
|
||||
ending_session = cal.minute_to_session_label(
|
||||
ending_minute,
|
||||
direction="none", # It's an error to pass a non-trading minute.
|
||||
)
|
||||
|
||||
sessions = self.trading_calendar.sessions_in_range(
|
||||
self.trading_calendar.sessions_window(previous_session,
|
||||
-days_count + 2)[0],
|
||||
previous_session,
|
||||
# Assume that calendar days are always full of contiguous minutes,
|
||||
# which means we can just take 1 + (number of minutes between the last
|
||||
# minute and the start of the session). We add one so that we include
|
||||
# the ending minute in the total.
|
||||
ending_session_minute_count = timedelta_to_integral_minutes(
|
||||
ending_minute - cal.open_and_close_for_session(ending_session)[0]
|
||||
) + 1
|
||||
|
||||
if days_count == 1:
|
||||
# We just need sessions for the active day.
|
||||
return ending_session_minute_count
|
||||
|
||||
# XXX: We're subtracting 2 here to account for two offsets:
|
||||
# 1. We only want ``days_count - 1`` sessions, since we've already
|
||||
# accounted for the ending session above.
|
||||
# 2. The API of ``sessions_window`` is to return one more session than
|
||||
# the requested number. I don't think any consumers actually want
|
||||
# that behavior, but it's the tested and documented behavior right
|
||||
# now, so we have to request one less session than we actually want.
|
||||
completed_sessions = cal.sessions_window(
|
||||
cal.previous_session_label(ending_session),
|
||||
2 - days_count,
|
||||
)
|
||||
|
||||
minutes_count = \
|
||||
completed_sessions_minute_count = (
|
||||
self.trading_calendar.minutes_count_for_sessions_in_range(
|
||||
sessions[0],
|
||||
sessions[-1]
|
||||
completed_sessions[0],
|
||||
completed_sessions[-1]
|
||||
)
|
||||
|
||||
# add the minutes for today
|
||||
today_open = self.trading_calendar.open_and_close_for_session(
|
||||
session_for_minute
|
||||
)[0]
|
||||
|
||||
minutes_count += \
|
||||
((ending_minute - today_open).total_seconds() // 60) + 1
|
||||
|
||||
return minutes_count
|
||||
)
|
||||
return ending_session_minute_count + completed_sessions_minute_count
|
||||
|
||||
def get_simple_transform(self, asset, transform_name, dt, data_frequency,
|
||||
bars=None):
|
||||
|
||||
@@ -695,12 +695,12 @@ class BcolzMinuteBarWriter(object):
|
||||
|
||||
all_minutes = self._minute_index
|
||||
# Get the latest minute we wish to write to the ctable
|
||||
last_minute_to_write = dts[-1]
|
||||
last_minute_to_write = pd.Timestamp(dts[-1], tz='UTC')
|
||||
|
||||
# In the event that we've already written some minutely data to the
|
||||
# ctable, guard against overwritting that data.
|
||||
# ctable, guard against overwriting that data.
|
||||
if num_rec_mins > 0:
|
||||
last_recorded_minute = np.datetime64(all_minutes[num_rec_mins - 1])
|
||||
last_recorded_minute = all_minutes[num_rec_mins - 1]
|
||||
if last_minute_to_write <= last_recorded_minute:
|
||||
raise BcolzMinuteOverlappingData(dedent("""
|
||||
Data with last_date={0} already includes input start={1} for
|
||||
|
||||
@@ -54,8 +54,7 @@ def minute_to_session(minute_frame, calendar):
|
||||
"""
|
||||
how = OrderedDict((c, _MINUTE_TO_SESSION_OHCLV_HOW[c])
|
||||
for c in minute_frame.columns)
|
||||
return minute_frame.groupby(calendar.minute_to_session_label).agg(
|
||||
how)
|
||||
return minute_frame.groupby(calendar.minute_to_session_label).agg(how)
|
||||
|
||||
|
||||
class DailyHistoryAggregator(object):
|
||||
|
||||
@@ -19,6 +19,7 @@ import logbook
|
||||
|
||||
from six import iteritems
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from . import risk
|
||||
@@ -95,11 +96,11 @@ class RiskMetricsPeriod(object):
|
||||
raise Exception(message)
|
||||
|
||||
self.num_trading_days = len(self.benchmark_returns)
|
||||
self.trading_day_counts = pd.stats.moments.rolling_count(
|
||||
self.algorithm_returns, self.num_trading_days)
|
||||
|
||||
self.mean_algorithm_returns = \
|
||||
self.algorithm_returns.cumsum() / self.trading_day_counts
|
||||
self.mean_algorithm_returns = (
|
||||
self.algorithm_returns.cumsum() /
|
||||
np.arange(1, self.num_trading_days + 1, dtype=np.float64)
|
||||
)
|
||||
|
||||
self.benchmark_volatility = annual_volatility(self.benchmark_returns)
|
||||
self.algorithm_volatility = annual_volatility(self.algorithm_returns)
|
||||
|
||||
+14
-13
@@ -24,6 +24,7 @@ from zipline.utils.numpy_utils import (
|
||||
int_dtype_with_size_in_bytes,
|
||||
is_object,
|
||||
)
|
||||
from zipline.utils.pandas_utils import ignore_pandas_nan_categorical_warning
|
||||
|
||||
from ._factorize import (
|
||||
factorize_strings,
|
||||
@@ -231,8 +232,9 @@ class LabelArray(ndarray):
|
||||
In this case, obj will be None. We treat this as an error case and
|
||||
fail.
|
||||
|
||||
2. Someone (most likely our own __new__) calls
|
||||
other_array.view(type=LabelArray).
|
||||
2. Someone (most likely our own __new__) does::
|
||||
|
||||
>>> other_array.view(type=LabelArray) # doctest: +SKIP
|
||||
|
||||
In this case, `self` will be the new LabelArray instance, and
|
||||
``obj`` will be the array on which ``view`` is being called.
|
||||
@@ -283,14 +285,16 @@ class LabelArray(ndarray):
|
||||
"""
|
||||
if len(self.shape) > 1:
|
||||
raise ValueError("Can't convert a 2D array to a categorical.")
|
||||
return pd.Categorical.from_codes(
|
||||
self.as_int_array(),
|
||||
# We need to make a copy because pandas >= 0.17 fails if this
|
||||
# buffer isn't writeable.
|
||||
self.categories.copy(),
|
||||
ordered=False,
|
||||
name=name,
|
||||
)
|
||||
|
||||
with ignore_pandas_nan_categorical_warning():
|
||||
return pd.Categorical.from_codes(
|
||||
self.as_int_array(),
|
||||
# We need to make a copy because pandas >= 0.17 fails if this
|
||||
# buffer isn't writeable.
|
||||
self.categories.copy(),
|
||||
ordered=False,
|
||||
name=name,
|
||||
)
|
||||
|
||||
def as_categorical_frame(self, index, columns, name=None):
|
||||
"""
|
||||
@@ -475,9 +479,6 @@ class LabelArray(ndarray):
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
# This happens if you call a ufunc on a LabelArray that changes the
|
||||
# dtype. This is generally an indicator that the array has been used
|
||||
# incorrectly, and it means we're no longer valid for anything.
|
||||
repr_lines = repr(self.as_string_array()).splitlines()
|
||||
repr_lines[0] = repr_lines[0].replace('array(', 'LabelArray(', 1)
|
||||
repr_lines[-1] = repr_lines[-1].rsplit(',', 1)[0] + ')'
|
||||
|
||||
+1
-10
@@ -18,7 +18,7 @@ from numpy import apply_along_axis, float64, isnan, nan
|
||||
from scipy.stats import rankdata
|
||||
|
||||
from zipline.utils.numpy_utils import (
|
||||
is_float,
|
||||
is_missing,
|
||||
float64_dtype,
|
||||
int64_dtype,
|
||||
datetime64ns_dtype,
|
||||
@@ -28,15 +28,6 @@ from zipline.utils.numpy_utils import (
|
||||
import_array()
|
||||
|
||||
|
||||
cpdef is_missing(ndarray data, object missing_value):
|
||||
"""
|
||||
Generic is_missing function that handles quirks with NaN.
|
||||
"""
|
||||
if is_float(data) and isnan(missing_value):
|
||||
return isnan(data)
|
||||
return (data == missing_value)
|
||||
|
||||
|
||||
def rankdata_1d_descending(ndarray data, str method):
|
||||
"""
|
||||
1D descending version of scipy.stats.rankdata.
|
||||
|
||||
@@ -187,7 +187,6 @@ from zipline.utils.input_validation import (
|
||||
optionally,
|
||||
)
|
||||
from zipline.utils.numpy_utils import bool_dtype, categorical_dtype
|
||||
from zipline.utils.pandas_utils import sort_values
|
||||
from zipline.utils.pool import SequentialPool
|
||||
from zipline.utils.preprocess import preprocess
|
||||
|
||||
@@ -754,7 +753,7 @@ def overwrite_novel_deltas(baseline, deltas, dates):
|
||||
ignore_index=True,
|
||||
copy=False,
|
||||
)
|
||||
sort_values(cat, TS_FIELD_NAME, inplace=True)
|
||||
cat.sort_values(TS_FIELD_NAME, inplace=True)
|
||||
return cat, non_novel_deltas
|
||||
|
||||
|
||||
|
||||
@@ -109,10 +109,12 @@ class EventsLoader(PipelineLoader):
|
||||
events = events[events[EVENT_DATE_FIELD_NAME].notnull()]
|
||||
|
||||
# We always work with entries from ``events`` directly as numpy arrays,
|
||||
# so we coerce from a frame here.
|
||||
# so we coerce from a frame to a dict of arrays here.
|
||||
self.events = {
|
||||
name: np.asarray(series)
|
||||
for name, series in events.sort(EVENT_DATE_FIELD_NAME).iteritems()
|
||||
for name, series in (
|
||||
events.sort_values(EVENT_DATE_FIELD_NAME).iteritems()
|
||||
)
|
||||
}
|
||||
|
||||
# Columns to load with self.load_next_events.
|
||||
|
||||
@@ -16,7 +16,6 @@ from pandas import (
|
||||
from zipline.lib.adjusted_array import AdjustedArray
|
||||
from zipline.lib.adjustment import make_adjustment_from_labels
|
||||
from zipline.utils.numpy_utils import as_column
|
||||
from zipline.utils.pandas_utils import sort_values
|
||||
from .base import PipelineLoader
|
||||
|
||||
ADJUSTMENT_COLUMNS = Index([
|
||||
@@ -73,7 +72,7 @@ class DataFrameLoader(PipelineLoader):
|
||||
else:
|
||||
# Ensure that columns are in the correct order.
|
||||
adjustments = adjustments.reindex_axis(ADJUSTMENT_COLUMNS, axis=1)
|
||||
sort_values(adjustments, ['apply_date', 'sid'], inplace=True)
|
||||
adjustments.sort_values(['apply_date', 'sid'], inplace=True)
|
||||
|
||||
self.adjustments = adjustments
|
||||
self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
|
||||
|
||||
@@ -172,7 +172,7 @@ class SeededRandomLoader(PrecomputedLoader):
|
||||
"""
|
||||
Return uniformly-distributed integers between 0 and 100.
|
||||
"""
|
||||
return (self.state.random_integers(low=0, high=100, size=shape)
|
||||
return (self.state.randint(low=0, high=100, size=shape)
|
||||
.astype('int64')) # default is system int
|
||||
|
||||
def _datetime_values(self, shape):
|
||||
@@ -180,7 +180,7 @@ class SeededRandomLoader(PrecomputedLoader):
|
||||
Return uniformly-distributed dates in 2014.
|
||||
"""
|
||||
start = Timestamp('2014', tz='UTC').asm8
|
||||
offsets = self.state.random_integers(
|
||||
offsets = self.state.randint(
|
||||
low=0,
|
||||
high=364,
|
||||
size=shape,
|
||||
|
||||
@@ -224,13 +224,13 @@ class PandasCSV(with_metaclass(ABCMeta, object)):
|
||||
date_str_series.values,
|
||||
format=format_str,
|
||||
utc=True,
|
||||
coerce=True,
|
||||
errors='coerce',
|
||||
)
|
||||
else:
|
||||
parsed = pd.to_datetime(
|
||||
date_str_series.values,
|
||||
format=format_str,
|
||||
coerce=True,
|
||||
errors='coerce',
|
||||
).tz_localize(tz_str).tz_convert('UTC')
|
||||
|
||||
if data_frequency == 'daily':
|
||||
@@ -301,7 +301,7 @@ class PandasCSV(with_metaclass(ABCMeta, object)):
|
||||
df['sid'] = self.symbol
|
||||
elif self.finder:
|
||||
|
||||
df.sort(self.symbol_column)
|
||||
df.sort_values(by=self.symbol_column, inplace=True)
|
||||
|
||||
# Pop the 'sid' column off of the DataFrame, just in case the user
|
||||
# has assigned it, and throw a warning
|
||||
|
||||
+16
-3
@@ -49,7 +49,8 @@ from zipline.pipeline.loaders.testing import make_seeded_random_loader
|
||||
from zipline.utils import security_list
|
||||
from zipline.utils.calendars import get_calendar
|
||||
from zipline.utils.input_validation import expect_dimensions
|
||||
from zipline.utils.numpy_utils import as_column
|
||||
from zipline.utils.numpy_utils import as_column, isnat
|
||||
from zipline.utils.pandas_utils import timedelta_to_integral_seconds
|
||||
from zipline.utils.sentinel import sentinel
|
||||
|
||||
import numpy as np
|
||||
@@ -78,7 +79,7 @@ def str_to_seconds(s):
|
||||
>>> str_to_seconds('2014-01-01')
|
||||
1388534400
|
||||
"""
|
||||
return int((pd.Timestamp(s, tz='UTC') - EPOCH).total_seconds())
|
||||
return timedelta_to_integral_seconds(pd.Timestamp(s, tz='UTC') - EPOCH)
|
||||
|
||||
|
||||
def drain_zipline(test, zipline):
|
||||
@@ -393,6 +394,18 @@ def check_arrays(x, y, err_msg='', verbose=True, check_dtypes=True):
|
||||
# ...then check the actual values as well.
|
||||
x = x.as_string_array()
|
||||
y = y.as_string_array()
|
||||
elif x.dtype.kind in 'mM':
|
||||
x_isnat = isnat(x)
|
||||
y_isnat = isnat(y)
|
||||
assert_array_equal(
|
||||
x_isnat,
|
||||
y_isnat,
|
||||
err_msg="NaTs not equal",
|
||||
verbose=verbose,
|
||||
)
|
||||
# Fill NaTs with zero for comparison.
|
||||
x = np.where(x_isnat, np.zeros_like(x), x)
|
||||
# Fill y's own NaTs from y's own mask; building y from x here would make
# the final value comparison trivially compare x against itself.
y = np.where(y_isnat, np.zeros_like(y), y)
|
||||
|
||||
return assert_array_equal(x, y, err_msg=err_msg, verbose=verbose)
|
||||
|
||||
@@ -709,7 +722,7 @@ class FakeDataPortal(DataPortal):
|
||||
]
|
||||
|
||||
df = pd.DataFrame(
|
||||
np.full((bar_count, len(assets)), 100),
|
||||
np.full((bar_count, len(assets)), 100.0),
|
||||
index=days,
|
||||
columns=assets
|
||||
)
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
from abc import ABCMeta, abstractproperty
|
||||
from lru import LRU
|
||||
import warnings
|
||||
|
||||
from pandas.tseries.holiday import AbstractHolidayCalendar
|
||||
from six import with_metaclass
|
||||
@@ -66,7 +67,14 @@ class TradingCalendar(with_metaclass(ABCMeta)):
|
||||
"""
|
||||
def __init__(self, start=start_default, end=end_default):
|
||||
# Midnight in UTC for each trading day.
|
||||
_all_days = date_range(start, end, freq=self.day, tz='UTC')
|
||||
|
||||
# In pandas 0.18.1, pandas calls into its own code here in a way that
|
||||
# fires a warning. The calling code in pandas tries to suppress the
|
||||
# warning, but does so incorrectly, causing it to bubble out here.
|
||||
# Actually catch and suppress the warning here:
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore')
|
||||
_all_days = date_range(start, end, freq=self.day, tz='UTC')
|
||||
|
||||
# `DatetimeIndex`s of standard opens/closes for each day.
|
||||
self._opens = days_at_time(_all_days, self.open_time, self.tz,
|
||||
|
||||
+56
-15
@@ -15,12 +15,17 @@
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import namedtuple
|
||||
import six
|
||||
import warnings
|
||||
|
||||
import datetime
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytz
|
||||
from toolz import curry
|
||||
|
||||
from zipline.utils.input_validation import preprocess
|
||||
from zipline.utils.memoize import lazyval
|
||||
|
||||
from .context_tricks import nop_context
|
||||
|
||||
|
||||
@@ -147,6 +152,31 @@ def _build_time(time, kwargs):
|
||||
return datetime.time(**kwargs)
|
||||
|
||||
|
||||
@curry
def lossless_float_to_int(funcname, func, argname, arg):
    """
    A preprocessor that coerces integral floats to ints.

    Receipt of non-integral floats raises a TypeError.

    Parameters
    ----------
    funcname : str
        Name reported in the coercion warning.
    func : object
        Not used by this function.
    argname : str
        Name of the argument being processed; reported in the warning.
    arg : object
        The value to process.

    Returns
    -------
    object
        ``arg`` unchanged when it is not a float; otherwise ``int(arg)``
        when ``arg`` has an integral value.

    Raises
    ------
    TypeError
        If ``arg`` is a float without an integral value. This includes
        NaN and +/-inf.
    """
    if not isinstance(arg, float):
        return arg

    # ``float.is_integer`` is False for NaN and +/-inf, so those values
    # reach the TypeError below instead of making ``int(arg)`` raise
    # ValueError / OverflowError.
    if arg.is_integer():
        warnings.warn(
            "{f} expected an int for argument {name!r}, but got float {arg}."
            " Coercing to int.".format(
                f=funcname,
                name=argname,
                arg=arg,
            ),
        )
        return int(arg)

    raise TypeError(arg)
|
||||
|
||||
|
||||
class EventManager(object):
|
||||
"""Manages a list of Event objects.
|
||||
This manages the logic for checking the rules and dispatching to the
|
||||
@@ -401,23 +431,28 @@ class NotHalfDay(StatelessRule):
|
||||
|
||||
|
||||
class TradingDayOfWeekRule(six.with_metaclass(ABCMeta, StatelessRule)):
|
||||
@preprocess(n=lossless_float_to_int('TradingDayOfWeekRule'))
|
||||
def __init__(self, n, invert):
|
||||
if not 0 <= n < MAX_WEEK_RANGE:
|
||||
raise _out_of_range_error(MAX_WEEK_RANGE)
|
||||
|
||||
self.td_delta = (-n - 1) if invert else n
|
||||
|
||||
@lazyval
|
||||
def execution_periods(self):
|
||||
# calculate the list of periods that match the given criteria
|
||||
return self.cal.schedule.groupby(
|
||||
pd.Grouper(freq="W")
|
||||
).nth(int(self.td_delta)).index
|
||||
|
||||
def should_trigger(self, dt):
|
||||
# is this market minute's period in the list of execution periods?
|
||||
return self.cal.minute_to_session_label(dt) in \
|
||||
self.execution_periods
|
||||
val = self.cal.minute_to_session_label(dt, direction="none").value
|
||||
return val in self.execution_period_values
|
||||
|
||||
@lazyval
|
||||
def execution_period_values(self):
|
||||
# calculate the list of periods that match the given criteria
|
||||
sessions = self.cal.all_sessions
|
||||
return set(
|
||||
pd.Series(data=sessions)
|
||||
.groupby([sessions.year, sessions.weekofyear])
|
||||
.nth(self.td_delta)
|
||||
.astype(np.int64)
|
||||
)
|
||||
|
||||
|
||||
class NthTradingDayOfWeek(TradingDayOfWeekRule):
|
||||
@@ -438,6 +473,8 @@ class NDaysBeforeLastTradingDayOfWeek(TradingDayOfWeekRule):
|
||||
|
||||
|
||||
class TradingDayOfMonthRule(six.with_metaclass(ABCMeta, StatelessRule)):
|
||||
|
||||
@preprocess(n=lossless_float_to_int('TradingDayOfMonthRule'))
|
||||
def __init__(self, n, invert):
|
||||
if not 0 <= n < MAX_MONTH_RANGE:
|
||||
raise _out_of_range_error(MAX_MONTH_RANGE)
|
||||
@@ -448,15 +485,19 @@ class TradingDayOfMonthRule(six.with_metaclass(ABCMeta, StatelessRule)):
|
||||
|
||||
def should_trigger(self, dt):
|
||||
# is this market minute's period in the list of execution periods?
|
||||
return self.cal.minute_to_session_label(dt) in \
|
||||
self.execution_periods
|
||||
value = self.cal.minute_to_session_label(dt, direction="none").value
|
||||
return value in self.execution_period_values
|
||||
|
||||
@lazyval
|
||||
def execution_periods(self):
|
||||
def execution_period_values(self):
|
||||
# calculate the list of periods that match the given criteria
|
||||
return self.cal.schedule.groupby(
|
||||
pd.Grouper(freq="M")
|
||||
).nth(int(self.td_delta)).index
|
||||
sessions = self.cal.all_sessions
|
||||
return set(
|
||||
pd.Series(data=sessions)
|
||||
.groupby([sessions.year, sessions.month])
|
||||
.nth(self.td_delta)
|
||||
.astype(np.int64)
|
||||
)
|
||||
|
||||
|
||||
class NthTradingDayOfMonth(TradingDayOfMonthRule):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Utilities for working with numpy arrays.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
from warnings import (
|
||||
catch_warnings,
|
||||
@@ -16,6 +17,7 @@ from numpy import (
|
||||
empty,
|
||||
flatnonzero,
|
||||
hstack,
|
||||
isnan,
|
||||
nan,
|
||||
vectorize,
|
||||
where
|
||||
@@ -60,12 +62,12 @@ _FILLVALUE_DEFAULTS = {
|
||||
object_dtype: None,
|
||||
}
|
||||
|
||||
INT_DTYPES_BY_SIZE_BYTES = {
|
||||
1: dtype('int8'),
|
||||
2: dtype('int16'),
|
||||
4: dtype('int32'),
|
||||
8: dtype('int64'),
|
||||
}
|
||||
INT_DTYPES_BY_SIZE_BYTES = OrderedDict([
|
||||
(1, dtype('int8')),
|
||||
(2, dtype('int16')),
|
||||
(4, dtype('int32')),
|
||||
(8, dtype('int64')),
|
||||
])
|
||||
|
||||
|
||||
def int_dtype_with_size_in_bytes(size):
|
||||
@@ -285,6 +287,28 @@ def rolling_window(array, length):
|
||||
|
||||
# Sentinel value that isn't NaT.
|
||||
_notNaT = make_datetime64D(0)
|
||||
iNaT = NaTns.view(int64_dtype)
|
||||
assert iNaT == NaTD.view(int64_dtype), "iNaTns != iNaTD"
|
||||
|
||||
|
||||
def isnat(obj):
    """
    Check if a value is np.NaT.

    Parameters
    ----------
    obj : object
        A value exposing ``dtype`` and ``view``, with a datetime64 or
        timedelta64 dtype (dtype kind ``'M'`` or ``'m'``).

    Returns
    -------
    The result of comparing the int64 view of ``obj`` against ``iNaT``.

    Raises
    ------
    ValueError
        If the dtype kind of ``obj`` is neither ``'m'`` nor ``'M'``.
    """
    if obj.dtype.kind not in ('m', 'M'):
        # Wrap ``obj`` in a tuple so that array arguments are formatted as
        # a single value.
        raise ValueError(
            "%s is not a numpy datetime or timedelta" % (obj,)
        )
    return obj.view(int64_dtype) == iNaT
|
||||
|
||||
|
||||
def is_missing(data, missing_value):
    """
    Generic is_missing function that handles NaN and NaT.

    Returns ``isnan(data)`` when ``data`` is float and ``missing_value`` is
    NaN, ``isnat(data)`` when ``data`` is datetime and ``missing_value`` is
    NaT, and ``data == missing_value`` in every other case.
    """
    if is_float(data) and isnan(missing_value):
        mask = isnan(data)
    elif is_datetime(data) and isnat(missing_value):
        mask = isnat(data)
    else:
        mask = (data == missing_value)
    return mask
|
||||
|
||||
|
||||
def busday_count_mask_NaT(begindates, enddates, out=None):
|
||||
@@ -302,8 +326,8 @@ def busday_count_mask_NaT(begindates, enddates, out=None):
|
||||
if out is None:
|
||||
out = empty(broadcast(begindates, enddates).shape, dtype=float)
|
||||
|
||||
beginmask = (begindates == NaTD)
|
||||
endmask = (enddates == NaTD)
|
||||
beginmask = isnat(begindates)
|
||||
endmask = isnat(enddates)
|
||||
|
||||
out = busday_count(
|
||||
# Temporarily fill in non-NaT values.
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
"""
|
||||
Utilities for working with pandas objects.
|
||||
"""
|
||||
from contextlib import contextmanager
|
||||
from itertools import product
|
||||
import operator as op
|
||||
import warnings
|
||||
|
||||
import pandas as pd
|
||||
from distutils.version import StrictVersion
|
||||
@@ -10,6 +12,10 @@ from distutils.version import StrictVersion
|
||||
pandas_version = StrictVersion(pd.__version__)
|
||||
|
||||
|
||||
def july_5th_holiday_observance(datetime_index):
    """
    Return the entries of ``datetime_index`` whose year is not 2013.
    """
    not_2013 = datetime_index.year != 2013
    return datetime_index[not_2013]
|
||||
|
||||
|
||||
def explode(df):
|
||||
"""
|
||||
Take a DataFrame and return a triple of
|
||||
@@ -19,19 +25,6 @@ def explode(df):
|
||||
return df.index, df.columns, df.values
|
||||
|
||||
|
||||
try:
|
||||
# This branch is hit in pandas 17
|
||||
sort_values = pd.DataFrame.sort_values
|
||||
except AttributeError:
|
||||
# This branch is hit in pandas 16
|
||||
sort_values = pd.DataFrame.sort
|
||||
|
||||
if pandas_version >= StrictVersion('0.17.1'):
|
||||
july_5th_holiday_observance = lambda dtix: dtix[dtix.year != 2013]
|
||||
else:
|
||||
july_5th_holiday_observance = lambda dt: None if dt.year == 2013 else dt
|
||||
|
||||
|
||||
def _time_to_micros(time):
|
||||
"""Convert a time into microseconds since midnight.
|
||||
Parameters
|
||||
@@ -146,3 +139,30 @@ def nearest_unequal_elements(dts, dt):
|
||||
upper_value = dts[upper_ix] if upper_ix < len(dts) else None
|
||||
|
||||
return lower_value, upper_value
|
||||
|
||||
|
||||
def timedelta_to_integral_seconds(delta):
    """
    Convert a pd.Timedelta to a number of seconds as an int.

    Any fractional part of ``delta.total_seconds()`` is discarded by
    ``int``, which truncates toward zero.
    """
    seconds = delta.total_seconds()
    return int(seconds)
|
||||
|
||||
|
||||
def timedelta_to_integral_minutes(delta):
    """
    Convert a pd.Timedelta to a number of minutes as an int.

    The whole-second count from ``timedelta_to_integral_seconds`` is
    floor-divided by 60.
    """
    whole_seconds = timedelta_to_integral_seconds(delta)
    return whole_seconds // 60
|
||||
|
||||
|
||||
@contextmanager
def ignore_pandas_nan_categorical_warning():
    """
    Context manager that silences ``FutureWarning`` inside its body.

    The warning filters are saved on entry and restored on exit by
    ``warnings.catch_warnings``.
    """
    # Pandas >= 0.18 doesn't like null-ish values in categories, but
    # avoiding that requires a broader change to how missing values are
    # handled in pipeline, so for now just silence the warning.
    with warnings.catch_warnings():
        warnings.filterwarnings(action='ignore', category=FutureWarning)
        yield
|
||||
|
||||
Reference in New Issue
Block a user