From 0e3b1e76e82b42733ede1a44e2cf807a081a1756 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Thu, 20 Dec 2012 11:09:32 -0500 Subject: [PATCH 1/7] ENH: batch_transform now supports sid-filtering. DOC: Added docs to batch_transform. --- tests/test_transforms.py | 4 ++++ zipline/test_algorithms.py | 15 +++++++++++++++ zipline/transforms/utils.py | 37 +++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index f3b2ffe2..a83785e0 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -342,6 +342,10 @@ class TestBatchTransform(TestCase): 'arbitrary dataframe should contain only "test"' ) + for data in algo.history_return_sid_filter[wl:]: + self.assertIn(0, data.columns) + self.assertNotIn(1, data.columns) + # test overloaded class for test_history in [algo.history_return_price_class, algo.history_return_price_decorator]: diff --git a/zipline/test_algorithms.py b/zipline/test_algorithms.py index ee3e41a2..7dbc1385 100644 --- a/zipline/test_algorithms.py +++ b/zipline/test_algorithms.py @@ -268,6 +268,7 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.history_return_args = [] self.history_return_arbitrary_fields = [] self.history_return_nan = [] + self.history_return_sid_filter = [] self.return_price_class = ReturnPriceBatchTransform( refresh_period=self.refresh_period, @@ -305,6 +306,13 @@ class BatchTransformAlgorithm(TradingAlgorithm): clean_nans=True ) + self.return_sid_filter = return_price_batch_decorator( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True, + sids=[0] + ) + self.iter = 0 self.set_slippage(FixedSlippage()) @@ -339,6 +347,13 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.iter += 1 + # Add a new sid to check that it does not get included + extra_sid_data = deepcopy(data) + extra_sid_data[1] = extra_sid_data[0] + self.history_return_sid_filter.append( + 
self.return_sid_filter.handle_data(extra_sid_data) + ) + class SetPortfolioAlgorithm(TradingAlgorithm): """ diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 369f8054..5f4c706a 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -343,7 +343,27 @@ class BatchTransform(EventWindow): func=None, refresh_period=None, window_length=None, - clean_nans=True): + clean_nans=True, + sids=None): + """Instantiate new batch_transform object. + + :Arguments: + func : python function + If supplied will be called after each refresh_period + with the data panel and all args and kwargs supplied + to the handle_data() call. + refresh_period : int + Interval to call batch_transform function. + window_length : int + How many days the trailing window should have. + clean_nans : bool + Whether to (forward) fill in nans. + sids : list + Which sids to include in the moving window. If not + supplied sids will be extracted from incoming + events. + + """ super(BatchTransform, self).__init__(True, window_length=window_length) @@ -355,6 +375,8 @@ class BatchTransform(EventWindow): self.clean_nans = clean_nans + self.sids = sids + self.refresh_period = refresh_period self.window_length = window_length self.trading_days_since_update = 0 @@ -445,7 +467,13 @@ class BatchTransform(EventWindow): """ # This Panel data structure ultimately gets passed to the # user-overloaded get_value() method. - sids = set.union(*[set(tick.data.keys()) for tick in self.ticks]) + + # If sids are set, use those. Otherwise extract. 
+ if self.sids is not None: + sids = self.sids + else: + sids = set.union(*[set(tick.data.keys()) for tick in self.ticks]) + dts = [tick.dt for tick in self.ticks] data = pd.Panel(items=self.field_names, major_axis=dts, @@ -454,9 +482,10 @@ class BatchTransform(EventWindow): # Fill data panel for tick in self.ticks: dt = tick.dt - for sid, fields in tick.data.iteritems(): + for sid in sids: + fields = tick.data[sid] for field_name in self.field_names: - data[field_name][sid].ix[dt] = fields[field_name] + data[field_name][sid].ix[dt] = fields[field_name] if self.clean_nans: # Fills in gaps of missing data during transform From 2729936affbfaae0535d5e96e3725502e909bf4b Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Thu, 20 Dec 2012 11:56:49 -0500 Subject: [PATCH 2/7] ENH: batch_transform now supports field filtering. --- tests/test_transforms.py | 8 ++++++++ zipline/test_algorithms.py | 25 +++++++++++++++++++++++++ zipline/transforms/utils.py | 13 +++++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index a83785e0..1b2536dc 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -346,6 +346,14 @@ class TestBatchTransform(TestCase): self.assertIn(0, data.columns) self.assertNotIn(1, data.columns) + for data in algo.history_return_field_filter[wl:]: + self.assertIn('price', data.items) + self.assertNotIn('ignore', data.items) + + for data in algo.history_return_field_no_filter[wl:]: + self.assertIn('price', data.items) + self.assertIn('ignore', data.items) + # test overloaded class for test_history in [algo.history_return_price_class, algo.history_return_price_decorator]: diff --git a/zipline/test_algorithms.py b/zipline/test_algorithms.py index 7dbc1385..735e3348 100644 --- a/zipline/test_algorithms.py +++ b/zipline/test_algorithms.py @@ -269,6 +269,8 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.history_return_arbitrary_fields = [] self.history_return_nan = [] 
self.history_return_sid_filter = [] + self.history_return_field_filter = [] + self.history_return_field_no_filter = [] self.return_price_class = ReturnPriceBatchTransform( refresh_period=self.refresh_period, @@ -313,6 +315,19 @@ class BatchTransformAlgorithm(TradingAlgorithm): sids=[0] ) + self.return_field_filter = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True, + fields=['price'] + ) + + self.return_field_no_filter = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True + ) + self.iter = 0 self.set_slippage(FixedSlippage()) @@ -354,6 +369,16 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.return_sid_filter.handle_data(extra_sid_data) ) + # Add a field to check that it does not get included + extra_field_data = deepcopy(data) + extra_field_data[0]['ignore'] = extra_sid_data[0]['price'] + self.history_return_field_filter.append( + self.return_field_filter.handle_data(extra_field_data) + ) + self.history_return_field_no_filter.append( + self.return_field_no_filter.handle_data(extra_field_data) + ) + class SetPortfolioAlgorithm(TradingAlgorithm): """ diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 5f4c706a..117fe8c8 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -344,7 +344,8 @@ class BatchTransform(EventWindow): refresh_period=None, window_length=None, clean_nans=True, - sids=None): + sids=None, + fields=None): """Instantiate new batch_transform object. :Arguments: @@ -362,7 +363,10 @@ class BatchTransform(EventWindow): Which sids to include in the moving window. If not supplied sids will be extracted from incoming events. - + fields : list + Which fields to include in the moving window + (e.g. 'price'). If not supplied, fields will be + extracted from incoming events. 
""" super(BatchTransform, self).__init__(True, @@ -388,7 +392,7 @@ class BatchTransform(EventWindow): self.updated = False self.cached = None - self.field_names = None + self.field_names = fields def handle_data(self, data, *args, **kwargs): """ @@ -432,7 +436,8 @@ class BatchTransform(EventWindow): def handle_add(self, event): if not self.last_dt: - self.field_names = self._extract_field_names(event) + if self.field_names is None: + self.field_names = self._extract_field_names(event) self.last_dt = event.dt return From 5deeb38fb6a73009f19045b5b92b8bb958982695 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Thu, 20 Dec 2012 11:58:23 -0500 Subject: [PATCH 3/7] ENH: sid and field filter kwargs can also be strings or ints. --- zipline/transforms/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 117fe8c8..4e3f72d1 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -380,6 +380,12 @@ class BatchTransform(EventWindow): self.clean_nans = clean_nans self.sids = sids + if isinstance(self.sids, (str, int)): + self.sids = [self.sids] + + self.field_names = fields + if isinstance(self.field_names, str): + self.field_names = [self.field_names] self.refresh_period = refresh_period self.window_length = window_length @@ -392,8 +398,6 @@ class BatchTransform(EventWindow): self.updated = False self.cached = None - self.field_names = fields - def handle_data(self, data, *args, **kwargs): """ New method to handle a data frame as sent to the algorithm's From d1dace948e3261a0a818d54a376de036459abd68 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 30 Dec 2012 17:04:01 -0500 Subject: [PATCH 4/7] ENH: Added new kwarg to batch_transform: create_panel. 
--- tests/test_transforms.py | 5 +++++ zipline/test_algorithms.py | 12 +++++++++++- zipline/transforms/utils.py | 17 ++++++++++++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 1b2536dc..1d0197a9 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import deque + import pytz import numpy as np import pandas as pd @@ -354,6 +356,9 @@ class TestBatchTransform(TestCase): self.assertIn('price', data.items) self.assertIn('ignore', data.items) + for data in algo.history_return_ticks[wl:]: + self.assertTrue(isinstance(data, deque)) + # test overloaded class for test_history in [algo.history_return_price_class, algo.history_return_price_decorator]: diff --git a/zipline/test_algorithms.py b/zipline/test_algorithms.py index 735e3348..c82edcf8 100644 --- a/zipline/test_algorithms.py +++ b/zipline/test_algorithms.py @@ -72,6 +72,7 @@ The algorithm must expose methods: """ from copy import deepcopy +import numpy as np from zipline.algorithm import TradingAlgorithm from zipline.finance.slippage import FixedSlippage @@ -271,6 +272,7 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.history_return_sid_filter = [] self.history_return_field_filter = [] self.history_return_field_no_filter = [] + self.history_return_ticks = [] self.return_price_class = ReturnPriceBatchTransform( refresh_period=self.refresh_period, @@ -328,6 +330,13 @@ class BatchTransformAlgorithm(TradingAlgorithm): clean_nans=True ) + self.return_ticks = return_data( + refresh_period=self.refresh_period, + window_length=self.window_length, + clean_nans=True, + create_panel=False + ) + self.iter = 0 self.set_slippage(FixedSlippage()) @@ -340,6 +349,8 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.history_return_args.append( self.return_args_batch.handle_data( data, 
*self.args, **self.kwargs)) + self.history_return_ticks.append( + self.return_ticks.handle_data(data)) new_data = deepcopy(data) for sid in new_data: @@ -354,7 +365,6 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.return_nan.handle_data(data)) else: nan_data = deepcopy(data) - import numpy as np for sid in nan_data.iterkeys(): nan_data[sid].price = np.nan self.history_return_nan.append( diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 4e3f72d1..8182441c 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -345,7 +345,8 @@ class BatchTransform(EventWindow): window_length=None, clean_nans=True, sids=None, - fields=None): + fields=None, + create_panel=True): """Instantiate new batch_transform object. :Arguments: @@ -367,6 +368,12 @@ class BatchTransform(EventWindow): Which fields to include in the moving window (e.g. 'price'). If not supplied, fields will be extracted from incoming events. + create_panel : bool + If False, will create a pandas panel every refresh + period and pass it to the user-defined function. + If True, will pass the underlying deque reference + directly to the function which will be significantly + faster. 
""" super(BatchTransform, self).__init__(True, @@ -378,6 +385,7 @@ class BatchTransform(EventWindow): self.compute_transform_value = self.get_value self.clean_nans = clean_nans + self.create_panel = create_panel self.sids = sids if isinstance(self.sids, (str, int)): @@ -528,8 +536,11 @@ class BatchTransform(EventWindow): return None if self.updated: - self.cached = self.compute_transform_value(self.get_data(), - *args, **kwargs) + # Either create new pandas panel or pass ticks deque + # directly + data = self.get_data() if self.create_panel else self.ticks + self.cached = self.compute_transform_value(data, *args, + **kwargs) return self.cached From 0f88e4133dfc8e42f639db7f9d31ef53fa7cefd6 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 30 Dec 2012 17:26:07 -0500 Subject: [PATCH 5/7] ENH: New batch_transform feature: compute_only_full. --- tests/test_transforms.py | 3 +++ zipline/test_algorithms.py | 10 +++++++++- zipline/transforms/utils.py | 23 +++++++++++++---------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 1d0197a9..8b775990 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -359,6 +359,9 @@ class TestBatchTransform(TestCase): for data in algo.history_return_ticks[wl:]: self.assertTrue(isinstance(data, deque)) + for data in algo.history_return_not_full: + self.assertIsNot(data, None) + # test overloaded class for test_history in [algo.history_return_price_class, algo.history_return_price_decorator]: diff --git a/zipline/test_algorithms.py b/zipline/test_algorithms.py index c82edcf8..b22d2719 100644 --- a/zipline/test_algorithms.py +++ b/zipline/test_algorithms.py @@ -273,6 +273,7 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.history_return_field_filter = [] self.history_return_field_no_filter = [] self.history_return_ticks = [] + self.history_return_not_full = [] self.return_price_class = ReturnPriceBatchTransform( 
refresh_period=self.refresh_period, @@ -333,10 +334,15 @@ class BatchTransformAlgorithm(TradingAlgorithm): self.return_ticks = return_data( refresh_period=self.refresh_period, window_length=self.window_length, - clean_nans=True, create_panel=False ) + self.return_not_full = return_data( + refresh_period=0, + window_length=self.window_length, + compute_only_full=False + ) + self.iter = 0 self.set_slippage(FixedSlippage()) @@ -351,6 +357,8 @@ class BatchTransformAlgorithm(TradingAlgorithm): data, *self.args, **self.kwargs)) self.history_return_ticks.append( self.return_ticks.handle_data(data)) + self.history_return_not_full.append( + self.return_not_full.handle_data(data)) new_data = deepcopy(data) for sid in new_data: diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 8182441c..0fd647b5 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -346,7 +346,9 @@ class BatchTransform(EventWindow): clean_nans=True, sids=None, fields=None, - create_panel=True): + create_panel=True, + compute_only_full=True): + """Instantiate new batch_transform object. :Arguments: @@ -374,6 +376,9 @@ class BatchTransform(EventWindow): If True, will pass the underlying deque reference directly to the function which will be significantly faster. + compute_only_full : bool + Only call the user-defined function once the window is + full. Returns None if window is not full yet. 
""" super(BatchTransform, self).__init__(True, @@ -386,6 +391,7 @@ class BatchTransform(EventWindow): self.clean_nans = clean_nans self.create_panel = create_panel + self.compute_only_full = compute_only_full self.sids = sids if isinstance(self.sids, (str, int)): @@ -451,7 +457,6 @@ class BatchTransform(EventWindow): if self.field_names is None: self.field_names = self._extract_field_names(event) self.last_dt = event.dt - return # update trading day counters if self.last_dt.day != event.dt.day: @@ -459,15 +464,14 @@ class BatchTransform(EventWindow): self.trading_days_since_update += 1 self.trading_days_total += 1 - if ( - self.trading_days_total >= self.window_length and - self.trading_days_since_update >= self.refresh_period - ): + if self.trading_days_total >= self.window_length: + self.full = True + + if self.trading_days_since_update >= self.refresh_period: # Setting updated to True will cause get_transform_value() # to call the user-defined batch-transform with the most # recent datapanel self.updated = True - self.full = True self.trading_days_since_update = 0 else: self.updated = False @@ -517,8 +521,7 @@ class BatchTransform(EventWindow): return data def handle_remove(self, event): - # since an event is expiring, we know the window is full - self.full = True + pass def get_value(self, *args, **kwargs): raise NotImplementedError( @@ -532,7 +535,7 @@ class BatchTransform(EventWindow): has actually been updated. Otherwise, the previously, cached value will be returned. """ - if not self.full: + if self.compute_only_full and not self.full: return None if self.updated: From 48b05397e21d6383e666383134f6c168699fa50d Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Thu, 3 Jan 2013 16:46:58 -0500 Subject: [PATCH 6/7] MIN: Changed isinstance check to allow more types. 
--- zipline/transforms/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index 0fd647b5..bec3e117 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -24,6 +24,7 @@ from copy import deepcopy from datetime import datetime from collections import deque from abc import ABCMeta, abstractmethod +from numbers import Integral import pandas as pd @@ -394,7 +395,7 @@ class BatchTransform(EventWindow): self.compute_only_full = compute_only_full self.sids = sids - if isinstance(self.sids, (str, int)): + if isinstance(self.sids, (basestring, Integral)): self.sids = [self.sids] self.field_names = fields From bf2e8e3586e8d4e985b73c4cbb4df74879d6dbcb Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 30 Dec 2012 21:47:17 -0500 Subject: [PATCH 7/7] DOC: Typo. --- zipline/transforms/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zipline/transforms/utils.py b/zipline/transforms/utils.py index bec3e117..e343a40c 100644 --- a/zipline/transforms/utils.py +++ b/zipline/transforms/utils.py @@ -372,9 +372,9 @@ class BatchTransform(EventWindow): (e.g. 'price'). If not supplied, fields will be extracted from incoming events. create_panel : bool - If False, will create a pandas panel every refresh + If True, will create a pandas panel every refresh period and pass it to the user-defined function. - If True, will pass the underlying deque reference + If False, will pass the underlying deque reference directly to the function which will be significantly faster. compute_only_full : bool