From c4cb7d41f2cd3869d8b21f6de183270d5a0458f1 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 15 Aug 2012 12:15:41 -0400 Subject: [PATCH 1/4] added tests for stddev --- tests/test_transforms.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 491df1b5..69ab0ac1 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -15,6 +15,7 @@ from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.transform import StatefulTransform, EventWindow from zipline.gens.vwap import VWAP from zipline.gens.mavg import MovingAverage +from zipline.gens.stddev import MovingStandardDev from zipline.gens.returns import Returns import zipline.utils.factory as factory @@ -70,6 +71,7 @@ class EventWindowTestCase(TestCase): delta = timedelta(minutes = 5), days = None ) + now = utcnow() # 15 dates, increasing in 1 minute increments. @@ -99,6 +101,7 @@ class EventWindowTestCase(TestCase): delta = None, days = 1 ) + dates = ([self.pre_open]*3) dates += ([self.mid_day]*3) dates += ([self.post_close]*3) @@ -239,11 +242,12 @@ class FinanceTransformsTestCase(TestCase): fields = ['price', 'volume'], delta = timedelta(days = 2), ) + transformed = list(mavg.transform(self.source)) # Output values. 
tnfm_prices = [message.tnfm_value.price for message in transformed] tnfm_volumes = [message.tnfm_value.volume for message in transformed] - + # "Hand-calculated" values expected_prices = [ ((10.0) / 1.0), @@ -264,3 +268,29 @@ class FinanceTransformsTestCase(TestCase): assert tnfm_prices == expected_prices assert tnfm_volumes == expected_volumes + + def test_moving_stddev(self): + + trade_history = factory.create_trade_history( + 133, + [10.0, 15.0, 13.0, 12.0], + [100, 100, 100, 100], + timedelta(days=1), + self.trading_environment + ) + + stddev = StatefulTransform( + MovingStandardDev, + market_aware = False, + delta = timedelta(days = 2), + ) + self.source = SpecificEquityTrades(event_list=trade_history) + + transformed = list(stddev.transform(self.source)) + + vals = [message.tnfm_value for message in transformed] + + assert vals == [0.0, 2.5, 1.0, 0.5] + + + From 496851ef72bcce6e17407f6d9e802b4673a9177b Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 15 Aug 2012 12:16:38 -0400 Subject: [PATCH 2/4] added stddev transform --- zipline/gens/stddev.py | 100 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 zipline/gens/stddev.py diff --git a/zipline/gens/stddev.py b/zipline/gens/stddev.py new file mode 100644 index 00000000..144f3ace --- /dev/null +++ b/zipline/gens/stddev.py @@ -0,0 +1,100 @@ +from numbers import Number +from datetime import datetime, timedelta +from collections import defaultdict +from math import sqrt + +from zipline import ndict +from zipline.gens.transform import EventWindow + +class MovingStandardDev(object): + """ + Class that maintains a dictionary from sids to + MovingStandardDevWindows. For each sid, we maintain the + standard deviation of all events falling within the specified + window.
+ """ + + def __init__(self, market_aware, days = None, delta = None): + + self.market_aware = market_aware + + self.delta = delta + self.days = days + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.days and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.days, \ + "Non-market-aware mode requires a timedelta." + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. + self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """ + Factory method for self.sid_windows. + """ + return MovingStandardDevWindow( + self.market_aware, + self.days, + self.delta + ) + + def update(self, event): + """ + Update the event window for this event's sid. Return the + moving standard deviation of that sid's window. + """ + # This will create a new EventWindow if this is the first + # message for this sid. + window = self.sid_windows[event.sid] + window.update(event) + return window.get_stddev() + +class MovingStandardDevWindow(EventWindow): + """ + Iteratively calculates standard deviation for a particular sid + over a given time window. The expected functionality of this + class is to be instantiated inside a MovingStandardDev. + """ + + def __init__(self, market_aware, days, delta): + + # Call the superclass constructor to set up base EventWindow + # infrastructure.
+ EventWindow.__init__(self, market_aware, days, delta) + + self.sum = 0.0 + self.sum_sqr = 0.0 + + def handle_add(self, event): + assert event.has_key('price') + assert isinstance(event.price, Number) + + self.sum += event.price + self.sum_sqr += event.price ** 2 + + def handle_remove(self, event): + assert event.has_key('price') + assert isinstance(event.price, Number) + + self.sum -= event.price + self.sum_sqr -= event.price ** 2 + + def get_stddev(self): + + # Stddev is 0 if we have only one event. len(self) is + # provided by EventWindow superclass. + if len(self) == 1: + return 0.0 + + else: + average = self.sum /len(self.ticks) + variance = (self.sum_sqr - self.sum*average) / len(self) + stddev = sqrt(variance) + return stddev From 9537067138a9849e28b1be87ad157d7e002b95b4 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 15 Aug 2012 13:31:15 -0400 Subject: [PATCH 3/4] fix comment in EventWindow --- zipline/gens/transform.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 60d0be85..51475f45 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -159,8 +159,9 @@ class EventWindow: from the window. Subclass these methods along with init(*args, **kwargs) to calculate metrics over the window. - The market_aware flag is used to toggle whether the eventwindow - calculates + If the market_aware flag is True, the EventWindow drops old events + based on the number of elapsed trading days between newest and oldest. + Otherwise old events are dropped based on a raw timedelta. 
See zipline/gens/mavg.py and zipline/gens/vwap.py for example implementations of moving average and volume-weighted average From 1659cfff454fe3eff646479fa15747868b2923c2 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 15 Aug 2012 16:16:36 -0400 Subject: [PATCH 4/4] fixed bessel correction in stddev --- tests/test_transforms.py | 28 +++++++++++++++++++++++----- zipline/gens/stddev.py | 14 +++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 69ab0ac1..e515d725 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1,4 +1,5 @@ import pytz +import numpy from datetime import timedelta, datetime from collections import defaultdict @@ -270,27 +271,44 @@ class FinanceTransformsTestCase(TestCase): assert tnfm_volumes == expected_volumes def test_moving_stddev(self): - trade_history = factory.create_trade_history( 133, [10.0, 15.0, 13.0, 12.0], [100, 100, 100, 100], - timedelta(days=1), + timedelta(hours = 1), self.trading_environment ) stddev = StatefulTransform( MovingStandardDev, market_aware = False, - delta = timedelta(days = 2), + delta = timedelta(minutes = 150), ) self.source = SpecificEquityTrades(event_list=trade_history) transformed = list(stddev.transform(self.source)) - + vals = [message.tnfm_value for message in transformed] + + expected = [ + None, + numpy.std([10.0, 15.0], ddof = 1), + numpy.std([10.0, 15.0, 13.0], ddof = 1), + numpy.std([15.0, 13.0, 12.0], ddof = 1), + ] - assert vals == [0.0, 2.5, 1.0, 0.5] + # numpy has odd rounding behavior, cf. 
+ # http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html + for v1, v2 in zip(vals, expected): + + if v1 == None: + assert v2 == None + continue + assert round(v1, 5) == round(v2, 5) + + + + diff --git a/zipline/gens/stddev.py b/zipline/gens/stddev.py index 144f3ace..1f46429a 100644 --- a/zipline/gens/stddev.py +++ b/zipline/gens/stddev.py @@ -88,13 +88,13 @@ class MovingStandardDevWindow(EventWindow): def get_stddev(self): - # Stddev is 0 if we have only one event. len(self) is - # provided by EventWindow superclass. - if len(self) == 1: - return 0.0 + # Sample standard deviation is undefined for a single event or + # no events. + if len(self) <= 1: + return None else: - average = self.sum /len(self.ticks) - variance = (self.sum_sqr - self.sum*average) / len(self) - stddev = sqrt(variance) + average = self.sum /len(self) + s_squared = (self.sum_sqr - self.sum*average) / (len(self) - 1) + stddev = sqrt(s_squared) return stddev