From 4deabcdfda16a11b98273bc1b473d5e422c19fc1 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 10:42:55 -0400 Subject: [PATCH 01/73] new world order --- zipline/core/monitor.py | 2 +- zipline/gens/composites.py | 28 +++----- zipline/gens/tradegens.py | 9 +++ zipline/gens/tradesimulation.py | 120 ++++++++++++++++++++++++++++++++ zipline/gens/transform.py | 2 +- zipline/gens/utils.py | 19 +++-- 6 files changed, 155 insertions(+), 25 deletions(-) create mode 100644 zipline/gens/tradesimulation.py diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 1f64cdd9..c305e8ff 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -296,7 +296,7 @@ class Controller(object): # We break out of this loop if the time between # sending and receiving the heartbeat is more # than our poll period. - + if tic - self.ctime > self.period: log.info("heartbeat loop timedout: %s" % (tic - self.ctime)) log.info(repr(self.responses)) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 832909ad..66697fa7 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -11,34 +11,26 @@ from zipline.gens.transform import stateful_transform SortBundle = namedtuple("SortBundle", ['source', 'args', 'kwargs']) MergeBundle = namedtuple("MergeBundle", ['stream', 'tnfm', 'args', 'kwargs']) -def date_sorted_sources(sources, source_args, source_kwargs): +def date_sorted_sources(bundles): """ - Takes a list of generator functions, a list of tuples of positional arguments, - and a list of dictionaries of keyword arguments. Packages up all arguments - and passes them into a date_sort. + Takes an iterable of SortBundles, generating namestrings and initialized datasources + for each before piping them into a date_sort. """ - assert len(sources) == len(source_args) == len(source_kwargs) - # Package up sources and arguments. - - # Create a generator of SortBundle objects to be turned into - # namestrings and generator objects. 
- bundle_gen = starmap(SortBundle, zip(sources, source_args, source_kwargs)) - - # Load the results of the generator into a tuple so that the - # results can be used twice (once in namestring comprehension, - # once in the generator comprehension for intialized sources. - bundles = tuple(bundle_gen) + assert isinstance(bundles, (list, tuple)) + for bundle in bundles: + assert isinstance(bundle, SortBundle) # Calculate namestring hashes to pass to date_sort. names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) for bundle in bundles] + # Pass each source its arguments. initialized = [bundle.source(*bundle.args, **bundle.kwargs) - for bundle in bundles] - + for bundle in bundles] + # Convert the list of generators into a flat stream by pulling # one element at a time from each. - stream_in = roundrobin(*initialized) + stream_in = roundrobin(initialized, names) # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index c7ee74f8..a24cbe58 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -54,6 +54,15 @@ def SpecificEquityTrades(*args, **config): Yields all events in event_list that match the given sid_filter. If no event_list is specified, generates an internal stream of events to filter. Returns all events if filter is None. + + Configuration options: + + count: integer representing number of trades + sids : list of values representing simulated internal sids + start: start date + delta: timedelta between internal events + + """ # We shouldn't get any positional arguments. 
assert args == () diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py new file mode 100644 index 00000000..65523a41 --- /dev/null +++ b/zipline/gens/tradesimulation.py @@ -0,0 +1,120 @@ +from numbers import Integral + +from zipline.gens import stateful_transform +from zipline.finance.trading import TransactionSimulator +from zipline.finance.performance import PerformanceTracker + +def trade_simulation_client(stream_in, algo, environment, sim_style): + """ + Generator that takes the expected output of a merge, a user + algorithm, a trading environment, and a simulator style as + arguments. Pipes the merge stream through a TransactionSimulator + and a PerformanceTracker, which keep track of the current state of + our algorithm's simulated universe. Results are fed to the user's + algorithm, which directly inserts transactions into the + TransactionSimulator's order book. + + TransactionSimulator maintains a dictionary from sids to the + unfulfilled orders placed by the user's algorithm. As trade + events arrive, if the algorithm has open orders against the + trade's sid, the simulator will fill orders up to 25% of market + cap. Applied transactions are added to a txn field on the event + and forwarded to PerformanceTracker. The txn field is set to None + on non-trade events and events that do not match any open orders. + + PerformanceTracker receives the updated event messages from + TransactionSimulator, maintaining a set of daily and cumulative + performance metrics for the algorithm. The tracker removes the + txn field from each event it receives, replacing it with a + portfolio field to be fed into the user algo. At the end of each + trading day, the PerformanceTracker also generates a daily + performance report, which is appended to event's perf_report + field. + + Fully processed events are run through a batcher generator, which + batches together events with the same dt field into a single event + to be fed to the algo. 
The portfolio object is repeatedly + overwritten so that only the most recent snapshot of the universe + is sent to the algo. + """ + + #============ + # Algo Setup + #============ + + # Initialize txn_sim's dictionary of orders here so that we can + # reference it from within the user's algorithm. + sids = algo.get_sid_filter() + open_orders = {} + + for sid in sids: + open_orders[sids] = [] + + # Closure to pass into the user's algo to allow placing orders + # into the txn_sim's dict of open orders. + def order(self, sid, amount): + assert sid in sids, "Order on invalid sid: %i" % sid + order = zp.ndict({ + 'dt' : self.current_dt, + 'sid' : sid, + 'amount' : int(amount) + 'filled' : 0 + }) + + # Tell the user if they try to buy 0 shares of something. + if order.amount == 0: + log = "requested to trade zero shares of {sid}".format( + sid=event.sid + ) + log.debug(log) + return + + open_orders[sid].append(event) + + # Set the algo's order method. + algo.set_order(order) + + # Provide a logbook logging interface to user code. + algo.set_logger(Logger("Algolog")) + + # Call user-defined initialize method before we process any + # events. + algo.initialize() + + # Pipe the in stream into the transaction simulator. + # Creates a TRANSACTION field on the event containing transaction + # information if we filled any pending orders on the event's sid. + # TRANSACTION is None if we didn't fill any orders. + with_txns = stateful_transform(stream_in, + TransactionSimulator, + open_orders, + style = sim_style) + + + # Pipe the events with transactions to perf. This will remove the + # TRANSACTION field added by TransactionSimulator and replace it with + # a portfolio object to be passed to the user's algorithm. Also adds + # a PERF_MESSAGE field which is usually none, but contains an update + # message once per day. 
+ with_portfolio_and_perf_msg = stateful_transform(stream_with_txns, + PerformanceTracker, + trading_environment, + sids) + + # Batch the event stream by dt to be processed by the user's algo. + # Will also set the PERF_MESSAGE field if the batch contains a perf + # message. + + batches = batcher(with_portfolio_and_perf_msg) + + for batch in batches: + algo.handle_data(batch.data) + if batch.perf_message: + yield perf_message + + + + + + + diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index a03a841a..64c817ca 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -43,7 +43,7 @@ def functional_transform(stream_in, func, *args, **kwargs): def stateful_transform(stream_in, tnfm_class, *args, **kwargs): """ Generic transform generator that takes each message from an in-stream - and sorts it to a state class. For each call to update, the state + and passes it to a state class. For each call to update, the state class must produce a message to be fed downstream. """ diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index e2f859cb..209c98b0 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -27,15 +27,24 @@ def alternate(g1, g2): if e2 != None: yield e2 -def roundrobin(*args): +def roundrobin(sources, namestrings): """ Takes N generators, pulling one element off each until all inputs are empty. 
""" - for elem_tuple in izip_longest(*args): - for value in elem_tuple: - if value != None: - yield value + assert len(sources) == len(namestrings) + mapping = OrderedDict(zip(namestrings, sources)) + + # While our generators have not been exhausted, pull elements + while mapping != []: + for namestring, source in mapping: + try: + message = source.next() + yield message + except StopIteration: + yield done_message(namestring) + del mapping(namestring) + def hash_args(*args, **kwargs): From 211cd0271f071d42ef042b7a3137bd6f430ed909 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 10:42:55 -0400 Subject: [PATCH 02/73] new world order --- zipline/core/monitor.py | 2 +- zipline/gens/composites.py | 28 +++----- zipline/gens/tradegens.py | 9 +++ zipline/gens/tradesimulation.py | 120 ++++++++++++++++++++++++++++++++ zipline/gens/transform.py | 2 +- zipline/gens/utils.py | 19 +++-- 6 files changed, 155 insertions(+), 25 deletions(-) create mode 100644 zipline/gens/tradesimulation.py diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 50c036aa..5b2b4c72 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -298,7 +298,7 @@ class Controller(object): # We break out of this loop if the time between # sending and receiving the heartbeat is more # than our poll period. 
- + if tic - self.ctime > self.period: log.info("heartbeat loop timedout: %s" % (tic - self.ctime)) log.info(repr(self.responses)) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 832909ad..66697fa7 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -11,34 +11,26 @@ from zipline.gens.transform import stateful_transform SortBundle = namedtuple("SortBundle", ['source', 'args', 'kwargs']) MergeBundle = namedtuple("MergeBundle", ['stream', 'tnfm', 'args', 'kwargs']) -def date_sorted_sources(sources, source_args, source_kwargs): +def date_sorted_sources(bundles): """ - Takes a list of generator functions, a list of tuples of positional arguments, - and a list of dictionaries of keyword arguments. Packages up all arguments - and passes them into a date_sort. + Takes an iterable of SortBundles, generating namestrings and initialized datasources + for each before piping them into a date_sort. """ - assert len(sources) == len(source_args) == len(source_kwargs) - # Package up sources and arguments. - - # Create a generator of SortBundle objects to be turned into - # namestrings and generator objects. - bundle_gen = starmap(SortBundle, zip(sources, source_args, source_kwargs)) - - # Load the results of the generator into a tuple so that the - # results can be used twice (once in namestring comprehension, - # once in the generator comprehension for intialized sources. - bundles = tuple(bundle_gen) + assert isinstance(bundles, (list, tuple)) + for bundle in bundles: + assert isinstance(bundle, SortBundle) # Calculate namestring hashes to pass to date_sort. names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) for bundle in bundles] + # Pass each source its arguments. initialized = [bundle.source(*bundle.args, **bundle.kwargs) - for bundle in bundles] - + for bundle in bundles] + # Convert the list of generators into a flat stream by pulling # one element at a time from each. 
- stream_in = roundrobin(*initialized) + stream_in = roundrobin(initialized, names) # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index c7ee74f8..a24cbe58 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -54,6 +54,15 @@ def SpecificEquityTrades(*args, **config): Yields all events in event_list that match the given sid_filter. If no event_list is specified, generates an internal stream of events to filter. Returns all events if filter is None. + + Configuration options: + + count: integer representing number of trades + sids : list of values representing simulated internal sids + start: start date + delta: timedelta between internal events + + """ # We shouldn't get any positional arguments. assert args == () diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py new file mode 100644 index 00000000..65523a41 --- /dev/null +++ b/zipline/gens/tradesimulation.py @@ -0,0 +1,120 @@ +from numbers import Integral + +from zipline.gens import stateful_transform +from zipline.finance.trading import TransactionSimulator +from zipline.finance.performance import PerformanceTracker + +def trade_simulation_client(stream_in, algo, environment, sim_style): + """ + Generator that takes the expected output of a merge, a user + algorithm, a trading environment, and a simulator style as + arguments. Pipes the merge stream through a TransactionSimulator + and a PerformanceTracker, which keep track of the current state of + our algorithm's simulated universe. Results are fed to the user's + algorithm, which directly inserts transactions into the + TransactionSimulator's order book. + + TransactionSimulator maintains a dictionary from sids to the + unfulfilled orders placed by the user's algorithm. 
As trade + events arrive, if the algorithm has open orders against the + trade's sid, the simulator will fill orders up to 25% of market + cap. Applied transactions are added to a txn field on the event + and forwarded to PerformanceTracker. The txn field is set to None + on non-trade events and events that do not match any open orders. + + PerformanceTracker receives the updated event messages from + TransactionSimulator, maintaining a set of daily and cumulative + performance metrics for the algorithm. The tracker removes the + txn field from each event it receives, replacing it with a + portfolio field to be fed into the user algo. At the end of each + trading day, the PerformanceTracker also generates a daily + performance report, which is appended to event's perf_report + field. + + Fully processed events are run through a batcher generator, which + batches together events with the same dt field into a single event + to be fed to the algo. The portfolio object is repeatedly + overwritten so that only the most recent snapshot of the universe + is sent to the algo. + """ + + #============ + # Algo Setup + #============ + + # Initialize txn_sim's dictionary of orders here so that we can + # reference it from within the user's algorithm. + sids = algo.get_sid_filter() + open_orders = {} + + for sid in sids: + open_orders[sids] = [] + + # Closure to pass into the user's algo to allow placing orders + # into the txn_sim's dict of open orders. + def order(self, sid, amount): + assert sid in sids, "Order on invalid sid: %i" % sid + order = zp.ndict({ + 'dt' : self.current_dt, + 'sid' : sid, + 'amount' : int(amount) + 'filled' : 0 + }) + + # Tell the user if they try to buy 0 shares of something. + if order.amount == 0: + log = "requested to trade zero shares of {sid}".format( + sid=event.sid + ) + log.debug(log) + return + + open_orders[sid].append(event) + + # Set the algo's order method. + algo.set_order(order) + + # Provide a logbook logging interface to user code. 
+ algo.set_logger(Logger("Algolog")) + + # Call user-defined initialize method before we process any + # events. + algo.initialize() + + # Pipe the in stream into the transaction simulator. + # Creates a TRANSACTION field on the event containing transaction + # information if we filled any pending orders on the event's sid. + # TRANSACTION is None if we didn't fill any orders. + with_txns = stateful_transform(stream_in, + TransactionSimulator, + open_orders, + style = sim_style) + + + # Pipe the events with transactions to perf. This will remove the + # TRANSACTION field added by TransactionSimulator and replace it with + # a portfolio object to be passed to the user's algorithm. Also adds + # a PERF_MESSAGE field which is usually none, but contains an update + # message once per day. + with_portfolio_and_perf_msg = stateful_transform(stream_with_txns, + PerformanceTracker, + trading_environment, + sids) + + # Batch the event stream by dt to be processed by the user's algo. + # Will also set the PERF_MESSAGE field if the batch contains a perf + # message. + + batches = batcher(with_portfolio_and_perf_msg) + + for batch in batches: + algo.handle_data(batch.data) + if batch.perf_message: + yield perf_message + + + + + + + diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index a03a841a..64c817ca 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -43,7 +43,7 @@ def functional_transform(stream_in, func, *args, **kwargs): def stateful_transform(stream_in, tnfm_class, *args, **kwargs): """ Generic transform generator that takes each message from an in-stream - and sorts it to a state class. For each call to update, the state + and passes it to a state class. For each call to update, the state class must produce a message to be fed downstream. 
""" diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index e2f859cb..209c98b0 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -27,15 +27,24 @@ def alternate(g1, g2): if e2 != None: yield e2 -def roundrobin(*args): +def roundrobin(sources, namestrings): """ Takes N generators, pulling one element off each until all inputs are empty. """ - for elem_tuple in izip_longest(*args): - for value in elem_tuple: - if value != None: - yield value + assert len(sources) == len(namestrings) + mapping = OrderedDict(zip(namestrings, sources)) + + # While our generators have not been exhausted, pull elements + while mapping != []: + for namestring, source in mapping: + try: + message = source.next() + yield message + except StopIteration: + yield done_message(namestring) + del mapping(namestring) + def hash_args(*args, **kwargs): From e4a21e7c61fc805e3d9060b426d4056ca36d6ee5 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 1 Aug 2012 11:03:23 -0400 Subject: [PATCH 03/73] resolved conflicts --- zipline/core/monitor.py | 2 +- zipline/test_algorithms.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 5b2b4c72..50c036aa 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -298,7 +298,7 @@ class Controller(object): # We break out of this loop if the time between # sending and receiving the heartbeat is more # than our poll period. 
- + if tic - self.ctime > self.period: log.info("heartbeat loop timedout: %s" % (tic - self.ctime)) log.info(repr(self.responses)) diff --git a/zipline/test_algorithms.py b/zipline/test_algorithms.py index 68f3d160..a7881fa8 100644 --- a/zipline/test_algorithms.py +++ b/zipline/test_algorithms.py @@ -221,6 +221,32 @@ class DivByZeroAlgorithm(): def get_sid_filter(self): return [self.sid] +class TimeoutAlgorithm(): + + def __init__(self, sid): + self.sid = sid + self.incr = 0 + + def initialize(self): + pass + + def set_order(self, order_callable): + pass + + def set_logger(self, logger): + pass + + def set_portfolio(self, portfolio): + pass + + def handle_data(self, data): + if self.incr > 4: + import time + time.sleep(100) + pass + + def get_sid_filter(self): + return [self.sid] class TestPrintAlgorithm(): From 6cb3516b6b2a194750232efac3725b9a4be016c9 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 11:12:09 -0400 Subject: [PATCH 04/73] save for attempted merge --- zipline/finance/performance.py | 15 +++++++------ zipline/finance/trading.py | 35 ++++++++---------------------- zipline/gens/examples.py | 38 +++++++++++++++++++++++++++++++++ zipline/gens/tradesimulation.py | 20 ++++++++++------- zipline/gens/zmq_gens.py | 8 ++++--- 5 files changed, 73 insertions(+), 43 deletions(-) create mode 100644 zipline/gens/examples.py diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index fdca878d..a96f5d98 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -144,7 +144,7 @@ class PerformanceTracker(object): """ - def __init__(self, trading_environment): + def __init__(self, trading_environment, sid_list): self.trading_environment = trading_environment self.trading_day = datetime.timedelta(hours = 6, minutes = 30) @@ -164,7 +164,6 @@ class PerformanceTracker(object): self.txn_count = 0 self.event_count = 0 self.last_dict = None - self.order_log = [] self.exceeded_max_loss = False self.results_socket 
= None @@ -198,9 +197,14 @@ class PerformanceTracker(object): keep_transactions = True ) - def set_sids(self, sid_list): for sid in sid_list: self.cumulative_performance.positions[sid] = Position(sid) + self.daily_performance.positions[sid] = Position(sid) + + def update(self, event): + event.perf_message = self.process_event() + event.portfolio = self.get_portfolio + return event def get_portfolio(self): return self.cumulative_performance.as_portfolio() @@ -238,8 +242,6 @@ class PerformanceTracker(object): 'cumulative_risk_metrics' : self.cumulative_risk_metrics.to_dict() } - def log_order(self, order): - self.order_log.append(order) def process_event(self, event): @@ -288,6 +290,8 @@ class PerformanceTracker(object): # calculate progress of test self.progress = self.day_count / self.total_days + # TODO!!!! + # Output results if self.results_socket: msg = zp.PERF_FRAME(self.to_dict()) @@ -584,7 +588,6 @@ class PerformancePeriod(object): return positions - # def get_positions_list(self): positions = [] for sid, pos in self.positions.iteritems(): diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index bf3a5374..9cae6e72 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -10,9 +10,8 @@ log = logbook.Logger('Transaction Simulator') class TransactionSimulator(object): - def __init__(self, style=SIMULATION_STYLE.PARTIAL_VOLUME): - self.open_orders = {} - self.order_count = 0 + def __init__(self, open_orders, style=SIMULATION_STYLE.PARTIAL_VOLUME): + self.open_orders = open_orders self.txn_count = 0 self.trade_window = datetime.timedelta(seconds=30) self.orderTTL = datetime.timedelta(days=1) @@ -27,28 +26,12 @@ class TransactionSimulator(object): elif style == SIMULATION_STYLE.NOOP: self.apply_trade_to_open_orders = self.simulate_noop - def add_open_order(self, event): - # Orders are captured in a buffer by sid. No calculations are done here. - # Amount is explicitly converted to an int. - # Orders of amount zero are ignored. 
- - self.order_count += 1 - event.amount = int(event.amount) - - if event.amount == 0: - log = "requested to trade zero shares of {sid}".format( - sid=event.sid - ) - log.debug(log) - return - - if not self.open_orders.has_key(event.sid): - self.open_orders[event.sid] = [] - - # set the filled property to zero - event.filled = 0 - self.open_orders[event.sid].append(event) - + def update(self, event): + event.txn = None + if event.type == zp.DATASOURCE_TYPE.TRADE: + event.txn = self.apply_trade_to_open_orders(event) + return event + def simulate_buy_all(self, event): txn = self.create_transaction( event.sid, @@ -81,7 +64,7 @@ class TransactionSimulator(object): txn = self.create_transaction( event.sid, amount, - event.price + 0.10, + event.price + 0.10, # Magic constant? event.dt, direction ) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py new file mode 100644 index 00000000..d9051b10 --- /dev/null +++ b/zipline/gens/examples.py @@ -0,0 +1,38 @@ +from zipline.gens.composites import + +if __name__ == "__main__": + + filter = [1,2,3,4] + #Set up source a. One hour between events. + args_a = tuple() + kwargs_a = {'sids' : [1,2,3,4], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = ), + 'filter' : filter + } + #Set up source b. One day between events. + args_b = tuple() + kwargs_b = {'sids' : [1,2,3,4], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(days = 1), + 'filter' : filter + } + #Set up source c. One minute between events. + args_c = tuple() + kwargs_c = {'sids' : [1,2,3,4], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 1), + 'filter' : filter + } + + sources = (SpecificEquityTrades,) * 4 + source_args = (args_a, args_b, args_c, args_d) + source_kwargs = (kwargs_a, kwargs_b, kwargs_c, kwargs_d) + + # Generate our expected source_ids. 
+ zip_args = zip(source_args, source_kwargs) + expected_ids = ["SpecificEquityTrades" + hash_args(*args, **kwargs) + for args, kwargs in zip_args] + + # Pipe our sources into sort. + sort_out = date_sorted_sources(sources, source_args, source_kwargs) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 65523a41..7f0a30eb 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -85,10 +85,12 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Creates a TRANSACTION field on the event containing transaction # information if we filled any pending orders on the event's sid. # TRANSACTION is None if we didn't fill any orders. - with_txns = stateful_transform(stream_in, - TransactionSimulator, - open_orders, - style = sim_style) + with_txns = stateful_transform( + stream_in, + TransactionSimulator, + open_orders, + style = sim_style + ) # Pipe the events with transactions to perf. This will remove the @@ -96,10 +98,12 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # a portfolio object to be passed to the user's algorithm. Also adds # a PERF_MESSAGE field which is usually none, but contains an update # message once per day. - with_portfolio_and_perf_msg = stateful_transform(stream_with_txns, - PerformanceTracker, - trading_environment, - sids) + with_portfolio_and_perf_msg = stateful_transform( + stream_with_txns, + PerformanceTracker, + trading_environment, + sids + ) # Batch the event stream by dt to be processed by the user's algo. 
# Will also set the PERF_MESSAGE field if the batch contains a perf diff --git a/zipline/gens/zmq_gens.py b/zipline/gens/zmq_gens.py index 524852a7..e60dae2b 100644 --- a/zipline/gens/zmq_gens.py +++ b/zipline/gens/zmq_gens.py @@ -2,15 +2,17 @@ import zmq import zipline.protocol as zp -def gen_from_zmq(poller, unframe): +def gen_from_zmq(poller, unframe, namestring): """ A generator that takes an initialized zmq poller and yields messages from the poller until it gets a zp.CONTROL_PROTOCOL.DONE. """ while True: message = poller.recv() - if message = zp.CONTROL_PROTOCOL.DONE: - yield "DONE" + # Done protocol should now be a message type so that + # done messages can also have source_ids. + if message.type == zp.CONTROL_PROTOCOL.DONE: + yield done_message(message.source_id) break else: yield unframe(message) From fe8a443051f1df7c368862d62fc9db967e72e737 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 1 Aug 2012 11:19:59 -0400 Subject: [PATCH 05/73] intersticial for merge with @ssanderson's latest --- zipline/core/monitor.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 50c036aa..65c37d32 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -1,4 +1,3 @@ -import inspect import os import zmq import sys @@ -69,7 +68,12 @@ class Controller(object): debug = True period = PARAMETERS.GENERATIONAL_PERIOD - def __init__(self, pub_socket, route_socket, send_sighup=False): + def __init__( + self, + pub_socket, + route_socket, + exception_socket, + send_sighup=False): self.nosignals = False self.context = None @@ -90,8 +94,9 @@ class Controller(object): self.associated = [] - self.pub_socket = pub_socket - self.route_socket = route_socket + self.pub_socket = pub_socket + self.route_socket = route_socket + self.exception_socket = exception_socket self.error_replay = OrderedDict() From 9f7293e2d200bfa4d555e8995d340c6d64d4902f Mon Sep 17 00:00:00 2001 From: scottsanderson Date: 
Wed, 1 Aug 2012 17:19:08 -0400 Subject: [PATCH 06/73] pipeline through merge --- zipline/gens/composites.py | 46 ++++++++++------------ zipline/gens/examples.py | 81 +++++++++++++++++++++++--------------- zipline/gens/merge.py | 39 +++++++++++------- zipline/gens/tradegens.py | 6 +-- zipline/gens/transform.py | 49 ++++++++++++++++------- zipline/gens/utils.py | 19 ++++++--- 6 files changed, 145 insertions(+), 95 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 66697fa7..234db714 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -8,8 +8,8 @@ from zipline.gens.sort import date_sort from zipline.gens.merge import merge from zipline.gens.transform import stateful_transform -SortBundle = namedtuple("SortBundle", ['source', 'args', 'kwargs']) -MergeBundle = namedtuple("MergeBundle", ['stream', 'tnfm', 'args', 'kwargs']) +SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) +TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) def date_sorted_sources(bundles): """ @@ -18,19 +18,19 @@ def date_sorted_sources(bundles): """ assert isinstance(bundles, (list, tuple)) for bundle in bundles: - assert isinstance(bundle, SortBundle) + assert isinstance(bundle, SourceBundle) # Calculate namestring hashes to pass to date_sort. names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) for bundle in bundles] # Pass each source its arguments. - initialized = [bundle.source(*bundle.args, **bundle.kwargs) + source_gens = [bundle.source(*bundle.args, **bundle.kwargs) for bundle in bundles] # Convert the list of generators into a flat stream by pulling # one element at a time from each. 
- stream_in = roundrobin(initialized, names) + stream_in = roundrobin(source_gens, names) # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string @@ -38,7 +38,7 @@ def date_sorted_sources(bundles): return date_sort(stream_in, names) -def merged_transforms(sorted_stream, tnfms, tnfm_args, tnfm_kwargs): +def merged_transforms(sorted_stream, bundles): """ A generator that takes the expected output of a date_sort, pipes it through a given set of transforms, and runs the results throught a @@ -48,36 +48,30 @@ def merged_transforms(sorted_stream, tnfms, tnfm_args, tnfm_kwargs): tnfm_kwargs should be a list of dictionaries representing keyword arguments to each transform. """ - - # We should have as many sets of args as we have transforms. - assert len(tnfms) == len(tnfm_args) == len(tnfm_kwargs) + # Generate expected hashes for each transform + namestrings = [bundle.tnfm.__name__ + hash_args(*bundle.args, **bundle.kwargs) + for bundle in bundles] # Create a copy of the stream for each transform. - split = tee(sorted_stream, len(tnfms)) + split = tee(sorted_stream, len(bundles)) + # Package a stream copy with each bundle + tnfms_with_streams = zip(split, bundles) - # Package each transform with a stream copy and set of args. Use a list - # so that we can re-use this for calculating hashes. - bundle_gen = starmap(MergeBundle, zip(split, tnfms, tnfm_args, tnfm_kwargs)) - - bundles = tuple(bundle_gen) - # list comprehension to create transform generators from - # bundles + # Convert the copies into transform streams. 
tnfm_gens = [ stateful_transform( - bundle.stream, + stream_copy, bundle.tnfm, *bundle.args, **bundle.kwargs ) - for bundle in bundles] - - # Generate expected hashes for each transform - hashes = [bundle.tnfm.__name__ + hash_args(*bundle.args, **bundle.kwargs) - for bundle in bundles] + for stream_copy, bundle in tnfms_with_streams + ] # Roundrobin the outputs of our transforms to create a single flat stream. - to_merge = roundrobin(*tnfm_gens) + to_merge = roundrobin(tnfm_gens, namestrings) # Pipe the stream into merge. - merged = merge(to_merge, hashes) - return merged_transforms + merged = merge(to_merge, namestrings) + # Return the merged events. + return merged diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index d9051b10..fb3d8827 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,38 +1,57 @@ -from zipline.gens.composites import +from datetime import datetime, timedelta + +from zipline.utils.factory import create_trading_environment +from zipline.test_algorithms import TestAlgorithm + +from zipline.gens.composites import SourceBundle, TransformBundle, date_sorted_sources, merged_transforms +from zipline.gens.tradegens import SpecificEquityTrades +from zipline.gens.transform import MovingAverage, Passthrough if __name__ == "__main__": filter = [1,2,3,4] - #Set up source a. One hour between events. + #Set up source a. One minute between events. args_a = tuple() - kwargs_a = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = ), - 'filter' : filter - } - #Set up source b. One day between events. - args_b = tuple() - kwargs_b = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(days = 1), - 'filter' : filter - } - #Set up source c. One minute between events. 
- args_c = tuple() - kwargs_c = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = 1), - 'filter' : filter - } + kwargs_a = { + 'sids' : [1,2], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 1), + 'filter' : filter + } + bundle_a = SourceBundle(SpecificEquityTrades, args_a, kwargs_a) - sources = (SpecificEquityTrades,) * 4 - source_args = (args_a, args_b, args_c, args_d) - source_kwargs = (kwargs_a, kwargs_b, kwargs_c, kwargs_d) - - # Generate our expected source_ids. - zip_args = zip(source_args, source_kwargs) - expected_ids = ["SpecificEquityTrades" + hash_args(*args, **kwargs) - for args, kwargs in zip_args] - + #Set up source b. Two minutes between events. + args_b = tuple() + kwargs_b = { + 'sids' : [2,3], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 2), + 'filter' : filter + } + bundle_b = SourceBundle(SpecificEquityTrades, args_b, kwargs_b) + + #Set up source c. Three minutes between events. + args_c = tuple() + kwargs_c = { + 'sids' : [3,4], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 3), + 'filter' : filter + } + bundle_c = SourceBundle(SpecificEquityTrades, args_c, kwargs_c) + + source_bundles = (bundle_a, bundle_b, bundle_c) # Pipe our sources into sort. 
- sort_out = date_sorted_sources(sources, source_args, source_kwargs) + sort_out = date_sorted_sources(source_bundles) + + passthrough = TransformBundle(Passthrough, (), {}) + mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price', 'volume']), {}) + tnfm_bundles = (passthrough, mavg_price) + + merge_out = merged_transforms(sort_out, tnfm_bundles) + + for message in merge_out: + print "Event: \n", message.event + print "Transforms: \n", message.tnfms + + diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index 4778ed5b..0e8fab93 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -7,7 +7,7 @@ from collections import deque from zipline import ndict from zipline.gens.utils import hash_args, \ assert_merge_protocol - +from itertools import repeat def merge(stream_in, tnfm_ids): """ @@ -17,7 +17,7 @@ def merge(stream_in, tnfm_ids): and merge them together into an event. We raise an error if we do not receive the same number of events from all sources. """ - + assert isinstance(tnfm_ids, list) # Set up an internal queue for each expected source. @@ -28,22 +28,22 @@ def merge(stream_in, tnfm_ids): # Process incoming streams. for message in stream_in: - assert isinstance(message, tuple), \ - "Bad message in merge: %s" %message - assert len(message) == 2 - id, value = message + assert isinstance(message, ndict) + assert message.has_key('tnfm_id') + assert message.has_key('tnfm_value') + assert message.has_key('dt') + + id = message.tnfm_id assert id in tnfm_ids, \ "Message from unexpected tnfm: %s, %s" % (id, tnfm_ids) - assert isinstance(value, ndict), "Bad message in merge: %s" %message - - tnfms[id].append(value) + + tnfms[id].append(message) # Only pop messages when we have a pending message from # all datasources. Stop if all sources have signalled done. 
while ready(tnfms) and not done(tnfms): message = merge_one(tnfms) - assert_merge_protocol(tnfm_ids, message) yield message # We should have only a done message left in each queue. @@ -53,11 +53,22 @@ def merge(stream_in, tnfm_ids): "Bad last message in merge on exit: %s" % queue def merge_one(sources): - output = ndict() + dict_primer = zip(sources.keys(), repeat(None)) + transforms = ndict(dict_primer) + event_fields = ndict() + for key, queue in sources.iteritems(): - new_xform = ndict({key: queue.popleft()}) - output.merge(new_xform) - return output + + # Add transform value to the transforms dict. + message = queue.popleft() + transforms[message.tnfm_id] = message.tnfm_value + del message['tnfm_id'] + del message['tnfm_value'] + + # Merge any remaining fields into the event dict. + event_fields.merge(message) + + return ndict({'event' : event_fields, 'tnfms' : transforms}) #TODO: This is replicated in sort. Probably should be one source file. diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index a24cbe58..e3b88a0e 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -106,10 +106,8 @@ def SpecificEquityTrades(*args, **config): else: filtered = unfiltered - # Add a done message to the end of the stream. For a live - # datasource this would be handled by the containing Component. - out = chain(filtered, [mock_done(namestring)]) - return out + # Return the filtered event stream. + return filtered def RandomEquityTrades(*args, **config): # We shouldn't get any positional args. diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 64c817ca..ece2d383 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -3,6 +3,7 @@ Generator versions of transforms. 
""" import types +from copy import deepcopy from datetime import datetime from collections import deque, defaultdict from numbers import Number @@ -12,6 +13,7 @@ from zipline.gens.utils import assert_sort_unframe_protocol, \ assert_transform_protocol, hash_args class Passthrough(object): + FORWARDER = True """ Trivial class for forwarding events. """ @@ -19,10 +21,7 @@ class Passthrough(object): pass def update(self, event): - assert isinstance(event, ndict),"Bad event in Passthrough: %s" % event - assert event.has_key('sid'), "No sid in Passthrough: %s" % event - assert event.has_key('dt'), "No dt in Passthorughz: %s" % event - return event + pass def functional_transform(stream_in, func, *args, **kwargs): """ @@ -44,9 +43,13 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): """ Generic transform generator that takes each message from an in-stream and passes it to a state class. For each call to update, the state - class must produce a message to be fed downstream. + class must produce a message to be fed downstream. Any transform class + with the FORWARDER class variable set to true will forward all fields + in the original message. Otherwise only dt, tnfm_id, and tnfm_value + are forwarded. """ - + forward_all_fields = tnfm_class.__dict__.get('FORWARDER', False) + assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ "Stateful transform requires a class." assert tnfm_class.__dict__.has_key('update'), \ @@ -58,11 +61,31 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): # Generate the string associated with this generator's output. namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) + # IMPORTANT: Messages may contain pointers that are shared with + # other streams, so we only manipulate copies. 
for message in stream_in: + assert_sort_unframe_protocol(message) - out_value = state.update(message) - assert_transform_protocol(out_value) - yield (namestring, out_value) + message_copy = deepcopy(message) + + # Same shared pointer issue here as above. + tnfm_value = state.update(deepcopy(message_copy)) + + # If we want to keep all original values, just append tnfm_id + # and tnfm_value. + if forward_all_fields: + out_message = message_copy + out_message.tnfm_id = namestring + out_message.tnfm_value = tnfm_value + yield out_message + + # Otherwise send tnfm_id, tnfm_value, and the message date. + else: + out_message = ndict() + out_message.tnfm_id = namestring + out_message.tnfm_value = tnfm_value + out_message.dt = message_copy.dt + yield out_message class MovingAverage(object): """ @@ -70,6 +93,7 @@ class MovingAverage(object): Upon receipt of each message we update the corresponding window and return the calculated average. """ + FORWARDER = False def __init__(self, delta, fields): self.delta = delta @@ -93,16 +117,11 @@ class MovingAverage(object): assert event.has_key('sid'), "No sid in MovingAverage: %s" % event assert event.has_key('dt'), "No dt in MovingAverage: %s" % event - output = ndict({'sid': event.sid, 'dt': event.dt}) # This will create a new EventWindow if this is the first # message for this sid. 
window = self.sid_windows[event.sid] window.update(event) - averages = window.get_averages() - - # Return the calculated averages along with - output.merge(averages) - return output + return window.get_averages() class EventWindow(object): """ diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index 209c98b0..d51452bf 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -1,6 +1,7 @@ import pytz import numbers +from collections import OrderedDict from hashlib import md5 from datetime import datetime from itertools import izip_longest @@ -16,8 +17,16 @@ def mock_raw_event(sid, dt): } return event -def mock_done(source_id): - return ndict({'dt': "DONE", "source_id" : source_id, 'type' : 0}) +def mock_done(id): + return ndict({ + 'dt' : "DONE", + "source_id" : id, + 'tnfm_id' : id, + 'tnfm_value': None, + 'type' : 0 + }) + +done_message = mock_done def alternate(g1, g2): """Specialized version of roundrobin for just 2 generators.""" @@ -36,14 +45,14 @@ def roundrobin(sources, namestrings): mapping = OrderedDict(zip(namestrings, sources)) # While our generators have not been exhausted, pull elements - while mapping != []: - for namestring, source in mapping: + while mapping.keys() != []: + for namestring, source in mapping.iteritems(): try: message = source.next() yield message except StopIteration: yield done_message(namestring) - del mapping(namestring) + del mapping[namestring] From 5910e0f789bdcea6acb63f7eacada94aaab656f8 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 17:45:52 -0400 Subject: [PATCH 07/73] tsc work --- zipline/gens/examples.py | 23 +++++++++++++++++------ zipline/gens/tradesimulation.py | 16 +++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index fb3d8827..d99da662 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -3,9 +3,13 @@ from datetime import datetime, timedelta from zipline.utils.factory import 
create_trading_environment from zipline.test_algorithms import TestAlgorithm -from zipline.gens.composites import SourceBundle, TransformBundle, date_sorted_sources, merged_transforms +from zipline.gens.composites import SourceBundle, TransformBundle, \ + date_sorted_sources, merged_transforms from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.transform import MovingAverage, Passthrough +from zipline.gens.tradesimulation import trade_simulation_client as tsc + +import zipline.protocol as zp if __name__ == "__main__": @@ -45,13 +49,20 @@ if __name__ == "__main__": sort_out = date_sorted_sources(source_bundles) passthrough = TransformBundle(Passthrough, (), {}) - mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price', 'volume']), {}) + mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) tnfm_bundles = (passthrough, mavg_price) merge_out = merged_transforms(sort_out, tnfm_bundles) - for message in merge_out: - print "Event: \n", message.event - print "Transforms: \n", message.tnfms - +# for message in merge_out: +# print "Event: \n", message.event +# print "Transforms: \n", message.tnfms + algo = TestAlgorithm(2, 100, 100) + environment = create_trading_environment() + style = zp.SIMULATION_STYLE.PARTIAL_VOLUME + + client_out = tsc(merge_out, algo, environment, style) + + for message in client_out: + print message diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 07bd48d6..76d7938e 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -1,6 +1,10 @@ +import logbook + from numbers import Integral -from zipline.gens import stateful_transform +from zipline import ndict + +from zipline.gens.transform import stateful_transform from zipline.finance.trading import TransactionSimulator from zipline.finance.performance import PerformanceTracker @@ -44,20 +48,22 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): 
# Initialize txn_sim's dictionary of orders here so that we can # reference it from within the user's algorithm. + + import nose.tools; nose.tools.set_trace() sids = algo.get_sid_filter() open_orders = {} for sid in sids: - open_orders[sids] = [] + open_orders[sid] = [] # Closure to pass into the user's algo to allow placing orders # into the txn_sim's dict of open orders. def order(self, sid, amount): assert sid in sids, "Order on invalid sid: %i" % sid - order = zp.ndict({ + order = ndict({ 'dt' : self.current_dt, 'sid' : sid, - 'amount' : int(amount) + 'amount' : int(amount), 'filled' : 0 }) @@ -75,7 +81,7 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): algo.set_order(order) # Provide a logbook logging interface to user code. - algo.set_logger(Logger("Algolog")) + algo.set_logger(logbook.Logger("Algolog")) # Call user-defined initialize method before we process any # events. From 4768bea6465b0729cf1e87f06b1bfc19350b9d6b Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 18:53:09 -0400 Subject: [PATCH 08/73] more progress on tsc --- zipline/finance/trading.py | 1 + zipline/gens/examples.py | 10 ++++------ zipline/gens/merge.py | 6 ++---- zipline/gens/tradesimulation.py | 13 ++++++++----- zipline/gens/transform.py | 9 +++++++-- zipline/utils/factory.py | 2 -- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 9cae6e72..3676437f 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -9,6 +9,7 @@ from zipline.protocol import SIMULATION_STYLE log = logbook.Logger('Transaction Simulator') class TransactionSimulator(object): + FORWARDER = True def __init__(self, open_orders, style=SIMULATION_STYLE.PARTIAL_VOLUME): self.open_orders = open_orders diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index d99da662..def7954f 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -54,15 +54,13 @@ if __name__ 
== "__main__": merge_out = merged_transforms(sort_out, tnfm_bundles) -# for message in merge_out: -# print "Event: \n", message.event -# print "Transforms: \n", message.tnfms + # for message in merge_out: +# print message algo = TestAlgorithm(2, 100, 100) environment = create_trading_environment() style = zp.SIMULATION_STYLE.PARTIAL_VOLUME - + client_out = tsc(merge_out, algo, environment, style) - for message in client_out: - print message + diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index 0e8fab93..5a4e6bde 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -54,21 +54,19 @@ def merge(stream_in, tnfm_ids): def merge_one(sources): dict_primer = zip(sources.keys(), repeat(None)) - transforms = ndict(dict_primer) event_fields = ndict() for key, queue in sources.iteritems(): # Add transform value to the transforms dict. message = queue.popleft() - transforms[message.tnfm_id] = message.tnfm_value + event_fields[message.tnfm_id] = message.tnfm_value del message['tnfm_id'] del message['tnfm_value'] # Merge any remaining fields into the event dict. event_fields.merge(message) - - return ndict({'event' : event_fields, 'tnfms' : transforms}) + return event_fields #TODO: This is replicated in sort. Probably should be one source file. diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 76d7938e..db80a8ed 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -49,7 +49,6 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Initialize txn_sim's dictionary of orders here so that we can # reference it from within the user's algorithm. - import nose.tools; nose.tools.set_trace() sids = algo.get_sid_filter() open_orders = {} @@ -88,7 +87,7 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): algo.initialize() # Pipe the in stream into the transaction simulator. 
- # Creates a TRANSACTION field on the event containing transaction + # Creates a txn field on the event containing transaction # information if we filled any pending orders on the event's sid. # TRANSACTION is None if we didn't fill any orders. with_txns = stateful_transform( @@ -100,14 +99,14 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Pipe the events with transactions to perf. This will remove the - # TRANSACTION field added by TransactionSimulator and replace it with + # txn field added by TransactionSimulator and replace it with # a portfolio object to be passed to the user's algorithm. Also adds # a PERF_MESSAGE field which is usually none, but contains an update # message once per day. with_portfolio_and_perf_msg = stateful_transform( - stream_with_txns, + with_txns, PerformanceTracker, - trading_environment, + environment, sids ) @@ -115,6 +114,10 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Will also set the PERF_MESSAGE field if the batch contains a perf # message. + def batcher(stream): + for msg in stream: + yield msg + batches = batcher(with_portfolio_and_perf_msg) for batch in batches: diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index ece2d383..2adc44e8 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -54,7 +54,7 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): "Stateful transform requires a class." assert tnfm_class.__dict__.has_key('update'), \ "Stateful transform requires the class to have an update method" - + # Create an instance of our transform class. state = tnfm_class(*args, **kwargs) @@ -71,13 +71,18 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): # Same shared pointer issue here as above. tnfm_value = state.update(deepcopy(message_copy)) - # If we want to keep all original values, just append tnfm_id + # If we want to keep all original values, plus append tnfm_id # and tnfm_value. 
if forward_all_fields: out_message = message_copy out_message.tnfm_id = namestring out_message.tnfm_value = tnfm_value yield out_message + + # Special logic for TransactionSimulator. This is ugly but I + # want to get to testing faster. Should be refactored later + # to something that doesn't make Scott cry. + elif tnfm_class.__name__ == 'TransactionSimulator' # Otherwise send tnfm_id, tnfm_value, and the message date. else: diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index db440891..004f542a 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -89,8 +89,6 @@ def get_next_trading_dt(current, interval, trading_calendar): return next - - def create_trade_history(sid, prices, amounts, interval, trading_calendar): trades = [] current = trading_calendar.first_open From 9e1a5c11cbfcc5011ccd9065926a86bb387e1f30 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 18:56:29 -0400 Subject: [PATCH 09/73] fix syntax in transform --- zipline/gens/transform.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 2adc44e8..9d90aeb8 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -79,11 +79,21 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): out_message.tnfm_value = tnfm_value yield out_message - # Special logic for TransactionSimulator. This is ugly but I - # want to get to testing faster. Should be refactored later - # to something that doesn't make Scott cry. - elif tnfm_class.__name__ == 'TransactionSimulator' + # Special logic for TransactionSimulator and + # PerformanceTracker. This is ugly but I want to get to + # testing faster. Should be refactored later to something + # that doesn't make Scott cry. 
+ elif tnfm_class.__name__ == 'TransactionSimulator': + out_message = message_copy + out_message.txn = tnfm_value + yield out_message + elif tnfm_class.__name__ == 'PerformanceTracker': + out_message = message_copy + del out_message['txn'] + out_message.portfolio = tnfm_value + yield out_message + # Otherwise send tnfm_id, tnfm_value, and the message date. else: out_message = ndict() From 14067d83239f53907e889143132eea0dff922aa2 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 1 Aug 2012 21:42:55 -0400 Subject: [PATCH 10/73] commit for fawce --- zipline/finance/performance.py | 7 +++-- zipline/finance/trading.py | 6 ++-- zipline/gens/examples.py | 21 ++++++------- zipline/gens/merge.py | 2 +- zipline/gens/tradegens.py | 2 +- zipline/gens/tradesimulation.py | 52 ++++++++++++++++++++++++++------- zipline/gens/transform.py | 25 ++++++---------- 7 files changed, 71 insertions(+), 44 deletions(-) diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index 6871cd07..065d5095 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -133,6 +133,7 @@ import zipline.finance.risk as risk log = logbook.Logger('Performance') class PerformanceTracker(object): + UPDATER = True """ Tracks the performance of the zipline as it is running in the simulator, relays this out to the Deluge broker and then @@ -202,8 +203,10 @@ class PerformanceTracker(object): self.todays_performance.positions[sid] = Position(sid) def update(self, event): - event.perf_message = self.process_event() - event.portfolio = self.get_portfolio + import nose.tools; nose.tools.set_trace() + event.perf_message = self.process_event(event) + event.portfolio = self.get_portfolio() + del event['TRANSACTION'] return event def get_portfolio(self): diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 3676437f..dd6345d4 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -9,7 +9,7 @@ from zipline.protocol import 
SIMULATION_STYLE log = logbook.Logger('Transaction Simulator') class TransactionSimulator(object): - FORWARDER = True + UPDATER = True def __init__(self, open_orders, style=SIMULATION_STYLE.PARTIAL_VOLUME): self.open_orders = open_orders @@ -28,9 +28,9 @@ class TransactionSimulator(object): self.apply_trade_to_open_orders = self.simulate_noop def update(self, event): - event.txn = None + event.TRANSACTION = None if event.type == zp.DATASOURCE_TYPE.TRADE: - event.txn = self.apply_trade_to_open_orders(event) + event.TRANSACTION = self.apply_trade_to_open_orders(event) return event def simulate_buy_all(self, event): diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index def7954f..7e01c293 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,3 +1,4 @@ +import pytz from datetime import datetime, timedelta from zipline.utils.factory import create_trading_environment @@ -17,8 +18,8 @@ if __name__ == "__main__": #Set up source a. One minute between events. args_a = tuple() kwargs_a = { - 'sids' : [1,2], - 'start' : datetime(2012,6,6,0), + 'sids' : [1], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 1), 'filter' : filter } @@ -27,9 +28,9 @@ if __name__ == "__main__": #Set up source b. Two minutes between events. args_b = tuple() kwargs_b = { - 'sids' : [2,3], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = 2), + 'sids' : [2], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 1), 'filter' : filter } bundle_b = SourceBundle(SpecificEquityTrades, args_b, kwargs_b) @@ -37,9 +38,9 @@ if __name__ == "__main__": #Set up source c. Three minutes between events. 
args_c = tuple() kwargs_c = { - 'sids' : [3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = 3), + 'sids' : [3], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 1), 'filter' : filter } bundle_c = SourceBundle(SpecificEquityTrades, args_c, kwargs_c) @@ -58,9 +59,9 @@ if __name__ == "__main__": # print message algo = TestAlgorithm(2, 100, 100) - environment = create_trading_environment() + environment = create_trading_environment(year = 2012) style = zp.SIMULATION_STYLE.PARTIAL_VOLUME client_out = tsc(merge_out, algo, environment, style) - + client_out.next() diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index 5a4e6bde..dfd904d2 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -19,7 +19,7 @@ def merge(stream_in, tnfm_ids): """ assert isinstance(tnfm_ids, list) - + # Set up an internal queue for each expected source. tnfms = {} for id in tnfm_ids: diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index e3b88a0e..7420e1b4 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta from zipline.utils.factory import create_trade from zipline.gens.utils import hash_args, mock_done -def date_gen(start = datetime(2012, 6, 6, 0), +def date_gen(start = datetime(2006, 6, 6, 12), delta = timedelta(minutes = 1), count = 100): """ diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index db80a8ed..2c12dc6f 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -1,5 +1,6 @@ import logbook +from datetime import datetime, timedelta from numbers import Integral from zipline import ndict @@ -75,7 +76,7 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): return open_orders[sid].append(event) - + # Set the algo's order method. 
algo.set_order(order) @@ -85,7 +86,7 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Call user-defined initialize method before we process any # events. algo.initialize() - + # Pipe the in stream into the transaction simulator. # Creates a txn field on the event containing transaction # information if we filled any pending orders on the event's sid. @@ -111,16 +112,45 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): ) # Batch the event stream by dt to be processed by the user's algo. - # Will also set the PERF_MESSAGE field if the batch contains a perf - # message. + # Yields perf messages whenever it encounters them. + perf_messages = algo_simulator(with_portfolio_and_perf_msg, algo) - def batcher(stream): - for msg in stream: - yield msg + - batches = batcher(with_portfolio_and_perf_msg) - for batch in batches: - algo.handle_data(batch.data) - if batch.perf_message: +def algo_simulator(stream_in, sids, algo): + + current_dt = None + universe = ndict() + + for sid in sids: + universe[sid] = None + universe.portfolio = None + + for update in stream_in: + #Yield perf messages to be relayed back to the browser. + if update.perf_message: yield perf_message + + if current_dt = None: + current_dt = update.dt + + # If this message is newer than the algorithm's simulated dt, + # call handle data on a snapshot of the current algo universe, + # then + if message.dt >= current_dt + last_delta: + start_tic = datetime.now() + algo.handle_data(universe) + stop_tic = datetime.now() + last_delta = datetime + + current_dt = message.dt + last_delta + + batch.data[message.sid] = message + batch.data.portfolio = message.portfolio + + + + + + diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 9d90aeb8..44ec4b5a 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -49,6 +49,7 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): are forwarded. 
""" forward_all_fields = tnfm_class.__dict__.get('FORWARDER', False) + update_in_place = tnfm_class.__dict__.get('UPDATER', False) assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ "Stateful transform requires a class." @@ -72,29 +73,21 @@ def stateful_transform(stream_in, tnfm_class, *args, **kwargs): tnfm_value = state.update(deepcopy(message_copy)) # If we want to keep all original values, plus append tnfm_id - # and tnfm_value. + # and tnfm_value. Used for Passthrough. if forward_all_fields: out_message = message_copy out_message.tnfm_id = namestring out_message.tnfm_value = tnfm_value yield out_message - # Special logic for TransactionSimulator and - # PerformanceTracker. This is ugly but I want to get to - # testing faster. Should be refactored later to something - # that doesn't make Scott cry. - elif tnfm_class.__name__ == 'TransactionSimulator': - out_message = message_copy - out_message.txn = tnfm_value - yield out_message + # Our expectation is that the transform simply updated the + # message it was passed. Useful for chaining together + # multiple transforms, e.g. TransactionSimulator/PerformanceTracker. + elif update_in_place: + yield tnfm_value - elif tnfm_class.__name__ == 'PerformanceTracker': - out_message = message_copy - del out_message['txn'] - out_message.portfolio = tnfm_value - yield out_message - - # Otherwise send tnfm_id, tnfm_value, and the message date. + # Otherwise send tnfm_id, tnfm_value, and the message + # date. Useful for transforms being piped to a merge. 
else: out_message = ndict() out_message.tnfm_id = namestring From 318065125fbe958761e6e51752494a15c35fe8ce Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 1 Aug 2012 21:43:15 -0400 Subject: [PATCH 11/73] refactored component to use a generator --- tests/test_components.py | 81 +++++++++ zipline/core/component.py | 324 ++++++++++++------------------------ zipline/core/monitor.py | 7 - zipline/gens/utils.py | 6 +- zipline/utils/test_utils.py | 32 ++++ 5 files changed, 227 insertions(+), 223 deletions(-) create mode 100644 tests/test_components.py diff --git a/tests/test_components.py b/tests/test_components.py new file mode 100644 index 00000000..5c39abbf --- /dev/null +++ b/tests/test_components.py @@ -0,0 +1,81 @@ +import zmq +from datetime import datetime, timedelta + +from unittest2 import TestCase +from collections import defaultdict + +from zipline.test_algorithms import ExceptionAlgorithm, DivByZeroAlgorithm +from zipline.finance.trading import SIMULATION_STYLE +from zipline.core.devsimulator import AddressAllocator +from zipline.lines import SimulatedTrading + +from zipline.utils.test_utils import ( + drain_zipline, + check, + setup_logger, + teardown_logger, + launch_component, + gen_from_socket +) + + +from zipline.core import Component +from zipline.protocol import ( + DATASOURCE_FRAME +) + +from zipline.gens.tradegens import SpecificEquityTrades +from zipline.gens.utils import hash_args + + +import logbook +log = logbook.Logger('ComponentTestCase') + +allocator = AddressAllocator(1000) + + +class ComponentTestCase(TestCase): + + leased_sockets = defaultdict(list) + + def setUp(self): + self.zipline_test_config = { + 'allocator' : allocator, + 'sid' : 133, + 'devel' : False, + 'results_socket' : allocator.lease(1)[0], + 'simulation_style' : SIMULATION_STYLE.FIXED_SLIPPAGE + } + self.ctx = zmq.Context() + setup_logger(self) + + def tearDown(self): + self.ctx.term() + teardown_logger(self) + + def test_specific_equity_source(self): + #Set up source a. 
One minute between events. + args_a = tuple() + kwargs_a = { + 'sids' : [1,2], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 1), + 'filter' : filter + } + + c_id = SpecificEquityTrades.__name__ + hash_args(args_a, kwargs_a) + + c = Component( + SpecificEquityTrades, + args_a, + kwargs_a, + out_uri=self.out_uri, + frame=DATASOURCE_FRAME, + monitor_uri=None + ) + # launch in a process + proc = launch_component(c) + + for msg in gen_from_socket(self.out_uri): + # assert things about the messages. + log.info(msg) diff --git a/zipline/core/component.py b/zipline/core/component.py index 962966f4..ac92b565 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -11,6 +11,7 @@ import logbook import traceback import humanhash from setproctitle import setproctitle +from collections import namedtuple # pyzmq import zmq @@ -26,14 +27,15 @@ from zipline.protocol import ( EXCEPTION_FRAME ) -log = logbook.Logger('Component') -from zipline.exceptions import ComponentNoInit +log = logbook.Logger('Component') class KillSignal(Exception): def __init__(self): pass +ComponentSocketArgs = namedtuple('ComponentSocket',['uri','style','bind']) + class Component(object): """ @@ -74,52 +76,64 @@ class Component(object): # Construction # ------------ - abstract = True - #__metaclass__ = WorkflowMeta + def __init__(self, + gen_func, + gen_args, + gen_kwargs, + component_id, + out_socket_args, + controller=None, + in_socket_args=None + ): - def __init__(self, *args, **kwargs): - self.zmq = None - self.context = None - self.addresses = None - self.waiting = None + assert component_id, \ + "Every component needs a unique and invariant identifier" + assert isinstance(component_id, basestring), \ + "Components must have string IDs" + assert isinstance(out_socket_args, ComponentSocketArgs), \ + "out_socket_args args must be ComponentSocketArgs" + + if in_socket_args: + assert isinstance(in_socket_args, ComponentSocketArgs), \ + "in_socket_args args must be 
ComponentSocketArgs" + + if monitor_socket_args: + assert isinstance(monitor_socket_args, ComponentSocketArgs), \ + "monitor_socket_args args must be ComponentSocketArgs" + + + # ----------------- + # Generator + # ----------------- + self.component_id = component_id + self.gen_args = gen_args + self.gen_kwargs = gen_kwargs + self.gen_func = gen_func + self.generator = None + + # lock for waiting on monitor "GO" + self.waiting = None + + # ----------------- + # ZMQ properties + # ----------------- + self.in_socket_args = in_socket_args + self.out_socket_args = out_socket_args + self.zmq = None + self.context = None + self.out_socket = None + self.in_socket = None + self.controller = controller - self.out_socket = None - self.killed = False - self.controller = None - # timeout on heartbeat is very short to avoid burning - # cycles on heartbeating. unit is milliconds - self.heartbeat_timeout = 0 # TODO: state_flag is deprecated, remove - # TODO: error_state is deprecated, remove - self.state_flag = COMPONENT_STATE.OK - self.error_state = COMPONENT_FAILURE.NOFAILURE - self.on_done = None + self.state_flag = COMPONENT_STATE.OK - self._exception = None - self.fail_time = None - self.start_tic = None - self.stop_tic = None - self.note = None - self.confirmed = False - self.devel = False - self.socks = None - self.last_ping = None + self.last_ping = None # Humanhashes make this way easier to debug because they stick # in your mind unlike a 32 byte string of random hex. - self.guid = uuid.uuid4() - self.huid = humanhash.humanize(self.guid.hex) - - # This is where component specific constructors should be - # defined. Arguments passed to init are threaded through. - self.init(*args, **kwargs) - - def init(self): - """ - Subclasses should override this to extend the setup for the - class. Shouldn't have side effects. 
- """ - raise ComponentNoInit(self.__class__) + self.guid = uuid.uuid4() + self.huid = humanhash.humanize(self.guid.hex) # ------------ @@ -129,8 +143,45 @@ class Component(object): def open(self): """ Open the connections needed to start doing work. + Perform any setup that must be done within process. """ - raise NotImplementedError + # The process title so you can watch it in top, ps. + setproctitle(self.generator.__name__) + + if self.in_socket_args: + self.in_socket = self.open_socket(self.in_socket_args) + poller_gen = self.gen_from_zmq(self.in_socket) + self.gen_func(poller_gen, *self.gen_args, **self.gen_kwargs) + else: + self.generator = self.gen_func(*self.gen_args, **self.gen_kwargs) + + self.out_socket = self.open_socket(self.out_socket_args) + + def open_socket(self, sock_args): + if sock_args.bind: + return self.bind_socket(sock_args) + else: + return self.connect_socket(sock_args) + + def bind_socket(self, sock_args): + if sock_args.style == zmq.PULL: + return self.bind_pull_socket(sock_args.uri) + if sock_args.style == zmq.PUSH: + return self.bind_push_socket(sock_args.uri) + if sock_args.style == zmq.PUB: + return self.bind_pub_socket(sock_args.uri) + + raise Exception("Invalid socket arguments") + + def connect_socket(self, sock_args): + if sock_args.style == zmq.PULL: + return self.connect_pull_socket(sock_args.uri) + if sock_args.style == zmq.PUSH: + return self.connect_push_socket(sock_args.uri) + if sock_args.style == zmq.SUB: + return self.connect_sub_socket(sock_args.uri) + + raise Exception("Invalid socket arguments") def ready(self): """ @@ -148,23 +199,10 @@ class Component(object): return self.state_flag == COMPONENT_STATE.DONE and not \ self.exception - @property - def exception(self): - """ - Holds the exception that the component failed on, or ``None`` if - the component has not failed. 
- """ - return self._exception - - def do_work(self): - raise NotImplementedError - def init_zmq(self): self.zmq = zmq self.context = self.zmq.Context() self.zmq_poller = self.zmq.Poller - # The the process title so you can watch it in top - setproctitle(self.__class__.__name__) return def _run(self): @@ -178,13 +216,10 @@ class Component(object): log.info("Pid %s" % os.getpid()) log.info("Group %s" % os.getpgrp()) - self.start_tic = time.time() - self.done = False # TODO: use state flag self.sockets = [] self.init_zmq() - self.setup_poller() self.setup_control() @@ -193,6 +228,7 @@ class Component(object): self.signal_ready() self.lock_ready() self.wait_ready() + # ----------------------- # YOU SHALL NOT PASS!!!!! # ----------------------- @@ -200,7 +236,6 @@ class Component(object): self.loop() - self.stop_tic = time.time() def run(self, catch_exceptions=True): """ @@ -219,7 +254,6 @@ class Component(object): self.teardown_sockets() finally: - self.shutdown() log.info("Exiting %r" % self) def working(self): @@ -236,30 +270,22 @@ class Component(object): """ Loop to do work while we still have work to do. 
""" - while self.working(): - self.heartbeat() - self.do_work() - def runtime(self): - if self.ready() and self.start_tic and self.stop_tic: - return self.stop_tic - self.start_tic + for event in self.generator: + self.heartbeat() + msg = self.frame(event) + self.out_socket.send(msg) def heartbeat(self, timeout=0): # wait for synchronization reply from the host - self.socks = dict(self.poll.poll(timeout)) + socks = dict(self.poll.poll(timeout)) # ---------------- # Control Dispatch # ---------------- assert self.control_in, 'Component does not have a control_in socket' - # If we're in devel mode drop out because the controller - # isn't guaranteed to be around anymore - if self.devel: - log.warn("Skipping heartbeat because of devel flag") - return - - if self.socks.get(self.control_in) == zmq.POLLIN: + if socks.get(self.control_in) == zmq.POLLIN: msg = self.control_in.recv() event, payload = CONTROL_UNFRAME(msg) @@ -307,9 +333,7 @@ class Component(object): # In case we didn't receive a ping, send a pre-emptive # pong to the monitor. - elif hasattr(self, 'control_out') and \ - self.last_ping and \ - time.time() - self.last_ping > 1: + elif self.last_ping and time.time() - self.last_ping > 1: # send a ping ahead of schedule pre_pong = time.time() heartbeat_frame = CONTROL_FRAME( @@ -342,16 +366,6 @@ class Component(object): for sock in self.sockets: sock.close() - def shutdown(self): - """ - Clean shutdown. - - Tear down after normal operation. - """ - if self.on_done: - log.warn("{id} calling done.".format(id=self.get_id)) - self.on_done() - def kill(self): """ Unclean shutdown. @@ -359,7 +373,6 @@ class Component(object): Tear down ( fast ) as a mode of failure in the simulation or on service halt. 
""" - # sys.exit(1) raise KillSignal() # ---------------------- @@ -452,28 +465,11 @@ class Component(object): def signal_ready(self): log.info(self.__class__.__name__ + ' is ready') - - if hasattr(self, 'control_out'): - frame = CONTROL_FRAME( - CONTROL_PROTOCOL.READY, - '' - ) - self.control_out.send(frame) - - def signal_cancel(self): - self.done = True - - # TODO: no hasattr hacks - #if not self.controller: - if hasattr(self, 'control_out'): - frame = CONTROL_FRAME( - CONTROL_PROTOCOL.SHUTDOWN, - None - ) - self.control_out.send(frame) - - # then proceeds to do shutdown(), and teardown_sockets() - # to complete the process + frame = CONTROL_FRAME( + CONTROL_PROTOCOL.READY, + '' + ) + self.control_out.send(frame) def signal_exception(self, exc=None, scope=None): """ @@ -483,19 +479,7 @@ class Component(object): Will inform the system that the component has failed and how it has failed. """ - - if scope == 'algo': - self.error_state = COMPONENT_FAILURE.ALGOEXCEPT - else: - self.error_state = COMPONENT_FAILURE.HOSTEXCEPT - self.state_flag = COMPONENT_STATE.EXCEPTION - # mark the time of failure so we can track the failure - # progogation through the system. - - self.stop_tic = time.time() - - self._exception = exc exc_type, exc_value, exc_traceback = sys.exc_info() # if a downstream component fails, this component may try @@ -571,9 +555,6 @@ class Component(object): # last heartbeat, and wait an unusually long time. 
self.heartbeat(timeout=5000) - - - # ----------- # Messaging # ----------- @@ -585,30 +566,6 @@ class Component(object): """ self.poll = self.zmq_poller() - def bind_data(self): - return self.bind_pull_socket(self.addresses['data_address']) - - def connect_data(self): - return self.connect_push_socket(self.addresses['data_address']) - - def bind_feed(self): - return self.bind_pub_socket(self.addresses['feed_address']) - - def connect_feed(self): - return self.connect_sub_socket(self.addresses['feed_address']) - - def bind_merge(self): - return self.bind_pull_socket(self.addresses['merge_address']) - - def connect_merge(self): - return self.connect_push_socket(self.addresses['merge_address']) - - def bind_result(self): - return self.bind_push_socket(self.addresses['results_address']) - - def connect_result(self): - return self.connect_pull_socket(self.addresses['results_address']) - def bind_push_socket(self, addr): push_socket = self.context.socket(self.zmq.PUSH) push_socket.bind(addr) @@ -638,7 +595,6 @@ class Component(object): def connect_push_socket(self, addr): push_socket = self.context.socket(self.zmq.PUSH) push_socket.connect(addr) - #push_socket.setsockopt(self.zmq.LINGER,0) self.sockets.append(push_socket) self.out_socket = push_socket @@ -647,7 +603,6 @@ class Component(object): def bind_pub_socket(self, addr): pub_socket = self.context.socket(self.zmq.PUB) pub_socket.bind(addr) - #pub_socket.setsockopt(self.zmq.LINGER, 0) self.out_socket = pub_socket return pub_socket @@ -668,12 +623,6 @@ class Component(object): of the simulation and to forcefully tear down the simulation in case of a failure. """ - - # Allow for the possibility of not having a controller, - # possibly the zipline devsimulator may not want this. 
- if not self.controller: - return - self.control_out = self.controller.message_sender( identity = self.get_id, context = self.context, @@ -686,29 +635,6 @@ class Component(object): self.poll.register(self.control_in, self.zmq.POLLIN) self.sockets.extend([self.control_in, self.control_out]) - # ----------- - # FSM Actions - # ----------- - - #@property - #def state(self): - #if not hasattr(self, '_state'): - #self._state = self.initial_state - #else: - #return self._state - - #@state.setter - #def state(self, new): - #if not hasattr(self, '_state'): - #self._state = self.initial_state - - #old = self._state - - #if (old, new) in self.workflow: - #self._state = new - #else: - #raise RuntimeError("Invalid State Transition : %s -> %s" %(old, new)) - # --------------------- # Description and Debug # --------------------- @@ -728,32 +654,10 @@ class Component(object): @property def get_id(self): """ - The descriptive name of the component. + The time invariant name for this component. + Must be unique within this zipline. """ - # Prevents the bug that Thomas ran into - raise NotImplementedError - - @property - def get_type(self): - """ - The data flow type of the component. - - - ``SOURCE`` - - ``CONDUIT`` - - ``SINK`` - - """ - raise NotImplementedError - - @property - def get_pure(self): - """ - Describes whehter this component purely functional, i.e. for a - given set of inputs is it guaranteed to always give the same - output . Components that are side-effectful are, generally, not - pure. 
- """ - return False + return self.component_id def debug(self): """ @@ -766,18 +670,12 @@ class Component(object): 'pid' : os.getpid() , 'memaddress' : hex(id(self)) , 'ready' : self.successful() , - 'succesfull' : self.ready() , + 'successful' : self.ready() , } - def __len__(self): - """ - Some components overload this for debug purposes - """ - raise NotImplementedError - def __repr__(self): """ - Return a usefull string representation of the component to + Return a useful string representation of the component to indicate its type, unique identifier, and computational context identifier name. """ diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index ff6adf0c..08dfd601 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -103,8 +103,6 @@ class Controller(object): self.route_socket = route_socket self.exception_socket = exception_socket - self.error_replay = OrderedDict() - self.missed_beats = Counter() self.send_sighup = send_sighup @@ -499,7 +497,6 @@ class Controller(object): # Error Handling # -------------- def exception(self, component, exception_data): - self.error_replay[(component, time.time())] = exception_data log.error('Component in exception state: %s. Shutting down system and sending exception data to listeners.'\ % component) # Send the exception message out to listeners. @@ -616,10 +613,6 @@ class Controller(object): self.associated.append(s) return s - def do_error_replay(self): - for (component, time), error in self.error_replay.iteritems(): - log.info('Component Log for -- %s --:\n%s' % (component, error)) - def kill(self): """Aggressively exit the whole zipline. 
""" diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index d51452bf..d76966f5 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -19,8 +19,8 @@ def mock_raw_event(sid, dt): def mock_done(id): return ndict({ - 'dt' : "DONE", - "source_id" : id, + 'dt' : "DONE", + "source_id" : id, 'tnfm_id' : id, 'tnfm_value': None, 'type' : 0 @@ -43,7 +43,7 @@ def roundrobin(sources, namestrings): """ assert len(sources) == len(namestrings) mapping = OrderedDict(zip(namestrings, sources)) - + # While our generators have not been exhausted, pull elements while mapping.keys() != []: for namestring, source in mapping.iteritems(): diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 6655c265..74591858 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -1,3 +1,4 @@ +import multiprocessing import zmq import time import zipline.protocol as zp @@ -7,6 +8,7 @@ from zipline.utils.date_utils import EPOCH from itertools import izip from logbook import FileHandler + def setup_logger(test, path='/var/log/zipline/zipline.log'): test.log_handler = FileHandler(path) test.log_handler.push_application() @@ -140,3 +142,33 @@ def assert_single_position(test, zipline): sid, "Portfolio should have one position in " + str(sid) ) + + +def launch_component(self, component): + proc = multiprocessing.Process(target=component.run) + proc.start() + self.subprocesses.append(proc) + + self.mapping[proc.pid] = component.get_id + return proc + +def gen_from_socket(socket_uri, context, unframe): + """ + A generator that takes a socket_uri, and yields + messages from the poller until it gets a zp.CONTROL_PROTOCOL.DONE. 
+ """ + pull_socket = context.socket(zmq.PULL) + pull_socket.connect(socket_uri) + poller = zmq.Poller() + poller.register(pull_socket, zmq.POLLIN) + + while True: + socks = dict(poller.poll(1000)) + + if socks.get(pull_socket) == zmq.POLLIN: + message = pull_socket.recv() + + if message.type == zp.CONTROL_PROTOCOL.DONE: + break + else: + yield unframe(message) From dd1056bf309c1dc3725a1fd3eca41db216a6dfd6 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 1 Aug 2012 23:41:44 -0400 Subject: [PATCH 12/73] generator backed component, and a starter test for a source. --- tests/test_components.py | 45 +++- tests/test_monitor.py | 10 +- zipline/__init__.py | 4 +- zipline/core/__init__.py | 4 +- zipline/core/component.py | 514 +++++++++++++++--------------------- zipline/core/host.py | 2 +- zipline/core/monitor.py | 4 +- zipline/core/process.py | 8 +- zipline/gens/zmqgen.py | 27 ++ zipline/lines.py | 14 +- zipline/utils/test_utils.py | 44 +-- 11 files changed, 324 insertions(+), 352 deletions(-) create mode 100644 zipline/gens/zmqgen.py diff --git a/tests/test_components.py b/tests/test_components.py index 5c39abbf..02b05a69 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -1,10 +1,10 @@ import zmq +import pytz from datetime import datetime, timedelta from unittest2 import TestCase from collections import defaultdict -from zipline.test_algorithms import ExceptionAlgorithm, DivByZeroAlgorithm from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator from zipline.lines import SimulatedTrading @@ -15,18 +15,21 @@ from zipline.utils.test_utils import ( setup_logger, teardown_logger, launch_component, - gen_from_socket + create_monitor, + launch_monitor ) from zipline.core import Component +from zipline.core.component import ComponentSocketArgs from zipline.protocol import ( - DATASOURCE_FRAME + DATASOURCE_FRAME, + DATASOURCE_UNFRAME ) from zipline.gens.tradegens import SpecificEquityTrades from 
zipline.gens.utils import hash_args - +from zipline.gens.zmqgen import gen_from_poller import logbook log = logbook.Logger('ComponentTestCase') @@ -54,28 +57,50 @@ class ComponentTestCase(TestCase): teardown_logger(self) def test_specific_equity_source(self): + filter = [1,2,3,4] #Set up source a. One minute between events. args_a = tuple() kwargs_a = { 'sids' : [1,2], - 'start' : datetime(2012,6,6,0), + 'start' : datetime(2012,6,6,0,tzinfo=pytz.utc), 'delta' : timedelta(minutes = 1), - 'filter' : filter + 'filter' : filter, + 'count' : 100 } c_id = SpecificEquityTrades.__name__ + hash_args(args_a, kwargs_a) + mon = create_monitor(allocator) + + out_socket_args = ComponentSocketArgs( + style=zmq.PUSH, + uri=allocator.lease(1)[0], + bind=True + ) c = Component( SpecificEquityTrades, args_a, kwargs_a, - out_uri=self.out_uri, - frame=DATASOURCE_FRAME, - monitor_uri=None + c_id, + out_socket_args, + DATASOURCE_FRAME, + mon ) + + mon.manage(set([c.get_id])) + mon_proc = launch_monitor(mon) + # launch in a process proc = launch_component(c) - for msg in gen_from_socket(self.out_uri): + pull_socket = self.ctx.socket(zmq.PULL) + pull_socket.connect(out_socket_args.uri) + poller = zmq.Poller() + poller.register(pull_socket, zmq.POLLIN) + unframe = DATASOURCE_UNFRAME + for msg in gen_from_poller(poller, pull_socket, unframe): # assert things about the messages. 
log.info(msg) + + pull_socket.close() + log.info("DONE!") diff --git a/tests/test_monitor.py b/tests/test_monitor.py index 5f55aaee..3d063954 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -1,7 +1,7 @@ from zipline.utils.test_utils import setup_logger, teardown_logger from unittest2 import TestCase, skip -from zipline.core.monitor import Controller +from zipline.core.monitor import Monitor class TestMonitor(TestCase): def setUp(self): @@ -15,12 +15,12 @@ class TestMonitor(TestCase): pub_socket = 'tcp://127.0.0.1:5000' route_socket = 'tcp://127.0.0.1:5001' - con = Controller(pub_socket, route_socket) - con.manage([]) + mon = Monitor(pub_socket, route_socket) + mon.manage([]) def test_init_topology(self): pub_socket = 'tcp://127.0.0.1:5000' route_socket = 'tcp://127.0.0.1:5001' - con = Controller(pub_socket, route_socket, ) - con.manage([ 'a', 'b', 'c', 'd' ]) + mon = Monitor(pub_socket, route_socket, ) + mon.manage([ 'a', 'b', 'c', 'd' ]) diff --git a/zipline/__init__.py b/zipline/__init__.py index 23bcca40..a84cd345 100644 --- a/zipline/__init__.py +++ b/zipline/__init__.py @@ -6,14 +6,14 @@ Zipline # it is a place to expose the public interfaces. 
import protocol # namespace -from core.monitor import Controller +from core.monitor import Monitor from lines import SimulatedTrading from core.host import ComponentHost from utils.protocol_utils import ndict __all__ = [ SimulatedTrading, - Controller, + Monitor, ComponentHost, protocol, ndict diff --git a/zipline/core/__init__.py b/zipline/core/__init__.py index d487dd05..a7d6b1f8 100644 --- a/zipline/core/__init__.py +++ b/zipline/core/__init__.py @@ -1,9 +1,9 @@ from host import ComponentHost from component import Component -from monitor import Controller +from monitor import Monitor __all__ = [ Component, - Controller, + Monitor, ComponentHost ] diff --git a/zipline/core/component.py b/zipline/core/component.py index ac92b565..f9dc63c1 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -16,6 +16,8 @@ from collections import namedtuple # pyzmq import zmq +from zipline.gens.zmqgen import gen_from_poller + from zipline.core.monitor import PARAMETERS from zipline.protocol import ( @@ -38,40 +40,6 @@ ComponentSocketArgs = namedtuple('ComponentSocket',['uri','style','bind']) class Component(object): - """ - Base class for components. Defines the the base messaging - interface for components. - - :param addresses: a dict of name_string -> zmq port address strings. - Must have the following entries - - :param data_address: socket address used for data sources to stream - their records. Will be used in PUSH/PULL sockets - between data sources and a Feed. Bind will always - be on the PULL side (we always have N producers and - 1 consumer) - - :param feed_address: socket address used to publish consolidated feed - from serialization of data sources - will be used in PUB/SUB sockets between Feed and - Transforms. Bind is always on the PUB side. - - :param merge_address: socket address used to publish transformed - values. 
will be used in PUSH/PULL from many - transforms to one Merge Bind will always be on - the PULL side (we always have N producers and - 1 consumer) - - :param results_address: socket address used to publish merged data - source feed and transforms to clients will be - used in PUB/SUB from one Merge to one or many - clients. Bind is always on the PUB side. - - bind/connect methods will return the correct socket type for each - address. - - """ - # ------------ # Construction # ------------ @@ -82,8 +50,10 @@ class Component(object): gen_kwargs, component_id, out_socket_args, - controller=None, - in_socket_args=None + frame, + monitor, + in_socket_args=None, + unframe=None ): assert component_id, \ @@ -97,11 +67,6 @@ class Component(object): assert isinstance(in_socket_args, ComponentSocketArgs), \ "in_socket_args args must be ComponentSocketArgs" - if monitor_socket_args: - assert isinstance(monitor_socket_args, ComponentSocketArgs), \ - "monitor_socket_args args must be ComponentSocketArgs" - - # ----------------- # Generator # ----------------- @@ -110,6 +75,7 @@ class Component(object): self.gen_kwargs = gen_kwargs self.gen_func = gen_func self.generator = None + self.frame = frame # lock for waiting on monitor "GO" self.waiting = None @@ -123,11 +89,13 @@ class Component(object): self.context = None self.out_socket = None self.in_socket = None - self.controller = controller + self.monitor = monitor + self.unframe = unframe # TODO: state_flag is deprecated, remove self.state_flag = COMPONENT_STATE.OK + # track time of last ping we received from monitor self.last_ping = None # Humanhashes make this way easier to debug because they stick @@ -140,70 +108,6 @@ class Component(object): # Core Methods # ------------ - def open(self): - """ - Open the connections needed to start doing work. - Perform any setup that must be done within process. - """ - # The process title so you can watch it in top, ps. 
- setproctitle(self.generator.__name__) - - if self.in_socket_args: - self.in_socket = self.open_socket(self.in_socket_args) - poller_gen = self.gen_from_zmq(self.in_socket) - self.gen_func(poller_gen, *self.gen_args, **self.gen_kwargs) - else: - self.generator = self.gen_func(*self.gen_args, **self.gen_kwargs) - - self.out_socket = self.open_socket(self.out_socket_args) - - def open_socket(self, sock_args): - if sock_args.bind: - return self.bind_socket(sock_args) - else: - return self.connect_socket(sock_args) - - def bind_socket(self, sock_args): - if sock_args.style == zmq.PULL: - return self.bind_pull_socket(sock_args.uri) - if sock_args.style == zmq.PUSH: - return self.bind_push_socket(sock_args.uri) - if sock_args.style == zmq.PUB: - return self.bind_pub_socket(sock_args.uri) - - raise Exception("Invalid socket arguments") - - def connect_socket(self, sock_args): - if sock_args.style == zmq.PULL: - return self.connect_pull_socket(sock_args.uri) - if sock_args.style == zmq.PUSH: - return self.connect_push_socket(sock_args.uri) - if sock_args.style == zmq.SUB: - return self.connect_sub_socket(sock_args.uri) - - raise Exception("Invalid socket arguments") - - def ready(self): - """ - Return ``True`` if and only if the component has finished - execution. - """ - return self.state_flag in [COMPONENT_STATE.DONE, \ - COMPONENT_STATE.EXCEPTION] - - def successful(self): - """ - Return ``True`` if and only if the component has finished - execution successfully, that is, without raising an error. - """ - return self.state_flag == COMPONENT_STATE.DONE and not \ - self.exception - - def init_zmq(self): - self.zmq = zmq - self.context = self.zmq.Context() - self.zmq_poller = self.zmq.Poller - return def _run(self): """ @@ -212,17 +116,15 @@ class Component(object): The core logic of the all components is run here. """ + # The process title so you can watch it in top, ps. 
+ setproctitle(self.gen_func.__name__) + log.info("Start %r" % self) log.info("Pid %s" % os.getpid()) log.info("Group %s" % os.getpgrp()) - self.done = False # TODO: use state flag self.sockets = [] - self.init_zmq() - self.setup_poller() - - self.setup_control() self.open() self.signal_ready() @@ -232,10 +134,14 @@ class Component(object): # ----------------------- # YOU SHALL NOT PASS!!!!! # ----------------------- - # ... until the controller signals GO + # ... until the monitor signals GO - self.loop() + for event in self.generator: + self.heartbeat() + msg = self.frame(event) + self.out_socket.send(msg) + self.signal_done() def run(self, catch_exceptions=True): """ @@ -249,108 +155,10 @@ class Component(object): else: # if we get a kill signal, forcibly close all the # sockets. - # exc_info = sys.exc_info() - # self.relay_exception(exc_info[0], exc_info[1], exc_info[2]) self.teardown_sockets() - finally: log.info("Exiting %r" % self) - def working(self): - """ - Controls when the work loop will start and end - - If we encounter an exception or signal done exit. - - Overload for higher order behavior. - """ - return (not self.done) - - def loop(self, lockstep=True): - """ - Loop to do work while we still have work to do. - """ - - for event in self.generator: - self.heartbeat() - msg = self.frame(event) - self.out_socket.send(msg) - - def heartbeat(self, timeout=0): - # wait for synchronization reply from the host - socks = dict(self.poll.poll(timeout)) - - # ---------------- - # Control Dispatch - # ---------------- - assert self.control_in, 'Component does not have a control_in socket' - - if socks.get(self.control_in) == zmq.POLLIN: - msg = self.control_in.recv() - event, payload = CONTROL_UNFRAME(msg) - - # =========== - # Heartbeat - # =========== - - # The controller will send out a single number packed in - # a CONTROL_FRAME with ``heartbeat`` event every - # (n)-seconds. The component then has n seconds to - # respond to it. 
If not then it will be considered as - # malfunctioning or maybe CPU bound. - - if event == CONTROL_PROTOCOL.HEARTBEAT: - # Heart outgoing - heartbeat_frame = CONTROL_FRAME( - CONTROL_PROTOCOL.OK, - payload - ) - - self.last_ping = float(payload) - # Echo back the heartbeat identifier to tell the - # controller that this component is still alive and - # doing work - self.control_out.send(heartbeat_frame) - - - # ========= - # Soft Kill - # ========= - - # Try and clean up properly and send out any reports or - # data that are done during a clean shutdown. Inform the - # controller that we're done. - elif event == CONTROL_PROTOCOL.SHUTDOWN: - self.signal_done() - - # ========= - # Hard Kill - # ========= - - # Just exit. - elif event == CONTROL_PROTOCOL.KILL: - self.kill() - - # In case we didn't receive a ping, send a pre-emptive - # pong to the monitor. - elif self.last_ping and time.time() - self.last_ping > 1: - # send a ping ahead of schedule - pre_pong = time.time() - heartbeat_frame = CONTROL_FRAME( - CONTROL_PROTOCOL.OK, - str(pre_pong) - ) - - # Echo back the heartbeat identifier to tell the - # controller that this component is still alive and - # doing work - self.control_out.send(heartbeat_frame, self.zmq.NOBLOCK) - self.last_ping = pre_pong - elif self.last_ping and \ - time.time() - self.last_ping > PARAMETERS.MAX_COMPONENT_WAIT: - # monitor is gone without sending the shutdown - # signal, do a hard exit. - self.kill() # ---------------------------- # Cleanup & Modes of Failure @@ -375,6 +183,62 @@ class Component(object): """ raise KillSignal() + def signal_exception(self, exc=None, scope=None): + """ + All exceptions inside any component should boil back to + this handler. + + Will inform the system that the component has failed and how it + has failed. 
+ """ + self.state_flag = COMPONENT_STATE.EXCEPTION + exc_type, exc_value, exc_traceback = sys.exc_info() + + # if a downstream component fails, this component may try + # sending when there are zero connections to the socket, + # which will raise ZMQError(EAGAIN). So, it doesn't make + # sense to relay this exception to Monitor and the rest + # of the zipline. + if isinstance(exc, zmq.ZMQError) and exc.errno == zmq.EAGAIN: + log.warn("{id} raised a ZMQError(EAGAIN) not relaying"\ + .format(id=self.get_id)) + return + + # sys.stdout.write(trace) + log.exception("Unexpected error in run for {id}.".format(id=self.get_id)) + + try: + log.info('{id} sending exception to monitor'\ + .format(id=self.get_id)) + msg = EXCEPTION_FRAME( + exc_traceback, + exc_type.__name__, + exc_value.message + ) + + exception_frame = CONTROL_FRAME( + CONTROL_PROTOCOL.EXCEPTION, + msg + ) + self.control_out.send(exception_frame, self.zmq.NOBLOCK) + # The monitor should relay the exception back + # to all zipline components. Wait here until the + # notice arrives, and we can assume other zipline + # components have broken out of their message + # loops. + for i in xrange(PARAMETERS.MAX_COMPONENT_WAIT): + self.heartbeat(timeout=1000) + log.warn("{id} never heard back from monitor."\ + .format(id=self.get_id)) + + except KillSignal: + log.info("{id} received confirmation from monitor"\ + .format(id=self.get_id)) + except: + log.exception("Exception waiting for monitor reply") + + + # ---------------------- # Internal Maintenance # ---------------------- @@ -418,7 +282,7 @@ class Component(object): # Go # ==== - # A distributed lock from the controller to ensure + # A distributed lock from the monitor to ensure # synchronized start. 
if event == CONTROL_PROTOCOL.HEARTBEAT: @@ -430,7 +294,7 @@ class Component(object): log.info('Prestart Heartbeat ' + self.get_id) elif event == CONTROL_PROTOCOL.GO: - # Side effectful call from the controller to unlock + # Side effectful call from the monitor to unlock # and begin doing work only when the entire topology # of the system beings to come online log.info('Unlocking ' + self.__class__.__name__) @@ -442,7 +306,7 @@ class Component(object): # Try and clean up properly and send out any reports or # data that are done during a clean shutdown. Inform the - # controller that we're done. + # monitor that we're done. elif event == CONTROL_PROTOCOL.SHUTDOWN: self.signal_done() break @@ -462,6 +326,82 @@ class Component(object): self.kill() break + def heartbeat(self, timeout=0): + # wait for synchronization reply from the host + socks = dict(self.poll.poll(timeout)) + + # ---------------- + # Control Dispatch + # ---------------- + assert self.control_in, 'Component does not have a control_in socket' + + if socks.get(self.control_in) == zmq.POLLIN: + msg = self.control_in.recv() + event, payload = CONTROL_UNFRAME(msg) + + # =========== + # Heartbeat + # =========== + + # The monitor will send out a single number packed in + # a CONTROL_FRAME with ``heartbeat`` event every + # (n)-seconds. The component then has n seconds to + # respond to it. If not then it will be considered as + # malfunctioning or maybe CPU bound. + + if event == CONTROL_PROTOCOL.HEARTBEAT: + # Heart outgoing + heartbeat_frame = CONTROL_FRAME( + CONTROL_PROTOCOL.OK, + payload + ) + + self.last_ping = float(payload) + # Echo back the heartbeat identifier to tell the + # monitor that this component is still alive and + # doing work + self.control_out.send(heartbeat_frame) + + + # ========= + # Soft Kill + # ========= + + # Try and clean up properly and send out any reports or + # data that are done during a clean shutdown. Inform the + # monitor that we're done. 
+ elif event == CONTROL_PROTOCOL.SHUTDOWN: + self.signal_done() + + # ========= + # Hard Kill + # ========= + + # Just exit. + elif event == CONTROL_PROTOCOL.KILL: + self.kill() + + # In case we didn't receive a ping, send a pre-emptive + # pong to the monitor. + elif self.last_ping and time.time() - self.last_ping > 1: + # send a ping ahead of schedule + pre_pong = time.time() + heartbeat_frame = CONTROL_FRAME( + CONTROL_PROTOCOL.OK, + str(pre_pong) + ) + + # Echo back the heartbeat identifier to tell the + # monitor that this component is still alive and + # doing work + self.control_out.send(heartbeat_frame, self.zmq.NOBLOCK) + self.last_ping = pre_pong + elif self.last_ping and \ + time.time() - self.last_ping > PARAMETERS.MAX_COMPONENT_WAIT: + # monitor is gone without sending the shutdown + # signal, do a hard exit. + self.kill() + def signal_ready(self): log.info(self.__class__.__name__ + ' is ready') @@ -471,60 +411,6 @@ class Component(object): ) self.control_out.send(frame) - def signal_exception(self, exc=None, scope=None): - """ - All exceptions inside any component should boil back to - this handler. - - Will inform the system that the component has failed and how it - has failed. - """ - self.state_flag = COMPONENT_STATE.EXCEPTION - exc_type, exc_value, exc_traceback = sys.exc_info() - - # if a downstream component fails, this component may try - # sending when there are zero connections to the socket, - # which will raise ZMQError(EAGAIN). So, it doesn't make - # sense to relay this exception to Monitor and the rest - # of the zipline. 
- if isinstance(exc, zmq.ZMQError) and exc.errno == zmq.EAGAIN: - log.warn("{id} raised a ZMQError(EAGAIN) not relaying"\ - .format(id=self.get_id)) - return - - # sys.stdout.write(trace) - log.exception("Unexpected error in run for {id}.".format(id=self.get_id)) - - if hasattr(self, 'control_out') and self.control_out: - try: - log.info('{id} sending exception to controller'\ - .format(id=self.get_id)) - msg = EXCEPTION_FRAME( - exc_traceback, - exc_type.__name__, - exc_value.message - ) - - exception_frame = CONTROL_FRAME( - CONTROL_PROTOCOL.EXCEPTION, - msg - ) - self.control_out.send(exception_frame, self.zmq.NOBLOCK) - # The controller should relay the exception back - # to all zipline components. Wait here until the - # notice arrives, and we can assume other zipline - # components have broken out of their message - # loops. - for i in xrange(PARAMETERS.MAX_COMPONENT_WAIT): - self.heartbeat(timeout=1000) - log.warn("{id} never heard back from monitor."\ - .format(id=self.get_id)) - except KillSignal: - log.info("{id} received confirmation from controller"\ - .format(id=self.get_id)) - except: - log.exception("Exception waiting for controller reply") - def signal_done(self): """ Notify down stream components that we're done. 
@@ -534,20 +420,18 @@ class Component(object): # notify internal work loop that we're done self.done = True # TODO: use state flag - if hasattr(self, 'out_socket') and self.out_socket: - msg = zmq.Message(str(CONTROL_PROTOCOL.DONE)) - self.out_socket.send(msg) + msg = zmq.Message(str(CONTROL_PROTOCOL.DONE)) + self.out_socket.send(msg) - if hasattr(self, 'control_out'): - # notify controller we're done - done_frame = CONTROL_FRAME( - CONTROL_PROTOCOL.DONE, - '' - ) + # notify monitor we're done + done_frame = CONTROL_FRAME( + CONTROL_PROTOCOL.DONE, + '' + ) - self.control_out.send(done_frame) - log.info("[%s] sent control done" % self.get_id) + self.control_out.send(done_frame) + log.info("[%s] sent control done" % self.get_id) # there is a narrow race condition where we finish just # after the Monitor accepts our prior heartbeat, but just @@ -559,12 +443,60 @@ class Component(object): # Messaging # ----------- - def setup_poller(self): + def open(self): """ - Setup the poller used for multiplexing the incoming data - handling sockets. + Open the connections needed to start doing work. + Perform any setup that must be done within process. 
""" - self.poll = self.zmq_poller() + + self.zmq = zmq + self.context = self.zmq.Context() + self.poll = self.zmq.Poller() + + self.setup_control() + + if self.in_socket_args: + self.in_socket = self.open_socket(self.in_socket_args) + poller_gen = gen_from_poller( + self.poller, + self.in_socket, + self.unframe + ) + self.generator = self.gen_func( + poller_gen, + *self.gen_args, + **self.gen_kwargs + ) + else: + self.generator = self.gen_func(*self.gen_args, **self.gen_kwargs) + + self.out_socket = self.open_socket(self.out_socket_args) + + def open_socket(self, sock_args): + if sock_args.bind: + return self.bind_socket(sock_args) + else: + return self.connect_socket(sock_args) + + def bind_socket(self, sock_args): + if sock_args.style == zmq.PULL: + return self.bind_pull_socket(sock_args.uri) + if sock_args.style == zmq.PUSH: + return self.bind_push_socket(sock_args.uri) + if sock_args.style == zmq.PUB: + return self.bind_pub_socket(sock_args.uri) + + raise Exception("Invalid socket arguments") + + def connect_socket(self, sock_args): + if sock_args.style == zmq.PULL: + return self.connect_pull_socket(sock_args.uri) + if sock_args.style == zmq.PUSH: + return self.connect_push_socket(sock_args.uri) + if sock_args.style == zmq.SUB: + return self.connect_sub_socket(sock_args.uri) + + raise Exception("Invalid socket arguments") def bind_push_socket(self, addr): push_socket = self.context.socket(self.zmq.PUSH) @@ -623,12 +555,12 @@ class Component(object): of the simulation and to forcefully tear down the simulation in case of a failure. 
""" - self.control_out = self.controller.message_sender( + self.control_out = self.monitor.message_sender( identity = self.get_id, context = self.context, ) - self.control_in = self.controller.message_listener( + self.control_in = self.monitor.message_listener( context = self.context ) @@ -639,18 +571,6 @@ class Component(object): # Description and Debug # --------------------- - def extern_logger(self): - """ - Pipe logs out to a provided logging interface. - """ - pass - - def setup_extern_logger(self): - """ - Pipe logs out to a provided logging interface. - """ - pass - @property def get_id(self): """ diff --git a/zipline/core/host.py b/zipline/core/host.py index ea1ca0aa..37de82aa 100644 --- a/zipline/core/host.py +++ b/zipline/core/host.py @@ -103,7 +103,7 @@ class ComponentHost(object): log.info('== Roll Call ==') - log.info('Controller') + log.info('Monitor') self.launch_controller() diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 08dfd601..183de45b 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -38,7 +38,7 @@ class UnknownChatter(Exception): return """Component calling itself "%s" talking on unexpected channel""" % self.named -log = logbook.Logger('Controller') +log = logbook.Logger('Monitor') # The scalars determining the timing of the monitor behavior for # the system. @@ -56,7 +56,7 @@ PARAMETERS = ndict(dict( SYSTEM_TIMEOUT = 50, )) -class Controller(object): +class Monitor(object): """ A N to M messaging system for inter component communication. 
diff --git a/zipline/core/process.py b/zipline/core/process.py index b2a01429..dc1fcd3c 100644 --- a/zipline/core/process.py +++ b/zipline/core/process.py @@ -40,13 +40,13 @@ class ProcessSimulator(ComponentHost): # invoked by the host's open() def launch_controller(self): - proc = multiprocessing.Process(target=self.controller.run) + proc = multiprocessing.Process(target=self.monitor.run) proc.start() self.con = proc # Process specific - self.controller_process = proc - self.mapping[proc.pid] = 'Controller' + self.monitor_process = proc + self.mapping[proc.pid] = 'Monitor' def launch_component(self, component): proc = multiprocessing.Process(target=component.run) @@ -81,7 +81,7 @@ class ProcessSimulator(ComponentHost): process.join(timeout=1) process.terminate() - self.controller.shutdown(soft=True) + self.monitor.shutdown(soft=True) self.running = False self.con.terminate() diff --git a/zipline/gens/zmqgen.py b/zipline/gens/zmqgen.py new file mode 100644 index 00000000..e51e3bab --- /dev/null +++ b/zipline/gens/zmqgen.py @@ -0,0 +1,27 @@ +import zmq +import zipline.protocol as zp + +def gen_from_pull_socket(socket_uri, context, unframe): + """ + A generator that takes a socket_uri, and yields + messages from the poller until it gets a zp.CONTROL_PROTOCOL.DONE. 
+ """ + pull_socket = context.socket(zmq.PULL) + pull_socket.connect(socket_uri) + poller = zmq.Poller() + poller.register(pull_socket, zmq.POLLIN) + + return gen_from_poller(poller, pull_socket, unframe) + +def gen_from_poller(poller, in_socket, unframe): + + while True: + socks = dict(poller.poll(1000)) + + if socks.get(in_socket) == zmq.POLLIN: + message = in_socket.recv() + if message == str(zp.CONTROL_PROTOCOL.DONE): + break + else: + event = unframe(message) + yield event diff --git a/zipline/lines.py b/zipline/lines.py index 6a70bc14..3d3ab2a6 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -70,7 +70,7 @@ from zipline.transforms import BaseTransform from zipline.test_algorithms import TestAlgorithm from zipline.components import TradeSimulationClient from zipline.core.process import ProcessSimulator -from zipline.core.monitor import Controller +from zipline.core.monitor import Monitor from zipline.finance.trading import SIMULATION_STYLE log = logbook.Logger('Lines') @@ -131,7 +131,7 @@ class SimulatedTrading(object): 'results_address' : sockets[4], } - self.con = Controller( + self.monitor = Monitor( # pub socket sockets[5], # route socket @@ -163,7 +163,7 @@ class SimulatedTrading(object): #setup transforms self.transforms = {} - self.sim.register_controller( self.con ) + self.sim.register_monitor( self.monitor ) @staticmethod @@ -348,15 +348,15 @@ class SimulatedTrading(object): return base | transforms | sources - def setup_controller(self): + def setup_monitor(self): """ - Prepare the controller to manage the topology specified + Prepare the monitor to manage the topology specified by this line. 
""" - self.con.manage(self.topology) + self.monitor.manage(self.topology) def simulate(self, blocking=True): - self.setup_controller() + self.setup_monitor() self.started = True self.sim_context = self.sim.simulate() diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 74591858..eda5a133 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -7,7 +7,7 @@ import blist from zipline.utils.date_utils import EPOCH from itertools import izip from logbook import FileHandler - +from zipline.core.monitor import Monitor def setup_logger(test, path='/var/log/zipline/zipline.log'): test.log_handler = FileHandler(path) @@ -144,31 +144,31 @@ def assert_single_position(test, zipline): ) -def launch_component(self, component): +def launch_component(component): proc = multiprocessing.Process(target=component.run) proc.start() - self.subprocesses.append(proc) - - self.mapping[proc.pid] = component.get_id return proc -def gen_from_socket(socket_uri, context, unframe): - """ - A generator that takes a socket_uri, and yields - messages from the poller until it gets a zp.CONTROL_PROTOCOL.DONE. - """ - pull_socket = context.socket(zmq.PULL) - pull_socket.connect(socket_uri) - poller = zmq.Poller() - poller.register(pull_socket, zmq.POLLIN) +def launch_monitor(monitor): + proc = multiprocessing.Process(target=monitor.run) + proc.start() + return proc - while True: - socks = dict(poller.poll(1000)) - if socks.get(pull_socket) == zmq.POLLIN: - message = pull_socket.recv() +def create_monitor(allocator): + sockets = allocator.lease(3) + mon = Monitor( + # pub socket + sockets[0], + # route socket + sockets[1], + # exception socket to match tradesimclient's result + # socket, because we want to relay exceptions to the + # same listener + sockets[2], + # this controller is expected to run in a test, so no + # need to signal the parent process on success or error. 
+ send_sighup=False + ) - if message.type == zp.CONTROL_PROTOCOL.DONE: - break - else: - yield unframe(message) + return mon From 56177a7c4f0cd64df5d594ae0a5449840c186b5b Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 00:49:48 -0400 Subject: [PATCH 13/73] end to end zipline with pure generators --- zipline/finance/performance.py | 28 +++--- zipline/gens/examples.py | 19 ++-- zipline/gens/tradesimulation.py | 158 ++++++++++++++++++++------------ 3 files changed, 125 insertions(+), 80 deletions(-) diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index 065d5095..92c96f3a 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -197,13 +197,12 @@ class PerformanceTracker(object): # save the transactions for the daily periods keep_transactions = True ) - + for sid in sid_list: self.cumulative_performance.positions[sid] = Position(sid) self.todays_performance.positions[sid] = Position(sid) def update(self, event): - import nose.tools; nose.tools.set_trace() event.perf_message = self.process_event(event) event.portfolio = self.get_portfolio() del event['TRANSACTION'] @@ -247,6 +246,8 @@ class PerformanceTracker(object): def process_event(self, event): + + message = None if self.exceeded_max_loss: return @@ -255,7 +256,7 @@ class PerformanceTracker(object): self.event_count += 1 if(event.dt >= self.market_close): - self.handle_market_close() + message = self.handle_market_close() if event.TRANSACTION: self.txn_count += 1 @@ -270,8 +271,10 @@ class PerformanceTracker(object): self.cumulative_performance.calculate_performance() self.todays_performance.calculate_performance() - def handle_market_close(self): + return message + def handle_market_close(self): + # add the return results from today to the list of DailyReturn objects. 
todays_date = self.market_close.replace(hour=0, minute=0, second=0) todays_return_obj = risk.DailyReturn( @@ -293,14 +296,9 @@ class PerformanceTracker(object): # calculate progress of test self.progress = self.day_count / self.total_days - # TODO!!!! + #TODO TODO TODO!! + daily_update = self.to_dict() - # Output results - if self.results_socket: - msg = zp.PERF_FRAME(self.to_dict()) - self.results_socket.send(msg) - - # if self.trading_environment.max_drawdown: returns = self.todays_performance.returns max_dd = -1 * self.trading_environment.max_drawdown @@ -311,7 +309,7 @@ class PerformanceTracker(object): # so it shows up in the update, but don't end the test # here. Let the update go out before stopping self.exceeded_max_loss = True - return + return daily_update #move the market day markers forward @@ -333,6 +331,8 @@ class PerformanceTracker(object): self.market_close, keep_transactions = True ) + + return daily_update def handle_simulation_end(self): """ @@ -369,8 +369,8 @@ class Position(object): self.sid = sid self.amount = 0 self.cost_basis = 0.0 ##per share - self.last_sale_price = None - self.last_sale_date = None + self.last_sale_price = 0.0 + self.last_sale_date = 0.0 def update(self, txn): if(self.sid != txn.sid): diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 7e01c293..967d0808 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -14,13 +14,13 @@ import zipline.protocol as zp if __name__ == "__main__": - filter = [1,2,3,4] + filter = [2] #Set up source a. One minute between events. args_a = tuple() kwargs_a = { - 'sids' : [1], + 'sids' : [1,2,3,4], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 1), + 'delta' : timedelta(hours = 1), 'filter' : filter } bundle_a = SourceBundle(SpecificEquityTrades, args_a, kwargs_a) @@ -28,9 +28,9 @@ if __name__ == "__main__": #Set up source b. Two minutes between events. 
args_b = tuple() kwargs_b = { - 'sids' : [2], + 'sids' : [1,2,3,4], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 1), + 'delta' : timedelta(hours = 1), 'filter' : filter } bundle_b = SourceBundle(SpecificEquityTrades, args_b, kwargs_b) @@ -38,9 +38,9 @@ if __name__ == "__main__": #Set up source c. Three minutes between events. args_c = tuple() kwargs_c = { - 'sids' : [3], + 'sids' : [1,2,3,4], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 1), + 'delta' : timedelta(hours = 1), 'filter' : filter } bundle_c = SourceBundle(SpecificEquityTrades, args_c, kwargs_c) @@ -63,5 +63,8 @@ if __name__ == "__main__": style = zp.SIMULATION_STYLE.PARTIAL_VOLUME client_out = tsc(merge_out, algo, environment, style) - client_out.next() + import nose.tools; nose.tools.set_trace() + for message in client_out: + pass + diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 2c12dc6f..74048982 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -55,37 +55,6 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): for sid in sids: open_orders[sid] = [] - - # Closure to pass into the user's algo to allow placing orders - # into the txn_sim's dict of open orders. - def order(self, sid, amount): - assert sid in sids, "Order on invalid sid: %i" % sid - order = ndict({ - 'dt' : self.current_dt, - 'sid' : sid, - 'amount' : int(amount), - 'filled' : 0 - }) - - # Tell the user if they try to buy 0 shares of something. - if order.amount == 0: - log = "requested to trade zero shares of {sid}".format( - sid=event.sid - ) - log.debug(log) - return - - open_orders[sid].append(event) - - # Set the algo's order method. - algo.set_order(order) - - # Provide a logbook logging interface to user code. - algo.set_logger(logbook.Logger("Algolog")) - - # Call user-defined initialize method before we process any - # events. 
- algo.initialize() # Pipe the in stream into the transaction simulator. # Creates a txn field on the event containing transaction @@ -113,44 +82,117 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Batch the event stream by dt to be processed by the user's algo. # Yields perf messages whenever it encounters them. - perf_messages = algo_simulator(with_portfolio_and_perf_msg, algo) + perf_messages = algo_simulator(with_portfolio_and_perf_msg, sids, algo, open_orders) + for message in perf_messages: + yield message + + +def algo_simulator(stream_in, sids, algo, order_book): + simulation_dt = None + # Closure to pass into the user's algo to allow placing orders + # into the txn_sim's dict of open orders. + def order(sid, amount): + assert sid in sids, "Order on invalid sid: %i" % sid + order = ndict({ + 'dt' : simulation_dt, + 'sid' : sid, + 'amount' : int(amount), + 'filled' : 0 + }) -def algo_simulator(stream_in, sids, algo): + # Tell the user if they try to buy 0 shares of something. + if order.amount == 0: + log = "requested to trade zero shares of {sid}".format( + sid=event.sid + ) + log.debug(log) + return + + order_book[sid].append(order) + + # Set the algo's order method. + algo.set_order(order) + + # Provide a logbook logging interface to user code. + algo.set_logger(logbook.Logger("Algolog")) + + # Call user-defined initialize method before we process any + # events. + algo.initialize() + + this_snapshot_dt = None - current_dt = None universe = ndict() for sid in sids: - universe[sid] = None + universe[sid] = ndict() universe.portfolio = None - for update in stream_in: - #Yield perf messages to be relayed back to the browser. 
- if update.perf_message: - yield perf_message - - if current_dt = None: - current_dt = update.dt - - # If this message is newer than the algorithm's simulated dt, - # call handle data on a snapshot of the current algo universe, - # then - if message.dt >= current_dt + last_delta: - start_tic = datetime.now() - algo.handle_data(universe) - stop_tic = datetime.now() - last_delta = datetime - - current_dt = message.dt + last_delta - - batch.data[message.sid] = message - batch.data.portfolio = message.portfolio - + for event in stream_in: + # Yield any perf messages received to be relayed back to the browser. + if event.perf_message: + yield event.perf_message - + # This should only happen for the first event we run. + if simulation_dt == None: + simulation_dt = event.dt + + # If we are currently creating a new message and this update + # matches the message dt, update the state of the universe. + + if this_snapshot_dt != None: + + if event.dt == this_snapshot_dt: + update_universe(event, universe) + + # If we are constructing a snapshot and we hit a new dt, call + # handle_data and record how long it takes. + else: + + start_tic = datetime.now() + algo.handle_data(universe) + stop_tic = datetime.now() + + # How long did you take? + delta = stop_tic - start_tic + + # Update the simulation time. + simulation_dt = this_snapshot_dt + delta + + # Update the universe with the new event. + update_universe(event, universe) + + # If the current event is later than the simulation + # time, update the universe and start constructing + # another snapshot. + if event.dt >= simulation_dt: + this_snapshot_dt = event.dt + else: + this_snapshot_dt = None + # We have been fastforwarding. Update the universe + # and check if we can start a new snapshot. 
+ else: + update_universe(event, universe) + if event.dt >= simulation_dt: + this_snapshot_dt = event.dt + + + + +def update_universe(event, universe): + + universe.portfolio = event.portfolio + del event['portfolio'] + + event_sid = event.sid + del event['sid'] + + for field in event.keys(): + universe[event_sid][field] = event[field] + From 8bcaed9044d67480194a7a6e19d806946a7fbbbe Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 14:44:11 -0400 Subject: [PATCH 14/73] moving to class-style generators --- zipline/gens/examples.py | 67 ++++++++-------- zipline/gens/tradegens.py | 132 ++++++++++++++++++-------------- zipline/gens/tradesimulation.py | 10 +-- 3 files changed, 107 insertions(+), 102 deletions(-) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 967d0808..50b955e4 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,4 +1,7 @@ import pytz +from time import sleep + +from pprint import pprint as pp from datetime import datetime, timedelta from zipline.utils.factory import create_trading_environment @@ -18,53 +21,43 @@ if __name__ == "__main__": #Set up source a. One minute between events. args_a = tuple() kwargs_a = { - 'sids' : [1,2,3,4], + 'sids' : [2], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 1), + 'delta' : timedelta(minutes = 1), 'filter' : filter } - bundle_a = SourceBundle(SpecificEquityTrades, args_a, kwargs_a) + source_a = SpecificEquityTrades(*args_a, **kwargs_a) #Set up source b. Two minutes between events. args_b = tuple() kwargs_b = { - 'sids' : [1,2,3,4], - 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 1), + 'sids' : [2], + 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 1), 'filter' : filter } - bundle_b = SourceBundle(SpecificEquityTrades, args_b, kwargs_b) - + source_b = SpecificEquityTrades(*args_a, **kwargs_a) + #Set up source c. Three minutes between events. 
- args_c = tuple() - kwargs_c = { - 'sids' : [1,2,3,4], - 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 1), - 'filter' : filter - } - bundle_c = SourceBundle(SpecificEquityTrades, args_c, kwargs_c) + + # sort_out = date_sorted_sources(source_a, source_b) + +# passthrough = TransformBundle(Passthrough, (), {}) +# mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) +# tnfm_bundles = (passthrough, mavg_price) + +# merge_out = merged_transforms(sort_out, tnfm_bundles) + +# # for message in merge_out: +# # print message + +# algo = TestAlgorithm(2, 100, 100) +# environment = create_trading_environment(year = 2012) +# style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE + +# client_out = tsc(merge_out, algo, environment, style) +# for message in client_out: + # pp(message) - source_bundles = (bundle_a, bundle_b, bundle_c) - # Pipe our sources into sort. - sort_out = date_sorted_sources(source_bundles) - - passthrough = TransformBundle(Passthrough, (), {}) - mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) - tnfm_bundles = (passthrough, mavg_price) - - merge_out = merged_transforms(sort_out, tnfm_bundles) - - # for message in merge_out: -# print message - - algo = TestAlgorithm(2, 100, 100) - environment = create_trading_environment(year = 2012) - style = zp.SIMULATION_STYLE.PARTIAL_VOLUME - - client_out = tsc(merge_out, algo, environment, style) - import nose.tools; nose.tools.set_trace() - for message in client_out: - pass diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index 7420e1b4..8552b530 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -49,7 +49,7 @@ def fuzzy_dates(count = 500): for date in date_gen(count = count): yield date + timedelta(seconds = random.randint(-10, 10)) -def SpecificEquityTrades(*args, **config): +class SpecificEquityTrades(object): """ Yields all events in event_list that match the given sid_filter. 
If no event_list is specified, generates an internal stream of events @@ -57,71 +57,85 @@ def SpecificEquityTrades(*args, **config): Configuration options: - count: integer representing number of trades - sids : list of values representing simulated internal sids - start: start date - delta: timedelta between internal events - - + count : integer representing number of trades + sids : list of values representing simulated internal sids + start : start date + delta : timedelta between internal events + filter : filter to remove the sids """ - # We shouldn't get any positional arguments. - assert args == () - # Unpack config dictionary with default values. - count = config.get('count', 500) - sids = config.get('sids', [1, 2]) - start = config.get('start', datetime(2012, 6, 6, 0)) - delta = config.get('delta', timedelta(minutes = 1)) + def __init__(self, *args, **kwargs): + # We shouldn't get any positional arguments. + assert len(args) == 0 + + # Unpack config dictionary with default values. + self.count = kwargs.get('count', 500) + self.sids = kwargs.get('sids', [1, 2]) + self.start = kwargs.get('start', datetime(2012, 6, 6, 0)) + self.delta = kwargs.get('delta', timedelta(minutes = 1)) + + # Default to None for event_list and filter. + self.event_list = kwargs.get('event_list') + self.filter = kwargs.get('filter') + + # Hash_value for downstream sorting. + self.arg_string = hash_args(*args, **kwargs) - # Default to None for event_list and filter. - event_list = config.get('event_list') - filter = config.get('filter') + def get_hash(self): + return self.__class__.__name__ + "-" + self.arg_string + + def __iter__(self): + + if self.event_list: + unfiltered = (event for event in event_list) - arg_string = hash_args(*args, **config) - namestring = "SpecificEquityTrades" + arg_string - # If we have an event_list, ignore the other arguments and use the list. - # TODO: still append our namestring? 
- if event_list: - unfiltered = (event for event in event_list) + # Set up iterators for each expected field. + else: + dates = date_gen(count=self.count, + start=self.start, + delta=self.delta + ) + prices = mock_prices(self.count) + volumes = mock_volumes(self.count) + sids = cycle(self.sids) + + # Combine the iterators into a single iterator of arguments + arg_gen = izip(sids, prices, volumes, dates) - # Set up iterators for each expected field. - else: - dates = date_gen(count = count, start = start, delta = delta) - prices = mock_prices(count) - volumes = mock_volumes(count) + # Convert argument packages into events. + unfiltered = (create_trade(*args, source_id = self.get_hash()) + for args in arg_gen) + + # If we specified a sid filter, filter out elements that don't + # match the filter. + if self.filter: + filtered = ifilter(lambda event: event.sid in self.filter, unfiltered) + + # Otherwise just use all events. + else: + filtered = unfiltered + + # Return the filtered event stream. + return filtered + + +# !!!!!!! Deprecated for now !!!!!!!!! + +def RandomEquityTrades(object): + + def __init__(self): + # We shouldn't get any positional args. + assert args == () + + self.count = config.get('count', 500) + self.sids = config.get('sids', [1,2]) + self.filter = config.get('filter') + + dates = fuzzy_dates(count) + prices = mock_prices(count, rand = True) + volumes = mock_volumes(count, rand = True) sids = cycle(sids) - # Combine the iterators into a single iterator of arguments - arg_gen = izip(sids, prices, volumes, dates) - - # Convert argument packages into events. - unfiltered = (create_trade(*args, source_id = namestring) - for args in arg_gen) - - # If we specified a sid filter, filter out elements that don't match the filter. - if filter: - filtered = ifilter(lambda event: event.sid in filter, unfiltered) - - # Otherwise just use all events. - else: - filtered = unfiltered - - # Return the filtered event stream. 
- return filtered - -def RandomEquityTrades(*args, **config): - # We shouldn't get any positional args. - assert args == () - - count = config.get('count', 500) - sids = config.get('sids', [1,2]) - filter = config.get('filter') - - dates = fuzzy_dates(count) - prices = mock_prices(count, rand = True) - volumes = mock_volumes(count, rand = True) - sids = cycle(sids) - arg_gen = izip(sids, prices, volumes, dates) unfiltered = (create_trade(*args) for args in arg_gen) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 74048982..04567fac 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -84,7 +84,7 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Yields perf messages whenever it encounters them. perf_messages = algo_simulator(with_portfolio_and_perf_msg, sids, algo, open_orders) - for message in perf_messages: + for message in perf_messages: yield message @@ -109,7 +109,7 @@ def algo_simulator(stream_in, sids, algo, order_book): sid=event.sid ) log.debug(log) - return + return order_book[sid].append(order) @@ -123,18 +123,17 @@ def algo_simulator(stream_in, sids, algo, order_book): # events. algo.initialize() - this_snapshot_dt = None - universe = ndict() - for sid in sids: universe[sid] = ndict() universe.portfolio = None + this_snapshot_dt = None for event in stream_in: # Yield any perf messages received to be relayed back to the browser. if event.perf_message: yield event.perf_message + del event['perf_message'] # This should only happen for the first event we run. if simulation_dt == None: @@ -151,7 +150,6 @@ def algo_simulator(stream_in, sids, algo, order_book): # If we are constructing a snapshot and we hit a new dt, call # handle_data and record how long it takes. 
else: - start_tic = datetime.now() algo.handle_data(universe) stop_tic = datetime.now() From d141422adac0e17e48a7706e16cb345371642701 Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 14:45:13 -0400 Subject: [PATCH 15/73] component is self-contained! --- tests/test_components.py | 148 ++++++++++++++++++++++++++++---------- zipline/core/component.py | 122 ++++++++++++++++++++----------- zipline/core/monitor.py | 15 ++++ zipline/gens/examples.py | 11 ++- zipline/gens/zmqgen.py | 5 ++ 5 files changed, 215 insertions(+), 86 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 02b05a69..a9b765d0 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -4,17 +4,14 @@ from datetime import datetime, timedelta from unittest2 import TestCase from collections import defaultdict +from zipline.gens.composite import date_sorted_sources from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator -from zipline.lines import SimulatedTrading from zipline.utils.test_utils import ( - drain_zipline, - check, setup_logger, teardown_logger, - launch_component, create_monitor, launch_monitor ) @@ -28,7 +25,7 @@ from zipline.protocol import ( ) from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.utils import hash_args +from zipline.gens.sort import date_sort from zipline.gens.zmqgen import gen_from_poller import logbook @@ -53,10 +50,14 @@ class ComponentTestCase(TestCase): setup_logger(self) def tearDown(self): - self.ctx.term() + #self.ctx.term() teardown_logger(self) - def test_specific_equity_source(self): + def test_source(self): + monitor = create_monitor(allocator) + socket_uri = allocator.lease(1)[0] + count = 100 + filter = [1,2,3,4] #Set up source a. One minute between events. 
args_a = tuple() @@ -65,42 +66,113 @@ class ComponentTestCase(TestCase): 'start' : datetime(2012,6,6,0,tzinfo=pytz.utc), 'delta' : timedelta(minutes = 1), 'filter' : filter, - 'count' : 100 + 'count' : count } - c_id = SpecificEquityTrades.__name__ + hash_args(args_a, kwargs_a) - mon = create_monitor(allocator) - - out_socket_args = ComponentSocketArgs( - style=zmq.PUSH, - uri=allocator.lease(1)[0], - bind=True + comp_a = Component( + SpecificEquityTrades, + args_a, + kwargs_a, + monitor, + socket_uri, + DATASOURCE_FRAME, + DATASOURCE_UNFRAME ) - c = Component( - SpecificEquityTrades, - args_a, - kwargs_a, - c_id, - out_socket_args, - DATASOURCE_FRAME, - mon - ) + launch_monitor(monitor) - mon.manage(set([c.get_id])) - mon_proc = launch_monitor(mon) + for event in comp_a: + log.info(event) - # launch in a process - proc = launch_component(c) - pull_socket = self.ctx.socket(zmq.PULL) - pull_socket.connect(out_socket_args.uri) - poller = zmq.Poller() - poller.register(pull_socket, zmq.POLLIN) - unframe = DATASOURCE_UNFRAME - for msg in gen_from_poller(poller, pull_socket, unframe): - # assert things about the messages. - log.info(msg) + def test_sort(self): + monitor = create_monitor(allocator) + poller = zmq.Poller() + socket_uris = allocator.lease(3) + count = 100 - pull_socket.close() - log.info("DONE!") + filter = [1,2,3,4] + #Set up source a. One minute between events. + args_a = tuple() + kwargs_a = { + 'sids' : [1,2], + 'start' : datetime(2012,6,6,0,tzinfo=pytz.utc), + 'delta' : timedelta(minutes = 1), + 'filter' : filter, + 'count' : count + } + + + comp_a = Component( + SpecificEquityTrades, + args_a, + kwargs_a, + monitor, + socket_uris[0], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME + ) + + + #Set up source b. Two minutes between events. 
+ args_b = tuple() + kwargs_b = { + 'sids' : [2], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 1), + 'filter' : filter, + 'count' : count + } + + + comp_b = Component( + SpecificEquityTrades, + args_b, + kwargs_b, + monitor, + socket_uris[1], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME + ) + + #Set up source c. Three minutes between events. + args_c = tuple() + kwargs_c = { + 'sids' : [3], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 1), + 'filter' : filter, + 'count' : count + } + + comp_c = Component( + SpecificEquityTrades, + args_c, + kwargs_c, + monitor, + socket_uris[2], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME + ) + + names = [ + comp_a.get_id, + comp_b.get_id, + comp_c.get_id + ] + + monitor.manage(set(names)) + launch_monitor(monitor) + + sorted_out = date_sorted_sources([comp_a, comp_b, comp_c]) + + prev = None + sort_count = 0 + for msg in sorted_out: + if prev: + self.assertTrue(msg.dt >= prev.dt, \ + "Messages should be in date ascending order") + prev = msg + sort_count += 1 + + self.assertEqual(count*3, sort_count) diff --git a/zipline/core/component.py b/zipline/core/component.py index f9dc63c1..287373b4 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -10,8 +10,11 @@ import socket import logbook import traceback import humanhash +import multiprocessing from setproctitle import setproctitle from collections import namedtuple +from zipline.gens.utils import hash_args + # pyzmq import zmq @@ -36,7 +39,7 @@ class KillSignal(Exception): def __init__(self): pass -ComponentSocketArgs = namedtuple('ComponentSocket',['uri','style','bind']) +ComponentSocketArgs = namedtuple('ComponentSocketArgs',['uri','style','bind']) class Component(object): @@ -49,33 +52,27 @@ class Component(object): gen_args, gen_kwargs, component_id, - out_socket_args, - frame, monitor, - in_socket_args=None, - unframe=None + socket_uri, + frame, + unframe ): assert component_id, \ 
"Every component needs a unique and invariant identifier" assert isinstance(component_id, basestring), \ "Components must have string IDs" - assert isinstance(out_socket_args, ComponentSocketArgs), \ - "out_socket_args args must be ComponentSocketArgs" - - if in_socket_args: - assert isinstance(in_socket_args, ComponentSocketArgs), \ - "in_socket_args args must be ComponentSocketArgs" # ----------------- # Generator # ----------------- - self.component_id = component_id self.gen_args = gen_args self.gen_kwargs = gen_kwargs self.gen_func = gen_func self.generator = None self.frame = frame + self.component_id = self.gen_func.__name__ \ + + hash_args(gen_args, gen_kwargs) # lock for waiting on monitor "GO" self.waiting = None @@ -83,14 +80,27 @@ class Component(object): # ----------------- # ZMQ properties # ----------------- - self.in_socket_args = in_socket_args - self.out_socket_args = out_socket_args + self.in_socket_args = ComponentSocketArgs( + uri = socket_uri, + style = zmq.PULL, + bind = False + ) + self.out_socket_args = ComponentSocketArgs( + uri = socket_uri, + style = zmq.PUSH, + bind = True + ) self.zmq = None self.context = None self.out_socket = None self.in_socket = None - self.monitor = monitor + self.monitor = monitor self.unframe = unframe + self.prefix = "" + + # register two components with the monitor + monitor.add_to_topology(self.component_id) + monitor.add_to_topology("FORK-"+self.component_id) # TODO: state_flag is deprecated, remove self.state_flag = COMPONENT_STATE.OK @@ -109,7 +119,7 @@ class Component(object): # ------------ - def _run(self): + def _run_out(self): """ The main component loop. This is wrapped inside a exception reporting context inside of run. @@ -118,13 +128,12 @@ class Component(object): """ # The process title so you can watch it in top, ps. 
setproctitle(self.gen_func.__name__) + self.prefix = "FORK-" log.info("Start %r" % self) log.info("Pid %s" % os.getpid()) log.info("Group %s" % os.getpgrp()) - self.sockets = [] - self.open() self.signal_ready() @@ -138,17 +147,36 @@ class Component(object): for event in self.generator: self.heartbeat() + event.source_id = self.get_id msg = self.frame(event) self.out_socket.send(msg) self.signal_done() - def run(self, catch_exceptions=True): + def _run_in(self): + self.open(send=False) + self.signal_ready() + self.lock_ready() + self.wait_ready() + # ----------------------- + # YOU SHALL NOT PASS!!!!! + # ----------------------- + # ... until the monitor signals GO + + # return the generator + for event in gen_from_poller(self.poll, self.in_socket, self.unframe): + event.source_id = self.get_id + yield event + + self.signal_done() + + def run_safe(self, func): """ - Run the component. + Run a function that is assumed to include wait_ready and + heartbeat. Used to wrap fork_generator and consume_gen. """ try: - self._run() + return func() except Exception as exc: if not isinstance(exc, KillSignal): self.signal_exception(exc) @@ -160,6 +188,23 @@ class Component(object): log.info("Exiting %r" % self) + def _launch(self): + # first, start the generator in its own process. Once + # Monitor says "go", Events from the generator will be + # FRAME'd and PUSH'd to self.socket_uri. + proc = multiprocessing.Process( + target=self.run_safe, + args=(self._run_out,) + ) + proc.start() + + # Start the poller-generator, which will PULL messages + # from self.sockiet_uri, UNFRAME'd them, and yield them. 
+ return self.run_safe(self._run_in) + + def __iter__(self): + return self._launch() + # ---------------------------- # Cleanup & Modes of Failure # ---------------------------- @@ -420,8 +465,9 @@ class Component(object): # notify internal work loop that we're done self.done = True # TODO: use state flag - msg = zmq.Message(str(CONTROL_PROTOCOL.DONE)) - self.out_socket.send(msg) + if self.out_socket: + msg = zmq.Message(str(CONTROL_PROTOCOL.DONE)) + self.out_socket.send(msg) # notify monitor we're done @@ -437,40 +483,32 @@ class Component(object): # after the Monitor accepts our prior heartbeat, but just # before the next one is sent. So, we hang around for one # last heartbeat, and wait an unusually long time. - self.heartbeat(timeout=5000) + # TODO: decided if this is really necessary. + # self.heartbeat(timeout=5000) # ----------- # Messaging # ----------- - def open(self): + def open(self, send=True): """ Open the connections needed to start doing work. Perform any setup that must be done within process. """ - + self.sockets = [] self.zmq = zmq self.context = self.zmq.Context() self.poll = self.zmq.Poller() self.setup_control() - if self.in_socket_args: - self.in_socket = self.open_socket(self.in_socket_args) - poller_gen = gen_from_poller( - self.poller, - self.in_socket, - self.unframe - ) - self.generator = self.gen_func( - poller_gen, - *self.gen_args, - **self.gen_kwargs - ) - else: + if send: self.generator = self.gen_func(*self.gen_args, **self.gen_kwargs) - - self.out_socket = self.open_socket(self.out_socket_args) + self.out_socket = self.open_socket(self.out_socket_args) + self.sockets.extend([self.out_socket]) + else: + self.in_socket = self.open_socket(self.in_socket_args) + self.sockets.extend([self.in_socket]) def open_socket(self, sock_args): if sock_args.bind: @@ -577,7 +615,7 @@ class Component(object): The time invariant name for this component. Must be unique within this zipline. 
""" - return self.component_id + return self.prefix + self.component_id def debug(self): """ diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 183de45b..83b022c5 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -105,6 +105,9 @@ class Monitor(object): self.missed_beats = Counter() + # start with an empty topology + self.topology = set([]) + self.send_sighup = send_sighup if self.send_sighup: log.info("Request to send sighup/sigint") @@ -116,6 +119,17 @@ class Monitor(object): self.zmq_poller = self.zmq.Poller return + def add_to_topology(self, component_id): + add = set([component_id]) + self.topology.update(add) + + def freeze_topology(self): + if isinstance(self.topology, frozenset): + return + # we've been incrementally adding components. + # time to freeze. + self.manage(self.topology) + def manage(self, topology): """ Give the controller a set set of components to manage and @@ -147,6 +161,7 @@ class Monitor(object): raise RuntimeError("Invalid State Transition : %s -> %s" %(old, new)) def run(self): + self.freeze_topology() self.running = True self.init_zmq() setproctitle('Monitor') diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 7e01c293..55704f27 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -13,7 +13,7 @@ from zipline.gens.tradesimulation import trade_simulation_client as tsc import zipline.protocol as zp if __name__ == "__main__": - + filter = [1,2,3,4] #Set up source a. One minute between events. args_a = tuple() @@ -44,10 +44,10 @@ if __name__ == "__main__": 'filter' : filter } bundle_c = SourceBundle(SpecificEquityTrades, args_c, kwargs_c) - + source_bundles = (bundle_a, bundle_b, bundle_c) # Pipe our sources into sort. 
- sort_out = date_sorted_sources(source_bundles) + sort_out = date_sorted_sources(source_bundles) passthrough = TransformBundle(Passthrough, (), {}) mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) @@ -57,11 +57,10 @@ if __name__ == "__main__": # for message in merge_out: # print message - + algo = TestAlgorithm(2, 100, 100) environment = create_trading_environment(year = 2012) style = zp.SIMULATION_STYLE.PARTIAL_VOLUME - + client_out = tsc(merge_out, algo, environment, style) client_out.next() - diff --git a/zipline/gens/zmqgen.py b/zipline/gens/zmqgen.py index e51e3bab..f9d5f919 100644 --- a/zipline/gens/zmqgen.py +++ b/zipline/gens/zmqgen.py @@ -13,6 +13,11 @@ def gen_from_pull_socket(socket_uri, context, unframe): return gen_from_poller(poller, pull_socket, unframe) + +# this generator needs to know about the source_ids coming in via +# the poller, and need to yield DONE messages for each +# source_id. + def gen_from_poller(poller, in_socket, unframe): while True: From 07284017fd768cf265c0bf896aa22799cd14db0f Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 15:04:28 -0400 Subject: [PATCH 16/73] rewinds --- zipline/gens/tradegens.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index 8552b530..2c49ea19 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -80,14 +80,25 @@ class SpecificEquityTrades(object): # Hash_value for downstream sorting. 
self.arg_string = hash_args(*args, **kwargs) + + self.generator = self.create_fresh_generator() + + def __iter__(self): + return self.generator + + def next(self): + return self.generator.next() + + def rewind(self): + self.generator = self.create_fresh_generator() def get_hash(self): return self.__class__.__name__ + "-" + self.arg_string - def __iter__(self): + def create_fresh_generator(self): if self.event_list: - unfiltered = (event for event in event_list) + unfiltered = (event for event in self.event_list) # Set up iterators for each expected field. else: From 7d148e0f36243c2b9c26bee817c2bc353713d56a Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 15:09:54 -0400 Subject: [PATCH 17/73] whitespace cleanse --- zipline/gens/tradegens.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index 2c49ea19..a8a691b5 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -7,7 +7,7 @@ from itertools import chain, cycle, ifilter, izip from datetime import datetime, timedelta from zipline.utils.factory import create_trade -from zipline.gens.utils import hash_args, mock_done +from zipline.gens.utils import hash_args def date_gen(start = datetime(2006, 6, 6, 12), delta = timedelta(minutes = 1), @@ -54,9 +54,9 @@ class SpecificEquityTrades(object): Yields all events in event_list that match the given sid_filter. If no event_list is specified, generates an internal stream of events to filter. Returns all events if filter is None. - + Configuration options: - + count : integer representing number of trades sids : list of values representing simulated internal sids start : start date @@ -67,22 +67,22 @@ class SpecificEquityTrades(object): def __init__(self, *args, **kwargs): # We shouldn't get any positional arguments. assert len(args) == 0 - + # Unpack config dictionary with default values. 
self.count = kwargs.get('count', 500) self.sids = kwargs.get('sids', [1, 2]) self.start = kwargs.get('start', datetime(2012, 6, 6, 0)) self.delta = kwargs.get('delta', timedelta(minutes = 1)) - + # Default to None for event_list and filter. self.event_list = kwargs.get('event_list') self.filter = kwargs.get('filter') - + # Hash_value for downstream sorting. self.arg_string = hash_args(*args, **kwargs) - + self.generator = self.create_fresh_generator() - + def __iter__(self): return self.generator @@ -94,22 +94,22 @@ class SpecificEquityTrades(object): def get_hash(self): return self.__class__.__name__ + "-" + self.arg_string - + def create_fresh_generator(self): - + if self.event_list: unfiltered = (event for event in self.event_list) # Set up iterators for each expected field. else: - dates = date_gen(count=self.count, - start=self.start, + dates = date_gen(count=self.count, + start=self.start, delta=self.delta ) prices = mock_prices(self.count) volumes = mock_volumes(self.count) sids = cycle(self.sids) - + # Combine the iterators into a single iterator of arguments arg_gen = izip(sids, prices, volumes, dates) @@ -137,7 +137,7 @@ def RandomEquityTrades(object): def __init__(self): # We shouldn't get any positional args. 
assert args == () - + self.count = config.get('count', 500) self.sids = config.get('sids', [1,2]) self.filter = config.get('filter') From 26ad7c4818cbf7165798697d205b46b1ff9b640a Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 15:16:17 -0400 Subject: [PATCH 18/73] intersticial --- zipline/gens/composites.py | 18 +++++++----------- zipline/gens/examples.py | 12 ++++++------ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 234db714..964bbe42 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -11,23 +11,19 @@ from zipline.gens.transform import stateful_transform SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) -def date_sorted_sources(bundles): +def date_sorted_sources(*sources): """ Takes an iterable of SortBundles, generating namestrings and initialized datasources for each before piping them into a date_sort. """ - assert isinstance(bundles, (list, tuple)) - for bundle in bundles: - assert isinstance(bundle, SourceBundle) - # Calculate namestring hashes to pass to date_sort. - names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) - for bundle in bundles] + for source in sources: + assert source.__dict__.has_key('__init__') + assert source.__dict__.has_key('get_hash') + + # Get name hashes to pass to date_sort. + names = [source.get_hash for source in sources) - # Pass each source its arguments. - source_gens = [bundle.source(*bundle.args, **bundle.kwargs) - for bundle in bundles] - # Convert the list of generators into a flat stream by pulling # one element at a time from each. 
stream_in = roundrobin(source_gens, names) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 50b955e4..afc8ebe8 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -17,11 +17,11 @@ import zipline.protocol as zp if __name__ == "__main__": - filter = [2] + filter = [2,3] #Set up source a. One minute between events. args_a = tuple() kwargs_a = { - 'sids' : [2], + 'sids' : [1,2,3], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 1), 'filter' : filter @@ -31,7 +31,7 @@ if __name__ == "__main__": #Set up source b. Two minutes between events. args_b = tuple() kwargs_b = { - 'sids' : [2], + 'sids' : [2,3,4], 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 1), 'filter' : filter @@ -39,8 +39,7 @@ if __name__ == "__main__": source_b = SpecificEquityTrades(*args_a, **kwargs_a) #Set up source c. Three minutes between events. - - # sort_out = date_sorted_sources(source_a, source_b) + sort_out = date_sorted_sources(source_a, source_b) # passthrough = TransformBundle(Passthrough, (), {}) # mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) @@ -60,4 +59,5 @@ if __name__ == "__main__": # pp(message) - + + From 12c7cd3b41343046a9e16bd52b3f89abb843a87e Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 15:40:26 -0400 Subject: [PATCH 19/73] removed extraneous imports to remove circularity --- tests/test_components.py | 4 +--- zipline/core/component.py | 14 +++----------- zipline/lines.py | 2 +- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index a9b765d0..9be71304 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -70,9 +70,7 @@ class ComponentTestCase(TestCase): } comp_a = Component( - SpecificEquityTrades, - args_a, - kwargs_a, + SpecificEquityTrades(*args_a, **kwargs_a), monitor, socket_uri, DATASOURCE_FRAME, diff --git 
a/zipline/core/component.py b/zipline/core/component.py index 287373b4..b76e78cf 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -13,7 +13,6 @@ import humanhash import multiprocessing from setproctitle import setproctitle from collections import namedtuple -from zipline.gens.utils import hash_args # pyzmq @@ -48,9 +47,7 @@ class Component(object): # ------------ def __init__(self, - gen_func, - gen_args, - gen_kwargs, + generator, component_id, monitor, socket_uri, @@ -66,13 +63,9 @@ class Component(object): # ----------------- # Generator # ----------------- - self.gen_args = gen_args - self.gen_kwargs = gen_kwargs - self.gen_func = gen_func - self.generator = None + self.generator = generator self.frame = frame - self.component_id = self.gen_func.__name__ \ - + hash_args(gen_args, gen_kwargs) + self.component_id = hash(self.generator) # lock for waiting on monitor "GO" self.waiting = None @@ -503,7 +496,6 @@ class Component(object): self.setup_control() if send: - self.generator = self.gen_func(*self.gen_args, **self.gen_kwargs) self.out_socket = self.open_socket(self.out_socket_args) self.sockets.extend([self.out_socket]) else: diff --git a/zipline/lines.py b/zipline/lines.py index 3d3ab2a6..a5a3858e 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -62,7 +62,7 @@ before invoking simulate. 
import inspect import logbook -import zipline.utils.factory as factory +#import zipline.utils.factory as factory from zipline.components import DataSource from zipline.transforms import BaseTransform From c49806187dc32ba046ec1b64dde32fa340224593 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 16:13:23 -0400 Subject: [PATCH 20/73] sources as classes --- zipline/gens/composites.py | 8 ++++---- zipline/gens/examples.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 964bbe42..af2fdc8c 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -18,15 +18,15 @@ def date_sorted_sources(*sources): """ for source in sources: - assert source.__dict__.has_key('__init__') - assert source.__dict__.has_key('get_hash') + assert iter(source), "Source %s not iterable" % source + assert source.__class__.__dict__.has_key('get_hash'), "No get_hash" # Get name hashes to pass to date_sort. - names = [source.get_hash for source in sources) + names = [source.get_hash() for source in sources] # Convert the list of generators into a flat stream by pulling # one element at a time from each. - stream_in = roundrobin(source_gens, names) + stream_in = roundrobin(sources, names) # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 93f77d47..5a24493f 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -23,7 +23,7 @@ if __name__ == "__main__": 'delta' : timedelta(minutes = 1), 'filter' : filter } - bundle_a = SourceBundle(SpecificEquityTrades, args_a, kwargs_a) + source_a = SpecificEquityTrades(*args_a, **kwargs_a) #Set up source b. Two minutes between events. 
args_b = tuple() @@ -33,12 +33,12 @@ if __name__ == "__main__": 'delta' : timedelta(minutes = 1), 'filter' : filter } - bundle_b = SourceBundle(SpecificEquityTrades, args_b, kwargs_b) + source_b = SpecificEquityTrades(*args_b, **kwargs_b) #Set up source c. Three minutes between events. sort_out = date_sorted_sources(source_a, source_b) - + # passthrough = TransformBundle(Passthrough, (), {}) # mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) # tnfm_bundles = (passthrough, mavg_price) From cfc49d3cccdca1234c11fd6c7083702113c015ce Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 16:20:15 -0400 Subject: [PATCH 21/73] using classes instead of pure generators in component --- tests/test_components.py | 19 ++++--------------- zipline/core/component.py | 12 ++---------- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 9be71304..3626e54c 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -4,7 +4,7 @@ from datetime import datetime, timedelta from unittest2 import TestCase from collections import defaultdict -from zipline.gens.composite import date_sorted_sources +from zipline.gens.composites import date_sorted_sources from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator @@ -16,17 +16,13 @@ from zipline.utils.test_utils import ( launch_monitor ) - from zipline.core import Component -from zipline.core.component import ComponentSocketArgs from zipline.protocol import ( DATASOURCE_FRAME, DATASOURCE_UNFRAME ) from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.sort import date_sort -from zipline.gens.zmqgen import gen_from_poller import logbook log = logbook.Logger('ComponentTestCase') @@ -85,7 +81,6 @@ class ComponentTestCase(TestCase): def test_sort(self): monitor = create_monitor(allocator) - poller = zmq.Poller() socket_uris = allocator.lease(3) count = 100 @@ 
-102,9 +97,7 @@ class ComponentTestCase(TestCase): comp_a = Component( - SpecificEquityTrades, - args_a, - kwargs_a, + SpecificEquityTrades(*args_a, **kwargs_a), monitor, socket_uris[0], DATASOURCE_FRAME, @@ -124,9 +117,7 @@ class ComponentTestCase(TestCase): comp_b = Component( - SpecificEquityTrades, - args_b, - kwargs_b, + SpecificEquityTrades(*args_b, **kwargs_b), monitor, socket_uris[1], DATASOURCE_FRAME, @@ -144,9 +135,7 @@ class ComponentTestCase(TestCase): } comp_c = Component( - SpecificEquityTrades, - args_c, - kwargs_c, + SpecificEquityTrades(*args_c, **kwargs_c), monitor, socket_uris[2], DATASOURCE_FRAME, diff --git a/zipline/core/component.py b/zipline/core/component.py index b76e78cf..37d22d16 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -8,7 +8,6 @@ import uuid import time import socket import logbook -import traceback import humanhash import multiprocessing from setproctitle import setproctitle @@ -25,7 +24,6 @@ from zipline.core.monitor import PARAMETERS from zipline.protocol import ( CONTROL_PROTOCOL, COMPONENT_STATE, - COMPONENT_FAILURE, CONTROL_FRAME, CONTROL_UNFRAME, EXCEPTION_FRAME @@ -48,24 +46,18 @@ class Component(object): def __init__(self, generator, - component_id, monitor, socket_uri, frame, unframe ): - assert component_id, \ - "Every component needs a unique and invariant identifier" - assert isinstance(component_id, basestring), \ - "Components must have string IDs" - # ----------------- # Generator # ----------------- self.generator = generator self.frame = frame - self.component_id = hash(self.generator) + self.component_id = self.generator.get_hash() # lock for waiting on monitor "GO" self.waiting = None @@ -120,7 +112,7 @@ class Component(object): The core logic of the all components is run here. """ # The process title so you can watch it in top, ps. 
- setproctitle(self.gen_func.__name__) + setproctitle(self.generator.__class__.__name__) self.prefix = "FORK-" log.info("Start %r" % self) From 18d327069a4b434a317d81bde1ea31b693249282 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 18:01:16 -0400 Subject: [PATCH 22/73] stateful transform as class --- zipline/gens/composites.py | 8 ++- zipline/gens/examples.py | 11 ++-- zipline/gens/tradesimulation.py | 2 +- zipline/gens/transform.py | 107 ++++++++++++++++++-------------- zipline/gens/utils.py | 3 +- 5 files changed, 72 insertions(+), 59 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index af2fdc8c..88976bb0 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -6,7 +6,7 @@ from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.utils import roundrobin, hash_args from zipline.gens.sort import date_sort from zipline.gens.merge import merge -from zipline.gens.transform import stateful_transform +from zipline.gens.transform import StatefulTransform SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) @@ -54,8 +54,8 @@ def merged_transforms(sorted_stream, bundles): tnfms_with_streams = zip(split, bundles) # Convert the copies into transform streams. - tnfm_gens = [ - stateful_transform( + tnfms = [ + StatefulTransform( stream_copy, bundle.tnfm, *bundle.args, @@ -63,6 +63,8 @@ def merged_transforms(sorted_stream, bundles): ) for stream_copy, bundle in tnfms_with_streams ] + tnfm_gens = [tnfm.gen() for tnfm in tnfms] + # Roundrobin the outputs of our transforms to create a single flat stream. 
to_merge = roundrobin(tnfm_gens, namestrings) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 5a24493f..2f7830c8 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -7,7 +7,7 @@ from zipline.test_algorithms import TestAlgorithm from zipline.gens.composites import SourceBundle, TransformBundle, \ date_sorted_sources, merged_transforms from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.transform import MovingAverage, Passthrough +from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform from zipline.gens.tradesimulation import trade_simulation_client as tsc import zipline.protocol as zp @@ -39,11 +39,10 @@ if __name__ == "__main__": sort_out = date_sorted_sources(source_a, source_b) -# passthrough = TransformBundle(Passthrough, (), {}) -# mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) -# tnfm_bundles = (passthrough, mavg_price) - -# merge_out = merged_transforms(sort_out, tnfm_bundles) + passthrough = TransformBundle(Passthrough, (), {}) + mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) + tnfm_bundles = (passthrough, mavg_price) + merge_out = merged_transforms(sort_out, tnfm_bundles) # # for message in merge_out: # # print message diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 04567fac..6174ac35 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -5,7 +5,7 @@ from numbers import Integral from zipline import ndict -from zipline.gens.transform import stateful_transform +from zipline.gens.transform import StatefulTransform from zipline.finance.trading import TransactionSimulator from zipline.finance.performance import PerformanceTracker diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 44ec4b5a..4d453927 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -39,61 +39,72 @@ def 
functional_transform(stream_in, func, *args, **kwargs): assert_transform_protocol(out_value) yield(namestring, out_value) -def stateful_transform(stream_in, tnfm_class, *args, **kwargs): +class StatefulTransform(object): """ - Generic transform generator that takes each message from an in-stream - and passes it to a state class. For each call to update, the state - class must produce a message to be fed downstream. Any transform class - with the FORWARDER class variable set to true will forward all fields - in the original message. Otherwise only dt, tnfm_id, and tnfm_value - are forwarded. + Generic transform generator that takes each message from an + in-stream and passes it to a state class. For each call to + update, the state class must produce a message to be fed + downstream. Any transform class with the FORWARDER class variable + set to true will forward all fields in the original message. + Otherwise only dt, tnfm_id, and tnfm_value are forwarded. """ - forward_all_fields = tnfm_class.__dict__.get('FORWARDER', False) - update_in_place = tnfm_class.__dict__.get('UPDATER', False) - - assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ + def __init__(self, stream_in, tnfm_class, *args, **kwargs): + assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ "Stateful transform requires a class." - assert tnfm_class.__dict__.has_key('update'), \ + assert tnfm_class.__dict__.has_key('update'), \ "Stateful transform requires the class to have an update method" - - # Create an instance of our transform class. - state = tnfm_class(*args, **kwargs) - - # Generate the string associated with this generator's output. - namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) - - # IMPORTANT: Messages may contain pointers that are shared with - # other streams, so we only manipulate copies. 
- for message in stream_in: - assert_sort_unframe_protocol(message) - message_copy = deepcopy(message) - - # Same shared pointer issue here as above. - tnfm_value = state.update(deepcopy(message_copy)) - - # If we want to keep all original values, plus append tnfm_id - # and tnfm_value. Used for Passthrough. - if forward_all_fields: - out_message = message_copy - out_message.tnfm_id = namestring - out_message.tnfm_value = tnfm_value - yield out_message + self.forward_all = tnfm_class.__dict__.get('FORWARDER', False) + self.update_in_place = tnfm_class.__dict__.get('UPDATER', False) + assert not all([self.forward_all, self.update_in_place]) - # Our expectation is that the transform simply updated the - # message it was passed. Useful for chaining together - # multiple transforms, e.g. TransactionSimulator/PerformanceTracker. - elif update_in_place: - yield tnfm_value + self.stream_in = stream_in - # Otherwise send tnfm_id, tnfm_value, and the message - # date. Useful for transforms being piped to a merge. - else: - out_message = ndict() - out_message.tnfm_id = namestring - out_message.tnfm_value = tnfm_value - out_message.dt = message_copy.dt - yield out_message + # Create an instance of our transform class. + self.state = tnfm_class(*args, **kwargs) + + # Generate the string associated with this generator's output. + self.namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) + + def get_hash(self): + return self.namestring + + def __iter__(self): + return self.gen() + + def gen(self): + # IMPORTANT: Messages may contain pointers that are shared with + # other streams, so we only manipulate copies. + for message in self.stream_in: + + assert_sort_unframe_protocol(message) + message_copy = deepcopy(message) + + # Same shared pointer issue here as above. + tnfm_value = self.state.update(deepcopy(message_copy)) + + # If we want to keep all original values, plus append tnfm_id + # and tnfm_value. Used for Passthrough. 
+ if self.forward_all: + out_message = message_copy + out_message.tnfm_id = self.namestring + out_message.tnfm_value = tnfm_value + yield out_message + + # Our expectation is that the transform simply updated the + # message it was passed. Useful for chaining together + # multiple transforms, e.g. TransactionSimulator/PerformanceTracker. + elif self.update_in_place: + yield tnfm_value + + # Otherwise send tnfm_id, tnfm_value, and the message + # date. Useful for transforms being piped to a merge. + else: + out_message = ndict() + out_message.tnfm_id = self.namestring + out_message.tnfm_value = tnfm_value + out_message.dt = message_copy.dt + yield out_message class MovingAverage(object): """ diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index d76966f5..4a95198f 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -43,7 +43,8 @@ def roundrobin(sources, namestrings): """ assert len(sources) == len(namestrings) mapping = OrderedDict(zip(namestrings, sources)) - + + import nose.tools; nose.tools.set_trace() # While our generators have not been exhausted, pull elements while mapping.keys() != []: for namestring, source in mapping.iteritems(): From c2b4689668a019cfc45bddd47f582ba744102123 Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 18:02:43 -0400 Subject: [PATCH 23/73] intersticial to merge with scott --- tests/test_components.py | 14 ++++---------- zipline/core/component.py | 13 ++++++++++++- zipline/gens/composites.py | 30 +++++++++++++++++------------- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 3626e54c..45f632fd 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -46,7 +46,6 @@ class ComponentTestCase(TestCase): setup_logger(self) def tearDown(self): - #self.ctx.term() teardown_logger(self) def test_source(self): @@ -74,6 +73,9 @@ class ComponentTestCase(TestCase): ) launch_monitor(monitor) + iter_a = iter(comp_a) + ev = 
iter_a.next() + return for event in comp_a: log.info(event) @@ -142,16 +144,8 @@ class ComponentTestCase(TestCase): DATASOURCE_UNFRAME ) - names = [ - comp_a.get_id, - comp_b.get_id, - comp_c.get_id - ] - - monitor.manage(set(names)) launch_monitor(monitor) - - sorted_out = date_sorted_sources([comp_a, comp_b, comp_c]) + sorted_out = date_sorted_sources(comp_a, comp_b, comp_c) prev = None sort_count = 0 diff --git a/zipline/core/component.py b/zipline/core/component.py index 37d22d16..4cb4c0d0 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -98,6 +98,11 @@ class Component(object): self.guid = uuid.uuid4() self.huid = humanhash.humanize(self.guid.hex) + # ------------ + # Generator + # ------------ + self.gen = None + # ------------ # Core Methods @@ -188,7 +193,10 @@ class Component(object): return self.run_safe(self._run_in) def __iter__(self): - return self._launch() + if not self.gen: + self.gen = self._launch() + + return self.gen # ---------------------------- # Cleanup & Modes of Failure @@ -601,6 +609,9 @@ class Component(object): """ return self.prefix + self.component_id + def get_hash(self): + return self.component_id + def debug(self): """ Debug information about the component. diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index af2fdc8c..b4ecde8d 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -11,25 +11,29 @@ from zipline.gens.transform import stateful_transform SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) -def date_sorted_sources(*sources): +def date_sorted_sources(bundles): """ Takes an iterable of SortBundles, generating namestrings and initialized datasources for each before piping them into a date_sort. 
""" + assert isinstance(bundles, (list, tuple)) + for bundle in bundles: + assert isinstance(bundle, SourceBundle) - for source in sources: - assert iter(source), "Source %s not iterable" % source - assert source.__class__.__dict__.has_key('get_hash'), "No get_hash" + # Calculate namestring hashes to pass to date_sort. + names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) + for bundle in bundles] - # Get name hashes to pass to date_sort. - names = [source.get_hash() for source in sources] + # Pass each source its arguments. + source_gens = [bundle.source(*bundle.args, **bundle.kwargs) + for bundle in bundles] # Convert the list of generators into a flat stream by pulling # one element at a time from each. - stream_in = roundrobin(sources, names) - + stream_in = roundrobin(source_gens, names) + # Guarantee the flat stream will be sorted by date, using source_id as - # tie-breaker, which is fully deterministic (given deterministic string + # tie-breaker, which is fully deterministic (given deterministic string # representation for all args/kwargs) return date_sort(stream_in, names) @@ -50,15 +54,15 @@ def merged_transforms(sorted_stream, bundles): # Create a copy of the stream for each transform. split = tee(sorted_stream, len(bundles)) - # Package a stream copy with each bundle + # Package a stream copy with each bundle tnfms_with_streams = zip(split, bundles) # Convert the copies into transform streams. 
tnfm_gens = [ stateful_transform( - stream_copy, - bundle.tnfm, - *bundle.args, + stream_copy, + bundle.tnfm, + *bundle.args, **bundle.kwargs ) for stream_copy, bundle in tnfms_with_streams From 598b342655aec487b981cefbd9c3642a599c0e58 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 18:13:03 -0400 Subject: [PATCH 24/73] minor --- zipline/gens/examples.py | 17 ++++++++--------- zipline/gens/tradesimulation.py | 5 ++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 2f7830c8..84d42c3d 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -42,17 +42,16 @@ if __name__ == "__main__": passthrough = TransformBundle(Passthrough, (), {}) mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) tnfm_bundles = (passthrough, mavg_price) + merge_out = merged_transforms(sort_out, tnfm_bundles) - -# # for message in merge_out: -# # print message -# algo = TestAlgorithm(2, 100, 100) -# environment = create_trading_environment(year = 2012) -# style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE + import nose.tools; nose.tools.set_trace() + algo = TestAlgorithm(2, 100, 100, sid_filter = [2,3]) + environment = create_trading_environment(year = 2012) + style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE -# client_out = tsc(merge_out, algo, environment, style) -# for message in client_out: - # pp(message) + client_out = tsc(merge_out, algo, environment, style) + for message in client_out: + pp(message) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 6174ac35..a4a576fe 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -60,20 +60,19 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): # Creates a txn field on the event containing transaction # information if we filled any pending orders on the event's sid. # TRANSACTION is None if we didn't fill any orders. 
- with_txns = stateful_transform( + with_txns = StatefulTransform( stream_in, TransactionSimulator, open_orders, style = sim_style ) - # Pipe the events with transactions to perf. This will remove the # txn field added by TransactionSimulator and replace it with # a portfolio object to be passed to the user's algorithm. Also adds # a PERF_MESSAGE field which is usually none, but contains an update # message once per day. - with_portfolio_and_perf_msg = stateful_transform( + with_portfolio_and_perf_msg = StatefulTransform( with_txns, PerformanceTracker, environment, From b6f7dc543ad15e9f876fb2d55cd6c2dda1da94cc Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 18:16:04 -0400 Subject: [PATCH 25/73] trivial for fawce --- zipline/gens/composites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 88976bb0..4d448641 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -31,6 +31,7 @@ def date_sorted_sources(*sources): # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string # representation for all args/kwargs) + return date_sort(stream_in, names) From 193dc20a7a61fac8a1bec4af87eeaa645264e425 Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 18:23:47 -0400 Subject: [PATCH 26/73] checking in latest from scotty --- zipline/gens/composites.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 904aff42..4ac99502 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -11,26 +11,22 @@ from zipline.gens.transform import StatefulTransform SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) -def date_sorted_sources(bundles): +def date_sorted_sources(*sources): """ Takes an 
iterable of SortBundles, generating namestrings and initialized datasources for each before piping them into a date_sort. """ - assert isinstance(bundles, (list, tuple)) - for bundle in bundles: - assert isinstance(bundle, SourceBundle) - # Calculate namestring hashes to pass to date_sort. - names = [bundle.source.__name__ + hash_args(*bundle.args, **bundle.kwargs) - for bundle in bundles] + for source in sources: + assert iter(source), "Source %s not iterable" % source + assert source.__class__.__dict__.has_key('get_hash'), "No get_hash" - # Pass each source its arguments. - source_gens = [bundle.source(*bundle.args, **bundle.kwargs) - for bundle in bundles] + # Get name hashes to pass to date_sort. + names = [source.get_hash() for source in sources] # Convert the list of generators into a flat stream by pulling # one element at a time from each. - stream_in = roundrobin(source_gens, names) + stream_in = roundrobin(sources, names) # Guarantee the flat stream will be sorted by date, using source_id as # tie-breaker, which is fully deterministic (given deterministic string From d74bc1bbc53976d5cdb6dbfaa0671c658e7ede7b Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Thu, 2 Aug 2012 21:49:39 -0400 Subject: [PATCH 27/73] save --- zipline/gens/composites.py | 14 +++++++------- zipline/gens/examples.py | 12 +++++++----- zipline/gens/utils.py | 3 --- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 4ac99502..f7875816 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -15,7 +15,7 @@ def date_sorted_sources(*sources): """ Takes an iterable of SortBundles, generating namestrings and initialized datasources for each before piping them into a date_sort. 
- """ +n """ for source in sources: assert iter(source), "Source %s not iterable" % source @@ -27,9 +27,9 @@ def date_sorted_sources(*sources): # Convert the list of generators into a flat stream by pulling # one element at a time from each. stream_in = roundrobin(sources, names) - + # Guarantee the flat stream will be sorted by date, using source_id as - # tie-breaker, which is fully deterministic (given deterministic string + # tie-breaker, which is fully deterministic (given deterministic string # representation for all args/kwargs) return date_sort(stream_in, names) @@ -51,15 +51,15 @@ def merged_transforms(sorted_stream, bundles): # Create a copy of the stream for each transform. split = tee(sorted_stream, len(bundles)) - # Package a stream copy with each bundle + # Package a stream copy with each bundle tnfms_with_streams = zip(split, bundles) # Convert the copies into transform streams. tnfms = [ StatefulTransform( - stream_copy, - bundle.tnfm, - *bundle.args, + stream_copy, + bundle.tnfm, + *bundle.args, **bundle.kwargs ) for stream_copy, bundle in tnfms_with_streams diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 84d42c3d..3027eab7 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,4 +1,7 @@ import pytz + +from time import sleep +from pprint import pprint as pp from datetime import datetime, timedelta from zipline.utils.factory import create_trading_environment @@ -20,7 +23,7 @@ if __name__ == "__main__": kwargs_a = { 'sids' : [1,2,3], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 1), + 'delta' : timedelta(minutes = 10), 'filter' : filter } source_a = SpecificEquityTrades(*args_a, **kwargs_a) @@ -30,7 +33,7 @@ if __name__ == "__main__": kwargs_b = { 'sids' : [2,3,4], 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 1), + 'delta' : timedelta(minutes = 10), 'filter' : filter } source_b = SpecificEquityTrades(*args_b, **kwargs_b) @@ -45,13 
+48,12 @@ if __name__ == "__main__": merge_out = merged_transforms(sort_out, tnfm_bundles) - import nose.tools; nose.tools.set_trace() - algo = TestAlgorithm(2, 100, 100, sid_filter = [2,3]) + algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) environment = create_trading_environment(year = 2012) style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE client_out = tsc(merge_out, algo, environment, style) for message in client_out: pp(message) - + sleep(1) diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index 4a95198f..d7968ed3 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -44,7 +44,6 @@ def roundrobin(sources, namestrings): assert len(sources) == len(namestrings) mapping = OrderedDict(zip(namestrings, sources)) - import nose.tools; nose.tools.set_trace() # While our generators have not been exhausted, pull elements while mapping.keys() != []: for namestring, source in mapping.iteritems(): @@ -55,8 +54,6 @@ def roundrobin(sources, namestrings): yield done_message(namestring) del mapping[namestring] - - def hash_args(*args, **kwargs): """Define a unique string for any set of representable args.""" arg_string = '_'.join([str(arg) for arg in args]) From f166626ea83efc58c486dfb009ea056c5edf185c Mon Sep 17 00:00:00 2001 From: fawce Date: Thu, 2 Aug 2012 22:51:17 -0400 Subject: [PATCH 28/73] multiple sources, each as component, feeding sort. 
--- tests/test_components.py | 62 ++++++++------- zipline/core/component.py | 154 +++++++++++++++++++------------------- zipline/core/monitor.py | 2 +- zipline/gens/utils.py | 3 +- zipline/gens/zmqgen.py | 2 +- 5 files changed, 113 insertions(+), 110 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index ff353186..bbd881e7 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -64,18 +64,19 @@ class ComponentTestCase(TestCase): 'count' : count } + trade_gen = SpecificEquityTrades(*args_a, **kwargs_a) + monitor.add_to_topology(trade_gen.get_hash()) + + launch_monitor(monitor) + comp_a = Component( - SpecificEquityTrades(*args_a, **kwargs_a), + trade_gen, monitor, socket_uri, DATASOURCE_FRAME, DATASOURCE_UNFRAME ) - launch_monitor(monitor) - iter_a = iter(comp_a) - ev = iter_a.next() - return for event in comp_a: log.info(event) @@ -96,16 +97,8 @@ class ComponentTestCase(TestCase): 'filter' : filter, 'count' : count } - - - comp_a = Component( - SpecificEquityTrades(*args_a, **kwargs_a), - monitor, - socket_uris[0], - DATASOURCE_FRAME, - DATASOURCE_UNFRAME - ) - + trade_gen_a = SpecificEquityTrades(*args_a, **kwargs_a) + monitor.add_to_topology(trade_gen_a.get_hash()) #Set up source b. Two minutes between events. args_b = tuple() @@ -116,15 +109,8 @@ class ComponentTestCase(TestCase): 'filter' : filter, 'count' : count } - - - comp_b = Component( - SpecificEquityTrades(*args_b, **kwargs_b), - monitor, - socket_uris[1], - DATASOURCE_FRAME, - DATASOURCE_UNFRAME - ) + trade_gen_b = SpecificEquityTrades(*args_b, **kwargs_b) + monitor.add_to_topology(trade_gen_b.get_hash()) #Set up source c. Three minutes between events. 
args_c = tuple() @@ -136,18 +122,38 @@ class ComponentTestCase(TestCase): 'count' : count } + trade_gen_c = SpecificEquityTrades(*args_c, **kwargs_c) + monitor.add_to_topology(trade_gen_c.get_hash()) + + launch_monitor(monitor) + + comp_a = Component( + trade_gen_a, + monitor, + socket_uris[0], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME + ) + + comp_b = Component( + trade_gen_b, + monitor, + socket_uris[1], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME + ) + comp_c = Component( - SpecificEquityTrades(*args_c, **kwargs_c), + trade_gen_c, monitor, socket_uris[2], DATASOURCE_FRAME, DATASOURCE_UNFRAME ) - launch_monitor(monitor) sources = [comp_a, comp_b, comp_c] - gens = [iter(source) for source in sources] - sorted_out = date_sorted_sources(gens) + + sorted_out = date_sorted_sources(*sources) prev = None sort_count = 0 diff --git a/zipline/core/component.py b/zipline/core/component.py index 4cb4c0d0..82aebc88 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -83,10 +83,6 @@ class Component(object): self.unframe = unframe self.prefix = "" - # register two components with the monitor - monitor.add_to_topology(self.component_id) - monitor.add_to_topology("FORK-"+self.component_id) - # TODO: state_flag is deprecated, remove self.state_flag = COMPONENT_STATE.OK @@ -98,10 +94,19 @@ class Component(object): self.guid = uuid.uuid4() self.huid = humanhash.humanize(self.guid.hex) + # first, start the generator in its own process. Once + # Monitor says "go", Events from the generator will be + # FRAME'd and PUSH'd to self.socket_uri. + proc = multiprocessing.Process( + target=self.loop_send + ) + proc.start() + # ------------ - # Generator + # Message Receiver/Generator # ------------ - self.gen = None + self.recv_gen = self.create_recv_gen() + # ------------ @@ -109,94 +114,87 @@ class Component(object): # ------------ - def _run_out(self): + def loop_send(self): """ The main component loop. This is wrapped inside a exception reporting context inside of run. 
The core logic of the all components is run here. """ - # The process title so you can watch it in top, ps. - setproctitle(self.generator.__class__.__name__) - self.prefix = "FORK-" - - log.info("Start %r" % self) - log.info("Pid %s" % os.getpid()) - log.info("Group %s" % os.getpgrp()) - - self.open() - - self.signal_ready() - self.lock_ready() - self.wait_ready() - - # ----------------------- - # YOU SHALL NOT PASS!!!!! - # ----------------------- - # ... until the monitor signals GO - - for event in self.generator: - self.heartbeat() - event.source_id = self.get_id - msg = self.frame(event) - self.out_socket.send(msg) - - self.signal_done() - - def _run_in(self): - self.open(send=False) - self.signal_ready() - self.lock_ready() - self.wait_ready() - # ----------------------- - # YOU SHALL NOT PASS!!!!! - # ----------------------- - # ... until the monitor signals GO - - # return the generator - for event in gen_from_poller(self.poll, self.in_socket, self.unframe): - event.source_id = self.get_id - yield event - - self.signal_done() - - def run_safe(self, func): - """ - Run a function that is assumed to include wait_ready and - heartbeat. Used to wrap fork_generator and consume_gen. - """ try: - return func() + # The process title so you can watch it in top, ps. + setproctitle(self.generator.__class__.__name__) + self.prefix = "FORK-" + + log.info("Start %r" % self) + log.info("Pid %s" % os.getpid()) + log.info("Group %s" % os.getpgrp()) + + self.open() + + self.signal_ready() + self.lock_ready() + self.wait_ready() + + # ----------------------- + # YOU SHALL NOT PASS!!!!! + # ----------------------- + # ... until the monitor signals GO + + for event in self.generator: + self.heartbeat() + msg = self.frame(event) + self.out_socket.send(msg) + + self.signal_done() + except Exception as exc: - if not isinstance(exc, KillSignal): - self.signal_exception(exc) - else: - # if we get a kill signal, forcibly close all the - # sockets. 
- self.teardown_sockets() + self.handle_exception(exc) finally: log.info("Exiting %r" % self) - def _launch(self): - # first, start the generator in its own process. Once - # Monitor says "go", Events from the generator will be - # FRAME'd and PUSH'd to self.socket_uri. - proc = multiprocessing.Process( - target=self.run_safe, - args=(self._run_out,) - ) - proc.start() + def create_recv_gen(self): + try: + self.open(send=False) + self.signal_ready() + self.lock_ready() + # return the generator + return self.loop_recv() + except Exception as exc: + self.handle_exception(exc) + finally: + log.info("Created Recv Gen for %r" % self) - # Start the poller-generator, which will PULL messages - # from self.sockiet_uri, UNFRAME'd them, and yield them. - return self.run_safe(self._run_in) + def loop_recv(self): + try: + # we block on ready here until monitor sends the GO + self.wait_ready() + log.info("Starting to drain {id}".format(id=self.get_id)) + for event in gen_from_poller(self.poll, self.in_socket, self.unframe): + self.heartbeat() + # event.source_id = self.get_id + yield event + + self.signal_done() + except Exception as exc: + self.handle_exception(exc) + finally: + log.info("Exiting %r" % self) + + def handle_exception(self, exc, re_raise=False): + if not isinstance(exc, KillSignal): + self.signal_exception(exc) + else: + # if we get a kill signal, forcibly close all the + # sockets. 
+ self.teardown_sockets() def __iter__(self): - if not self.gen: - self.gen = self._launch() + return self - return self.gen + def next(self): + return self.recv_gen.next() # ---------------------------- # Cleanup & Modes of Failure diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index 83b022c5..e9999ea4 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -120,7 +120,7 @@ class Monitor(object): return def add_to_topology(self, component_id): - add = set([component_id]) + add = set([component_id, "FORK-" + component_id]) self.topology.update(add) def freeze_topology(self): diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index 4a95198f..d76966f5 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -43,8 +43,7 @@ def roundrobin(sources, namestrings): """ assert len(sources) == len(namestrings) mapping = OrderedDict(zip(namestrings, sources)) - - import nose.tools; nose.tools.set_trace() + # While our generators have not been exhausted, pull elements while mapping.keys() != []: for namestring, source in mapping.iteritems(): diff --git a/zipline/gens/zmqgen.py b/zipline/gens/zmqgen.py index f9d5f919..463f8b42 100644 --- a/zipline/gens/zmqgen.py +++ b/zipline/gens/zmqgen.py @@ -21,7 +21,7 @@ def gen_from_pull_socket(socket_uri, context, unframe): def gen_from_poller(poller, in_socket, unframe): while True: - socks = dict(poller.poll(1000)) + socks = dict(poller.poll()) if socks.get(in_socket) == zmq.POLLIN: message = in_socket.recv() From 479ad502f6f0db73bfd2c590eb49fa3db166f80a Mon Sep 17 00:00:00 2001 From: fawce Date: Fri, 3 Aug 2012 01:05:53 -0400 Subject: [PATCH 29/73] addressing race condition between loop_send exit and DONE message delivery. 
--- tests/test_components.py | 4 +- zipline/core/component.py | 91 ++++++++++++++++++++------------------- zipline/core/monitor.py | 20 +++------ zipline/gens/zmqgen.py | 13 ------ 4 files changed, 56 insertions(+), 72 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index bbd881e7..cf140677 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -77,7 +77,6 @@ class ComponentTestCase(TestCase): DATASOURCE_UNFRAME ) - for event in comp_a: log.info(event) @@ -155,6 +154,9 @@ class ComponentTestCase(TestCase): sorted_out = date_sorted_sources(*sources) + import time + time.sleep(.25) + prev = None sort_count = 0 for msg in sorted_out: diff --git a/zipline/core/component.py b/zipline/core/component.py index 82aebc88..78ee69c8 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -17,8 +17,6 @@ from collections import namedtuple # pyzmq import zmq -from zipline.gens.zmqgen import gen_from_poller - from zipline.core.monitor import PARAMETERS from zipline.protocol import ( @@ -36,6 +34,10 @@ class KillSignal(Exception): def __init__(self): pass +class ShutdownSignal(Exception): + def __init__(self): + pass + ComponentSocketArgs = namedtuple('ComponentSocketArgs',['uri','style','bind']) class Component(object): @@ -87,7 +89,7 @@ class Component(object): self.state_flag = COMPONENT_STATE.OK # track time of last ping we received from monitor - self.last_ping = None + self.last_ping = time.time() # Humanhashes make this way easier to debug because they stick # in your mind unlike a 32 byte string of random hex. @@ -108,7 +110,6 @@ class Component(object): self.recv_gen = self.create_recv_gen() - # ------------ # Core Methods # ------------ @@ -148,6 +149,13 @@ class Component(object): self.signal_done() + # keep heartbeating until we receive the shutdown + # message from the Monitor (raises a + # ShutdownSignal), or we don't hear from the Monitor + # for MAX_COMPONENT_WAIT. 
+ while True: + self.heartbeat(timeout=1000) + except Exception as exc: self.handle_exception(exc) finally: @@ -170,10 +178,7 @@ class Component(object): try: # we block on ready here until monitor sends the GO self.wait_ready() - log.info("Starting to drain {id}".format(id=self.get_id)) - for event in gen_from_poller(self.poll, self.in_socket, self.unframe): - self.heartbeat() - # event.source_id = self.get_id + for event in self.gen_from_poller(self.poll, self.in_socket, self.unframe): yield event self.signal_done() @@ -182,13 +187,30 @@ class Component(object): finally: log.info("Exiting %r" % self) + def gen_from_poller(self, poller, in_socket, unframe): + + while True: + socks = dict(poller.poll(0)) + self.heartbeat() + if socks.get(in_socket) == zmq.POLLIN: + message = in_socket.recv() + if message == str(CONTROL_PROTOCOL.DONE): + break + else: + event = unframe(message) + yield event + def handle_exception(self, exc, re_raise=False): - if not isinstance(exc, KillSignal): - self.signal_exception(exc) - else: + if isinstance(exc, KillSignal): # if we get a kill signal, forcibly close all the # sockets. self.teardown_sockets() + elif isinstance(exc, ShutdownSignal): + # signal from monitor of an orderly shutdown, + # do nothing. + pass + else: + self.signal_exception(exc) def __iter__(self): return self @@ -210,6 +232,12 @@ class Component(object): for sock in self.sockets: sock.close() + def shutdown(self): + """ + Clean shutdown. + """ + raise ShutdownSignal() + def kill(self): """ Unclean shutdown. @@ -333,7 +361,7 @@ class Component(object): # Side effectful call from the monitor to unlock # and begin doing work only when the entire topology # of the system beings to come online - log.info('Unlocking ' + self.__class__.__name__) + log.info('Unlocking ' + self.get_id) self.unlock_ready() # ========= @@ -344,7 +372,7 @@ class Component(object): # data that are done during a clean shutdown. Inform the # monitor that we're done. 
elif event == CONTROL_PROTOCOL.SHUTDOWN: - self.signal_done() + self.shutdown() break # ========= @@ -358,7 +386,7 @@ class Component(object): elif time.time() - start_wait > PARAMETERS.MAX_COMPONENT_WAIT: log.info('No go signal from monitor, %s exiting' \ - % self.__class__.__name__) + % self.get_id) self.kill() break @@ -407,7 +435,7 @@ class Component(object): # data that are done during a clean shutdown. Inform the # monitor that we're done. elif event == CONTROL_PROTOCOL.SHUTDOWN: - self.signal_done() + self.shutdown() # ========= # Hard Kill @@ -419,7 +447,7 @@ class Component(object): # In case we didn't receive a ping, send a pre-emptive # pong to the monitor. - elif self.last_ping and time.time() - self.last_ping > 1: + elif time.time() - self.last_ping > 2: # send a ping ahead of schedule pre_pong = time.time() heartbeat_frame = CONTROL_FRAME( @@ -432,15 +460,14 @@ class Component(object): # doing work self.control_out.send(heartbeat_frame, self.zmq.NOBLOCK) self.last_ping = pre_pong - elif self.last_ping and \ - time.time() - self.last_ping > PARAMETERS.MAX_COMPONENT_WAIT: + elif time.time() - self.last_ping > PARAMETERS.MAX_COMPONENT_WAIT: # monitor is gone without sending the shutdown # signal, do a hard exit. self.kill() def signal_ready(self): - log.info(self.__class__.__name__ + ' is ready') + log.info(self.get_id + ' is ready') frame = CONTROL_FRAME( CONTROL_PROTOCOL.READY, '' @@ -470,13 +497,6 @@ class Component(object): self.control_out.send(done_frame) log.info("[%s] sent control done" % self.get_id) - # there is a narrow race condition where we finish just - # after the Monitor accepts our prior heartbeat, but just - # before the next one is sent. So, we hang around for one - # last heartbeat, and wait an unusually long time. - # TODO: decided if this is really necessary. 
- # self.heartbeat(timeout=5000) - # ----------- # Messaging # ----------- @@ -529,7 +549,6 @@ class Component(object): def bind_push_socket(self, addr): push_socket = self.context.socket(self.zmq.PUSH) push_socket.bind(addr) - self.out_socket = push_socket self.sockets.append(push_socket) return push_socket @@ -547,7 +566,6 @@ class Component(object): pull_socket = self.context.socket(self.zmq.PULL) pull_socket.bind(addr) self.poll.register(pull_socket, self.zmq.POLLIN) - self.sockets.append(pull_socket) return pull_socket @@ -556,26 +574,9 @@ class Component(object): push_socket = self.context.socket(self.zmq.PUSH) push_socket.connect(addr) self.sockets.append(push_socket) - self.out_socket = push_socket return push_socket - def bind_pub_socket(self, addr): - pub_socket = self.context.socket(self.zmq.PUB) - pub_socket.bind(addr) - self.out_socket = pub_socket - - return pub_socket - - def connect_sub_socket(self, addr): - sub_socket = self.context.socket(self.zmq.SUB) - sub_socket.connect(addr) - sub_socket.setsockopt(self.zmq.SUBSCRIBE,'') - self.sockets.append(sub_socket) - - self.poll.register(sub_socket, self.zmq.POLLIN) - - return sub_socket def setup_control(self): """ diff --git a/zipline/core/monitor.py b/zipline/core/monitor.py index d9e4bd19..7a920742 100644 --- a/zipline/core/monitor.py +++ b/zipline/core/monitor.py @@ -340,9 +340,8 @@ class Monitor(object): log.info("breaking out of initial heartbeat") break - # Has the entire topology told us its DONE - done = len(self.finished) == len(self.topology) - if done: + # Break out if the entire topology told us its DONE + if len(self.finished) == len(self.topology): break @@ -438,27 +437,22 @@ class Monitor(object): bad = self.tracked - good - self.finished new = self.responses - good - self.finished - missing = self.topology - self.tracked - self.finished - for component in new: self.new(component) - if self.debug: - log.info('New component %r' % component) - for component in bad: 
self.timed_out(component) - for component in missing: + missing = self.topology - self.tracked - self.finished + for component in missing: if self.debug: log.info('Missing component %r' % component) - if self.debug: - for component in self.tracked: - if component not in self.topology: - log.info('Uninvited component %r' % component) + for component in self.tracked: + if component not in self.topology: + log.info('Uninvited component %r' % component) # -------------- # Init Handlers diff --git a/zipline/gens/zmqgen.py b/zipline/gens/zmqgen.py index 463f8b42..66dbdca3 100644 --- a/zipline/gens/zmqgen.py +++ b/zipline/gens/zmqgen.py @@ -17,16 +17,3 @@ def gen_from_pull_socket(socket_uri, context, unframe): # this generator needs to know about the source_ids coming in via # the poller, and need to yield DONE messages for each # source_id. - -def gen_from_poller(poller, in_socket, unframe): - - while True: - socks = dict(poller.poll()) - - if socks.get(in_socket) == zmq.POLLIN: - message = in_socket.recv() - if message == str(zp.CONTROL_PROTOCOL.DONE): - break - else: - event = unframe(message) - yield event From ace5d05fab906eef4aa8b8585fee1ecb85ffccd3 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Fri, 3 Aug 2012 14:07:05 -0400 Subject: [PATCH 30/73] tradesim as generator v. 
2.0 --- zipline/finance/trading.py | 10 +- zipline/gens/composites.py | 6 +- zipline/gens/examples.py | 4 +- zipline/gens/tradegens.py | 2 +- zipline/gens/tradesimulation.py | 324 ++++++++++++++++++++------------ zipline/gens/transform.py | 20 +- 6 files changed, 229 insertions(+), 137 deletions(-) diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index dd6345d4..53ac8f2f 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -11,8 +11,8 @@ log = logbook.Logger('Transaction Simulator') class TransactionSimulator(object): UPDATER = True - def __init__(self, open_orders, style=SIMULATION_STYLE.PARTIAL_VOLUME): - self.open_orders = open_orders + def __init__(self, sid_filter, style=SIMULATION_STYLE.PARTIAL_VOLUME): + self.open_orders = {} self.txn_count = 0 self.trade_window = datetime.timedelta(seconds=30) self.orderTTL = datetime.timedelta(days=1) @@ -27,6 +27,12 @@ class TransactionSimulator(object): elif style == SIMULATION_STYLE.NOOP: self.apply_trade_to_open_orders = self.simulate_noop + for sid in sid_filter: + self.open_orders[sid] = [] + + def place_order(self, order): + self.open_orders[order.sid].append(order) + def update(self, event): event.TRANSACTION = None if event.type == zp.DATASOURCE_TYPE.TRADE: diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index f7875816..2b9905b7 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -15,7 +15,7 @@ def date_sorted_sources(*sources): """ Takes an iterable of SortBundles, generating namestrings and initialized datasources for each before piping them into a date_sort. -n """ + """ for source in sources: assert iter(source), "Source %s not iterable" % source @@ -55,7 +55,7 @@ def merged_transforms(sorted_stream, bundles): tnfms_with_streams = zip(split, bundles) # Convert the copies into transform streams. 
- tnfms = [ + tnfm_gens = [ StatefulTransform( stream_copy, bundle.tnfm, @@ -64,8 +64,6 @@ def merged_transforms(sorted_stream, bundles): ) for stream_copy, bundle in tnfms_with_streams ] - tnfm_gens = [tnfm.gen() for tnfm in tnfms] - # Roundrobin the outputs of our transforms to create a single flat stream. to_merge = roundrobin(tnfm_gens, namestrings) diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 3027eab7..38964f01 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -11,7 +11,7 @@ from zipline.gens.composites import SourceBundle, TransformBundle, \ date_sorted_sources, merged_transforms from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform -from zipline.gens.tradesimulation import trade_simulation_client as tsc +from zipline.gens.tradesimulation import TradeSimulationClient as tsc import zipline.protocol as zp @@ -21,6 +21,7 @@ if __name__ == "__main__": #Set up source a. One minute between events. args_a = tuple() kwargs_a = { + 'count' : 2000, 'sids' : [1,2,3], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 10), @@ -31,6 +32,7 @@ if __name__ == "__main__": #Set up source b. Two minutes between events. 
args_b = tuple() kwargs_b = { + 'count' : 2000, 'sids' : [2,3,4], 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 10), diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index a8a691b5..b1a0ed96 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -84,7 +84,7 @@ class SpecificEquityTrades(object): self.generator = self.create_fresh_generator() def __iter__(self): - return self.generator + return self def next(self): return self.generator.next() diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index a4a576fe..5e8db374 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -9,7 +9,7 @@ from zipline.gens.transform import StatefulTransform from zipline.finance.trading import TransactionSimulator from zipline.finance.performance import PerformanceTracker -def trade_simulation_client(stream_in, algo, environment, sim_style): +class TradeSimulationClient(object): """ Generator that takes the expected output of a merge, a user algorithm, a trading environment, and a simulator style as @@ -42,61 +42,123 @@ def trade_simulation_client(stream_in, algo, environment, sim_style): overwritten so that only the most recent snapshot of the universe is sent to the algo. """ - - #============ - # Algo Setup - #============ - - # Initialize txn_sim's dictionary of orders here so that we can - # reference it from within the user's algorithm. - sids = algo.get_sid_filter() - open_orders = {} + def __init__(self, stream_in, algo, environment, sim_style): - for sid in sids: - open_orders[sid] = [] + self.stream_in = stream_in + self.algo = algo + self.sids = algo.get_sid_filter() + self.environment = environment + self.style = sim_style + + self.__generator = None + + + def get_hash(self): + """ + There should only ever be one TSC in the system. 
+ """ + return self.__class__.__name__ + hash_args() + + def __iter__(self): + return self + + def next(self): + if self.__generator: + return self.__generator.next() + else: + self.__generator = self.run_simulation() + return self.__generator.next() + + def run_simulation(self): + """ + Main generator work loop. + """ + # Simulate filling any open orders made by the previous run of + # the user's algorithm. Sets the txn field to true on any + # event that results in a filled order. + ordering_client = StatefulTransform( + self.stream_in, + TransactionSimulator, + self.sids, + style = self.style + ) + # Pipe the events with transactions to perf. This will remove + # the txn field added by TransactionSimulator and replace it + # with a portfolio object to be passed to the user's + # algorithm. Also adds a PERF_MESSAGE field which is usually + # none, but contains an update message once per day. + current_portfolio = StatefulTransform( + ordering_client, + PerformanceTracker, + self.environment, + self.sids + ) + # Pass both the ordering client's state and messages with the + # current portfolio into the algorithm for simulation. + algo_results = AlgorithmSimulator( + current_portfolio, + ordering_client.state, + self.algo, + ) + + for message in algo_results: + yield message + + +class AlgorithmSimulator(object): - # Pipe the in stream into the transaction simulator. - # Creates a txn field on the event containing transaction - # information if we filled any pending orders on the event's sid. - # TRANSACTION is None if we didn't fill any orders. - with_txns = StatefulTransform( - stream_in, - TransactionSimulator, - open_orders, - style = sim_style - ) - - # Pipe the events with transactions to perf. This will remove the - # txn field added by TransactionSimulator and replace it with - # a portfolio object to be passed to the user's algorithm. Also adds - # a PERF_MESSAGE field which is usually none, but contains an update - # message once per day. 
- with_portfolio_and_perf_msg = StatefulTransform( - with_txns, - PerformanceTracker, - environment, - sids - ) - - # Batch the event stream by dt to be processed by the user's algo. - # Yields perf messages whenever it encounters them. - perf_messages = algo_simulator(with_portfolio_and_perf_msg, sids, algo, open_orders) - - for message in perf_messages: - yield message - - -def algo_simulator(stream_in, sids, algo, order_book): + def __init__(self, stream_in, order_book, algo): - simulation_dt = None + self.stream_in = stream_in - # Closure to pass into the user's algo to allow placing orders - # into the txn_sim's dict of open orders. - def order(sid, amount): - assert sid in sids, "Order on invalid sid: %i" % sid + # We extract the order book from the txn client so that + # the algo can place new orders. + self.order_book = order_book + + self.algo = algo + self.sids = algo.get_sid_filter() + + # Monkey patch the user algorithm to place orders in the + # txn_sim order book. + self.algo.set_order(self.order) + self.algo.set_logger(logbook.Logger("Algolog")) + + # Call the user's initialize method. + self.algo.initialize() + + # The algorithm's universe as of our most recent event. + self.universe = ndict() + + for sid in self.sids: + self.universe[sid] = ndict() + self.universe.portfolio = None + + # We don't have a datetime for the current snapshot until we + # receive a message. + self.simulation_dt = None + self.this_snapshot_dt = None + + self.__generator = None + + def __iter__(self): + return self + + def next(self): + if self.__generator: + return self.__generator.next() + else: + self.__generator = self._gen() + return self.__generator.next() + + def order(self, sid, amount): + """ + Closure to pass into the user's algo to allow placing orders + into the txn_sim's dict of open orders. 
+ """ + assert sid in self.sids, "Order on invalid sid: %i" % sid order = ndict({ - 'dt' : simulation_dt, + 'dt' : self.simulation_dt, 'sid' : sid, 'amount' : int(amount), 'filled' : 0 @@ -104,91 +166,105 @@ def algo_simulator(stream_in, sids, algo, order_book): # Tell the user if they try to buy 0 shares of something. if order.amount == 0: - log = "requested to trade zero shares of {sid}".format( + zero_message = "Requested to trade zero shares of {sid}".format( sid=event.sid ) - log.debug(log) + log.debug(zero_message) + # Don't bother placing orders for 0 shares. return - - order_book[sid].append(order) - - # Set the algo's order method. - algo.set_order(order) - # Provide a logbook logging interface to user code. - algo.set_logger(logbook.Logger("Algolog")) + # Add non-zero orders to the order book. + # !!!IMPORTANT SIDE-EFFECT!!! + # This modifies the internal state of the transaction + # simulator so that it can fill the placed order when it + # receives its next message. + self.order_book.place_order(order) - # Call user-defined initialize method before we process any - # events. - algo.initialize() - - universe = ndict() - for sid in sids: - universe[sid] = ndict() - universe.portfolio = None - this_snapshot_dt = None - - for event in stream_in: - # Yield any perf messages received to be relayed back to the browser. - if event.perf_message: - yield event.perf_message - del event['perf_message'] - - # This should only happen for the first event we run. - if simulation_dt == None: - simulation_dt = event.dt - - # If we are currently creating a new message and this update - # matches the message dt, update the state of the universe. - - if this_snapshot_dt != None: - - if event.dt == this_snapshot_dt: - update_universe(event, universe) - - # If we are constructing a snapshot and we hit a new dt, call - # handle_data and record how long it takes. 
- else: - start_tic = datetime.now() - algo.handle_data(universe) - stop_tic = datetime.now() - - # How long did you take? - delta = stop_tic - start_tic - - # Update the simulation time. - simulation_dt = this_snapshot_dt + delta + def _gen(self): + """ + Internal generator work loop. + """ + for event in self.stream_in: + # Yield any perf messages received to be relayed back to the browser. + if event.perf_message: + yield event.perf_message + del event['perf_message'] - # Update the universe with the new event. - update_universe(event, universe) + # This should only happen for the first event we run. + if self.simulation_dt == None: + self.simulation_dt = event.dt + + # ====================== + # Time Compression Logic + # ====================== + + if self.this_snapshot_dt != None: + self.update_current_snapshot(event) - # If the current event is later than the simulation - # time, update the universe and start constructing - # another snapshot. - if event.dt >= simulation_dt: - this_snapshot_dt = event.dt - else: - this_snapshot_dt = None - # We have been fastforwarding. Update the universe - # and check if we can start a new snapshot. + # The algorithm has been missing events because it took + # too long processing. Update the universe with data from + # this event, then check if enough time has passed that we + # can start a new snapshot. + else: + self.update_universe(event) + if event.dt >= self.simulation_dt: + self.this_snapshot_dt = event.dt + + def update_current_snapshot(self, event): + """ + Update our current snapshot of the universe. Call handle_data if + """ + # The new event matches our snapshot dt. Just update the + # universe and move on. + if event.dt == self.this_snapshot_dt: + self.update_universe(event) + + # The new event does not match our snapshot. 
else: - update_universe(event, universe) - if event.dt >= simulation_dt: - this_snapshot_dt = event.dt - + self.simulate_current_snapshot() + + # Once we've finished simulating the old snapshot, + # we can update the universe with the new event. + self.update_universe(event) + + # The current event is later than the simulation time, + # which means the algorithm finished quickly enough to + # receive the new event. Start a new snapshot with this + # event's dt. + if event.dt >= self.simulation_dt: + self.this_snapshot_dt = event.dt - + # The algorithm spent enough time processing that it + # missed the new event. Wait to start a new snapshot until + # the events catch up to the algo's simulated dt. + else: + self.this_snapshot_dt = None + + def simulate_current_snapshot(self): + """ + Run the user's algo against our current snapshot and update the algo's + simulated time. + """ + start_tic = datetime.now() + self.algo.handle_data(self.universe) + stop_tic = datetime.now() + + # How long did you take? + delta = stop_tic - start_tic + + # Update the simulation time. + self.simulation_dt = self.this_snapshot_dt + delta -def update_universe(event, universe): - - universe.portfolio = event.portfolio - del event['portfolio'] + def update_universe(self, event): + """ + Update the universe with new event information. + """ + # Update our portfolio. 
+ self.universe.portfolio = event.portfolio - event_sid = event.sid - del event['sid'] - - for field in event.keys(): - universe[event_sid][field] = event[field] + # Update our knowledge of this event's sid + for field in event.keys(): + self.universe[event.sid][field] = event[field] diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 4d453927..f230f99d 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -42,7 +42,7 @@ def functional_transform(stream_in, func, *args, **kwargs): class StatefulTransform(object): """ Generic transform generator that takes each message from an - in-stream and passes it to a state class. For each call to + in-stream and passes it to a state object. For each call to update, the state class must produce a message to be fed downstream. Any transform class with the FORWARDER class variable set to true will forward all fields in the original message. @@ -63,16 +63,26 @@ class StatefulTransform(object): # Create an instance of our transform class. self.state = tnfm_class(*args, **kwargs) - # Generate the string associated with this generator's output. + # Create the string associated with this generator's output. self.namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) + + # Generator isn't initialized until someone calls __iter__ or next(). + self.__generator = None def get_hash(self): return self.namestring + def next(self): + if self.__generator: + return self.__generator.next() + else: + self.__generator = self._gen() + return self.__generator.next() + def __iter__(self): - return self.gen() - - def gen(self): + return self + + def _gen(self): # IMPORTANT: Messages may contain pointers that are shared with # other streams, so we only manipulate copies. 
for message in self.stream_in: From 8437a28c14fba49cc861e30917e76dc87dbcac85 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Fri, 3 Aug 2012 21:09:05 -0400 Subject: [PATCH 31/73] generator-style perf now sends a risk report on receipt of DONE --- zipline/core/component.py | 1 - zipline/finance/performance.py | 25 ++++++++++-------- zipline/finance/trading.py | 1 + zipline/gens/composites.py | 39 +++++++++++++++------------- zipline/gens/examples.py | 25 +++++++++--------- zipline/gens/merge.py | 3 ++- zipline/gens/tradesimulation.py | 45 +++++++++++++++------------------ zipline/gens/transform.py | 24 +++++------------- zipline/gens/utils.py | 2 +- 9 files changed, 79 insertions(+), 86 deletions(-) diff --git a/zipline/core/component.py b/zipline/core/component.py index 78ee69c8..313e76e5 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -114,7 +114,6 @@ class Component(object): # Core Methods # ------------ - def loop_send(self): """ The main component loop. 
This is wrapped inside a diff --git a/zipline/finance/performance.py b/zipline/finance/performance.py index 92c96f3a..4c96ecce 100644 --- a/zipline/finance/performance.py +++ b/zipline/finance/performance.py @@ -203,10 +203,15 @@ class PerformanceTracker(object): self.todays_performance.positions[sid] = Position(sid) def update(self, event): - event.perf_message = self.process_event(event) - event.portfolio = self.get_portfolio() - del event['TRANSACTION'] - return event + if event.dt == "DONE": + event.perf_message = self.handle_simulation_end() + del event['TRANSACTION'] + return event + else: + event.perf_message = self.process_event(event) + event.portfolio = self.get_portfolio() + del event['TRANSACTION'] + return event def get_portfolio(self): return self.cumulative_performance.as_portfolio() @@ -270,6 +275,7 @@ class PerformanceTracker(object): #calculate performance as of last trade self.cumulative_performance.calculate_performance() self.todays_performance.calculate_performance() + return message @@ -296,7 +302,8 @@ class PerformanceTracker(object): # calculate progress of test self.progress = self.day_count / self.total_days - #TODO TODO TODO!! + # Take a snapshot of our current peformance to return to the + # browser. 
daily_update = self.to_dict() if self.trading_environment.max_drawdown: @@ -356,12 +363,8 @@ class PerformanceTracker(object): exceeded_max_loss = self.exceeded_max_loss ) - if self.results_socket: - log.info("about to stream the risk report...") - risk_dict = self.risk_report.to_dict() - - msg = zp.RISK_FRAME(risk_dict) - self.results_socket.send(msg) + risk_dict = self.risk_report.to_dict() + return risk_dict class Position(object): diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 53ac8f2f..7bd8c7c3 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -35,6 +35,7 @@ class TransactionSimulator(object): def update(self, event): event.TRANSACTION = None + # We only fill transactions on trade events. if event.type == zp.DATASOURCE_TYPE.TRADE: event.TRANSACTION = self.apply_trade_to_open_orders(event) return event diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 2b9905b7..2716ee32 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -3,7 +3,7 @@ from itertools import tee, starmap from collections import namedtuple from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.utils import roundrobin, hash_args +from zipline.gens.utils import roundrobin, hash_args, done_message from zipline.gens.sort import date_sort from zipline.gens.merge import merge from zipline.gens.transform import StatefulTransform @@ -34,8 +34,7 @@ def date_sorted_sources(*sources): return date_sort(stream_in, names) - -def merged_transforms(sorted_stream, bundles): +def merged_transforms(sorted_stream, *transforms): """ A generator that takes the expected output of a date_sort, pipes it through a given set of transforms, and runs the results throught a @@ -45,30 +44,34 @@ def merged_transforms(sorted_stream, bundles): tnfm_kwargs should be a list of dictionaries representing keyword arguments to each transform. 
""" + for transform in transforms: + assert isinstance(transform, StatefulTransform) + # Generate expected hashes for each transform - namestrings = [bundle.tnfm.__name__ + hash_args(*bundle.args, **bundle.kwargs) - for bundle in bundles] + namestrings = [tnfm.get_hash() for tnfm in transforms] # Create a copy of the stream for each transform. - split = tee(sorted_stream, len(bundles)) - # Package a stream copy with each bundle - tnfms_with_streams = zip(split, bundles) + split = tee(sorted_stream, len(transforms)) + + # Package a stream copy with each StatefulTransform instance. + bundles = zip(transforms, split) # Convert the copies into transform streams. - tnfm_gens = [ - StatefulTransform( - stream_copy, - bundle.tnfm, - *bundle.args, - **bundle.kwargs - ) - for stream_copy, bundle in tnfms_with_streams - ] + tnfm_gens = [tnfm.transform(stream) for tnfm, stream in bundles] - # Roundrobin the outputs of our transforms to create a single flat stream. + # Roundrobin the outputs of our transforms to create a single flat + # stream. to_merge = roundrobin(tnfm_gens, namestrings) # Pipe the stream into merge. merged = merge(to_merge, namestrings) # Return the merged events. return merged + +def zipline(sources, transforms, endpoint): + assert isinstance(sources, (list, tuple)) + assert isinstance(transforms, (list, tuple)) + + + + diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index 38964f01..a6a95f59 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -21,10 +21,10 @@ if __name__ == "__main__": #Set up source a. One minute between events. args_a = tuple() kwargs_a = { - 'count' : 2000, + 'count' : 325, 'sids' : [1,2,3], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 10), + 'delta' : timedelta(hours = 6), 'filter' : filter } source_a = SpecificEquityTrades(*args_a, **kwargs_a) @@ -32,30 +32,29 @@ if __name__ == "__main__": #Set up source b. Two minutes between events. 
args_b = tuple() kwargs_b = { - 'count' : 2000, + 'count' : 7500, 'sids' : [2,3,4], 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 10), + 'delta' : timedelta(minutes = 5), 'filter' : filter } source_b = SpecificEquityTrades(*args_b, **kwargs_b) #Set up source c. Three minutes between events. - sort_out = date_sorted_sources(source_a, source_b) + sorted = date_sorted_sources(source_a, source_b) - passthrough = TransformBundle(Passthrough, (), {}) - mavg_price = TransformBundle(MovingAverage, (timedelta(minutes = 20), ['price']), {}) - tnfm_bundles = (passthrough, mavg_price) + passthrough = StatefulTransform(Passthrough) + mavg_price = StatefulTransform(MovingAverage, timedelta(minutes = 20), ['price']) - merge_out = merged_transforms(sort_out, tnfm_bundles) + merged = merged_transforms(sorted, passthrough, mavg_price) algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) environment = create_trading_environment(year = 2012) style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE - client_out = tsc(merge_out, algo, environment, style) - for message in client_out: - pp(message) - sleep(1) + trading_client = tsc(algo, environment, style) + + for message in trading_client.simulate(merged): + pp(message) diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index dfd904d2..32035492 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -6,7 +6,7 @@ from collections import deque from zipline import ndict from zipline.gens.utils import hash_args, \ - assert_merge_protocol + assert_merge_protocol, done_message from itertools import repeat def merge(stream_in, tnfm_ids): @@ -51,6 +51,7 @@ def merge(stream_in, tnfm_ids): assert len(queue) == 1, "Bad queue in merge on exit: %s" % queue assert queue[0].dt == "DONE", \ "Bad last message in merge on exit: %s" % queue + yield done_message('Merge') def merge_one(sources): dict_primer = zip(sources.keys(), repeat(None)) diff --git a/zipline/gens/tradesimulation.py 
b/zipline/gens/tradesimulation.py index 5e8db374..972f44e8 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -43,65 +43,60 @@ class TradeSimulationClient(object): is sent to the algo. """ - def __init__(self, stream_in, algo, environment, sim_style): + def __init__(self, algo, environment, sim_style): - self.stream_in = stream_in self.algo = algo self.sids = algo.get_sid_filter() self.environment = environment self.style = sim_style - - self.__generator = None - def get_hash(self): """ There should only ever be one TSC in the system. """ return self.__class__.__name__ + hash_args() - def __iter__(self): - return self - - def next(self): - if self.__generator: - return self.__generator.next() - else: - self.__generator = self.run_simulation() - return self.__generator.next() - - def run_simulation(self): + def simulate(self, stream_in): """ Main generator work loop. """ + # Simulate filling any open orders made by the previous run of # the user's algorithm. Sets the txn field to true on any # event that results in a filled order. ordering_client = StatefulTransform( - self.stream_in, TransactionSimulator, self.sids, style = self.style ) + with_filled_orders = ordering_client.transform(stream_in) + # Pipe the events with transactions to perf. This will remove # the txn field added by TransactionSimulator and replace it # with a portfolio object to be passed to the user's # algorithm. Also adds a PERF_MESSAGE field which is usually # none, but contains an update message once per day. - current_portfolio = StatefulTransform( - ordering_client, + perf_tracker = StatefulTransform( PerformanceTracker, self.environment, self.sids ) - # Pass both the ordering client's state and messages with the - # current portfolio into the algorithm for simulation. + with_portfolio = perf_tracker.transform(with_filled_orders) + + # Pass the messages from perf along with the trading client's + # state into the algorithm for simulation. 
We provide the + # trading client so that the algorithm can place new orders + # into the client's order book. algo_results = AlgorithmSimulator( - current_portfolio, + with_portfolio, ordering_client.state, self.algo, ) - + + # The algorithm will yield a daily_results message (as + # calculated by the performance tracker) at the end of each + # day. It will also yield a risk report at the end of the + # simulation. for message in algo_results: yield message @@ -120,7 +115,7 @@ class AlgorithmSimulator(object): self.sids = algo.get_sid_filter() # Monkey patch the user algorithm to place orders in the - # txn_sim order book. + # TransactionSimulator's order book. self.algo.set_order(self.order) self.algo.set_logger(logbook.Logger("Algolog")) @@ -189,6 +184,8 @@ class AlgorithmSimulator(object): if event.perf_message: yield event.perf_message del event['perf_message'] + if event.dt == "DONE": + break # This should only happen for the first event we run. if self.simulation_dt == None: diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index f230f99d..564284b5 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -48,7 +48,7 @@ class StatefulTransform(object): set to true will forward all fields in the original message. Otherwise only dt, tnfm_id, and tnfm_value are forwarded. """ - def __init__(self, stream_in, tnfm_class, *args, **kwargs): + def __init__(self, tnfm_class, *args, **kwargs): assert isinstance(tnfm_class, (types.ObjectType, types.ClassType)), \ "Stateful transform requires a class." assert tnfm_class.__dict__.has_key('update'), \ @@ -56,36 +56,26 @@ class StatefulTransform(object): self.forward_all = tnfm_class.__dict__.get('FORWARDER', False) self.update_in_place = tnfm_class.__dict__.get('UPDATER', False) + + # You can't be both a forwarded and an updater. assert not all([self.forward_all, self.update_in_place]) - self.stream_in = stream_in - # Create an instance of our transform class. 
self.state = tnfm_class(*args, **kwargs) # Create the string associated with this generator's output. self.namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) - # Generator isn't initialized until someone calls __iter__ or next(). - self.__generator = None - def get_hash(self): return self.namestring - def next(self): - if self.__generator: - return self.__generator.next() - else: - self.__generator = self._gen() - return self.__generator.next() + def transform(self, stream_in): + return self._gen(stream_in) - def __iter__(self): - return self - - def _gen(self): + def _gen(self, stream_in): # IMPORTANT: Messages may contain pointers that are shared with # other streams, so we only manipulate copies. - for message in self.stream_in: + for message in stream_in: assert_sort_unframe_protocol(message) message_copy = deepcopy(message) diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index bacd07bd..f979e063 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -23,7 +23,7 @@ def mock_done(id): "source_id" : id, 'tnfm_id' : id, 'tnfm_value': None, - 'type' : 0 + 'type' : DATASOURCE_TYPE.DONE }) done_message = mock_done From f3c81d67319e7acf398e8427b9a51cbf0f44bc40 Mon Sep 17 00:00:00 2001 From: fawce Date: Fri, 3 Aug 2012 23:00:45 -0400 Subject: [PATCH 32/73] dropped an extraneous sleep in the test --- tests/test_components.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index cf140677..896f4afc 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -154,9 +154,6 @@ class ComponentTestCase(TestCase): sorted_out = date_sorted_sources(*sources) - import time - time.sleep(.25) - prev = None sort_count = 0 for msg in sorted_out: From 4a582e8952efeef6ed6dfb18517bdb619102c522 Mon Sep 17 00:00:00 2001 From: fawce Date: Sat, 4 Aug 2012 12:58:07 -0400 Subject: [PATCH 33/73] modified zmq_gen method to yield None when there is no waiting message. 
This prevents blocking in the next() method of a component. But it requires generators wrapping the component to handle None. Also modified component's receiver creation to be triggered on the first call to next, rather than iter. This change means that the zmq context and socket for the component's receiver should always be created in the same process as the consumer of the generator. Chaining together component wrapped generators will result in the send process of the last component actually instantiating the receive socket of the prior component. In this way, the components are actually communicating directly via zmq. Component's send method now calls the wait_ready(), which waits for the monitor's GO message, inside the generator loop. This guarantees that the generator's next method is called before the send loop blocks on the monitor. As a result, components will call __init__ and next() without blocking, mimicking the behavior of plain generators. --- tests/test_components.py | 186 ++++++++++++++++++++++++++++++++++++-- zipline/core/component.py | 55 ++++++----- zipline/gens/merge.py | 10 +- zipline/gens/transform.py | 14 ++- zipline/gens/utils.py | 4 +- 5 files changed, 228 insertions(+), 41 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 896f4afc..54049723 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -1,13 +1,20 @@ import zmq import pytz +from pprint import pformat as pf from datetime import datetime, timedelta from unittest2 import TestCase from collections import defaultdict -from zipline.gens.composites import date_sorted_sources +from zipline.gens.composites import date_sorted_sources, merged_transforms from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator +from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform +from zipline.gens.tradesimulation import TradeSimulationClient as tsc + +from zipline.utils.factory 
import create_trading_environment +from zipline.test_algorithms import TestAlgorithm + from zipline.utils.test_utils import ( setup_logger, @@ -19,7 +26,12 @@ from zipline.utils.test_utils import ( from zipline.core import Component from zipline.protocol import ( DATASOURCE_FRAME, - DATASOURCE_UNFRAME + DATASOURCE_UNFRAME, + FEED_FRAME, + FEED_UNFRAME, + MERGE_FRAME, + MERGE_UNFRAME, + SIMULATION_STYLE ) from zipline.gens.tradegens import SpecificEquityTrades @@ -65,9 +77,7 @@ class ComponentTestCase(TestCase): } trade_gen = SpecificEquityTrades(*args_a, **kwargs_a) - monitor.add_to_topology(trade_gen.get_hash()) - launch_monitor(monitor) comp_a = Component( trade_gen, @@ -77,9 +87,14 @@ class ComponentTestCase(TestCase): DATASOURCE_UNFRAME ) + launch_monitor(monitor) + for event in comp_a: log.info(event) + # wait for the sending process to exit + comp_a.proc.join() + def test_sort(self): monitor = create_monitor(allocator) @@ -97,7 +112,6 @@ class ComponentTestCase(TestCase): 'count' : count } trade_gen_a = SpecificEquityTrades(*args_a, **kwargs_a) - monitor.add_to_topology(trade_gen_a.get_hash()) #Set up source b. Two minutes between events. args_b = tuple() @@ -109,7 +123,6 @@ class ComponentTestCase(TestCase): 'count' : count } trade_gen_b = SpecificEquityTrades(*args_b, **kwargs_b) - monitor.add_to_topology(trade_gen_b.get_hash()) #Set up source c. Three minutes between events. 
args_c = tuple() @@ -122,9 +135,7 @@ class ComponentTestCase(TestCase): } trade_gen_c = SpecificEquityTrades(*args_c, **kwargs_c) - monitor.add_to_topology(trade_gen_c.get_hash()) - launch_monitor(monitor) comp_a = Component( trade_gen_a, @@ -154,6 +165,8 @@ class ComponentTestCase(TestCase): sorted_out = date_sorted_sources(*sources) + launch_monitor(monitor) + prev = None sort_count = 0 for msg in sorted_out: @@ -164,3 +177,160 @@ class ComponentTestCase(TestCase): sort_count += 1 self.assertEqual(count*3, sort_count) + + # wait for processes to finish + comp_a.proc.join() + comp_b.proc.join() + comp_c.proc.join() + + + def test_full(self): + monitor = create_monitor(allocator) + + filter = [2,3] + #Set up source a. One minute between events. + args_a = tuple() + kwargs_a = { + 'count' : 325, + 'sids' : [1,2,3], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(hours = 6), + 'filter' : filter + } + source_a = SpecificEquityTrades(*args_a, **kwargs_a) + + #Set up source b. Two minutes between events. 
+ args_b = tuple() + kwargs_b = { + 'count' : 7500, + 'sids' : [2,3,4], + 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 5), + 'filter' : filter + } + source_b = SpecificEquityTrades(*args_b, **kwargs_b) + + # ------------------------ + # Run sources in dedicated processes + comp_a = Component( + source_a, + monitor, + allocator.lease(1)[0], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME, + source_a.get_hash() + ) + + comp_b = Component( + source_b, + monitor, + allocator.lease(1)[0], + DATASOURCE_FRAME, + DATASOURCE_UNFRAME, + source_b.get_hash() + ) + + # Date sort the sources, and run the sort in a dedicated + # process + sources = [comp_a, comp_b] + + sorted_out = date_sorted_sources(*sources) + + #launch_monitor(monitor) + #import nose.tools; nose.tools.set_trace() + #for feed_msg in sorted_out: + # log.info(pf(feed_msg)) + + #return + + sorted = Component( + sorted_out, + monitor, + allocator.lease(1)[0], + FEED_FRAME, + FEED_UNFRAME, + "sort" + ) + + + passthrough = StatefulTransform(Passthrough) + mavg_price = StatefulTransform( + MovingAverage, + timedelta(minutes = 20), + ['price'] + ) + + merged_gen = merged_transforms(sorted, passthrough, mavg_price) + + merged = Component( + merged_gen, + monitor, + allocator.lease(1)[0], + MERGE_FRAME, + MERGE_UNFRAME, + "merge" + ) + + algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) + environment = create_trading_environment(year = 2012) + style = SIMULATION_STYLE.FIXED_SLIPPAGE + + trading_client = tsc(algo, environment, style) + + launch_monitor(monitor) + for message in trading_client.simulate(merged): + log.info(pf(message)) + + + # wait for processes to finish + comp_a.proc.join() + comp_b.proc.join() + sorted.proc.join() + merged.proc.join() + return + + + + def test_compound(self): + monitor = create_monitor(allocator) + + filter = [2,3] + #Set up source a. One minute between events. 
+ args_a = tuple() + kwargs_a = { + 'count' : 325, + 'sids' : [1,2,3], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(hours = 6), + 'filter' : filter + } + source_a = SpecificEquityTrades(*args_a, **kwargs_a) + + #Set up source b. Two minutes between events. + args_b = tuple() + kwargs_b = { + 'count' : 7500, + 'sids' : [2,3,4], + 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 5), + 'filter' : filter + } + source_b = SpecificEquityTrades(*args_b, **kwargs_b) + + sorted_out = date_sorted_sources(source_a, source_b) + + sorted = Component( + sorted_out, + monitor, + allocator.lease(1)[0], + FEED_FRAME, + FEED_UNFRAME + ) + + launch_monitor(monitor) + + for event in sorted: + log.info(event) + + + sorted.proc.join() diff --git a/zipline/core/component.py b/zipline/core/component.py index 313e76e5..c9ce56e6 100644 --- a/zipline/core/component.py +++ b/zipline/core/component.py @@ -51,7 +51,8 @@ class Component(object): monitor, socket_uri, frame, - unframe + unframe, + component_id ): # ----------------- @@ -59,7 +60,7 @@ class Component(object): # ----------------- self.generator = generator self.frame = frame - self.component_id = self.generator.get_hash() + self.component_id = component_id # lock for waiting on monitor "GO" self.waiting = None @@ -99,15 +100,16 @@ class Component(object): # first, start the generator in its own process. Once # Monitor says "go", Events from the generator will be # FRAME'd and PUSH'd to self.socket_uri. 
- proc = multiprocessing.Process( - target=self.loop_send - ) - proc.start() + monitor.add_to_topology(self.component_id) - # ------------ - # Message Receiver/Generator - # ------------ - self.recv_gen = self.create_recv_gen() + self.proc = multiprocessing.Process( + target=self.loop_send + ) + self.proc.start() + + # Placeholder for receive generator, which will be + # created in __iter__ + self.recv_gen = None # ------------ @@ -123,8 +125,8 @@ class Component(object): """ try: # The process title so you can watch it in top, ps. - setproctitle(self.generator.__class__.__name__) self.prefix = "FORK-" + setproctitle(self.get_id) log.info("Start %r" % self) log.info("Pid %s" % os.getpid()) @@ -134,14 +136,15 @@ class Component(object): self.signal_ready() self.lock_ready() - self.wait_ready() - - # ----------------------- - # YOU SHALL NOT PASS!!!!! - # ----------------------- - # ... until the monitor signals GO + msg = None for event in self.generator: + + if hasattr(event, 'dt') and event.dt == 'DONE': + continue + + self.wait_ready() + self.heartbeat() msg = self.frame(event) self.out_socket.send(msg) @@ -163,9 +166,6 @@ class Component(object): def create_recv_gen(self): try: - self.open(send=False) - self.signal_ready() - self.lock_ready() # return the generator return self.loop_recv() except Exception as exc: @@ -175,8 +175,12 @@ class Component(object): def loop_recv(self): try: + self.open(send=False) + self.signal_ready() + self.lock_ready() + # we block on ready here until monitor sends the GO - self.wait_ready() + # self.wait_ready() for event in self.gen_from_poller(self.poll, self.in_socket, self.unframe): yield event @@ -189,7 +193,10 @@ class Component(object): def gen_from_poller(self, poller, in_socket, unframe): while True: - socks = dict(poller.poll(0)) + # Since we will yield None to avoid blocking, we need + # to have a small delay to give the poller a chance + # to receive a message from upstream. 
+ socks = dict(poller.poll(100)) self.heartbeat() if socks.get(in_socket) == zmq.POLLIN: message = in_socket.recv() @@ -198,6 +205,8 @@ class Component(object): else: event = unframe(message) yield event + else: + yield def handle_exception(self, exc, re_raise=False): if isinstance(exc, KillSignal): @@ -215,6 +224,8 @@ class Component(object): return self def next(self): + if not self.recv_gen: + self.recv_gen = self.create_recv_gen() return self.recv_gen.next() # ---------------------------- diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index 32035492..f6434918 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -17,9 +17,9 @@ def merge(stream_in, tnfm_ids): and merge them together into an event. We raise an error if we do not receive the same number of events from all sources. """ - + assert isinstance(tnfm_ids, list) - + # Set up an internal queue for each expected source. tnfms = {} for id in tnfm_ids: @@ -36,7 +36,7 @@ def merge(stream_in, tnfm_ids): id = message.tnfm_id assert id in tnfm_ids, \ "Message from unexpected tnfm: %s, %s" % (id, tnfm_ids) - + tnfms[id].append(message) # Only pop messages when we have a pending message from @@ -58,13 +58,13 @@ def merge_one(sources): event_fields = ndict() for key, queue in sources.iteritems(): - + # Add transform value to the transforms dict. message = queue.popleft() event_fields[message.tnfm_id] = message.tnfm_value del message['tnfm_id'] del message['tnfm_value'] - + # Merge any remaining fields into the event dict. event_fields.merge(message) return event_fields diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 564284b5..2883733a 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -53,16 +53,16 @@ class StatefulTransform(object): "Stateful transform requires a class." 
assert tnfm_class.__dict__.has_key('update'), \ "Stateful transform requires the class to have an update method" - + self.forward_all = tnfm_class.__dict__.get('FORWARDER', False) self.update_in_place = tnfm_class.__dict__.get('UPDATER', False) # You can't be both a forwarded and an updater. assert not all([self.forward_all, self.update_in_place]) - + # Create an instance of our transform class. self.state = tnfm_class(*args, **kwargs) - + # Create the string associated with this generator's output. self.namestring = tnfm_class.__name__ + hash_args(*args, **kwargs) @@ -76,7 +76,11 @@ class StatefulTransform(object): # IMPORTANT: Messages may contain pointers that are shared with # other streams, so we only manipulate copies. for message in stream_in: - + # allow upstream generators to yield None to avoid + # blocking. + if message == None: + continue + assert_sort_unframe_protocol(message) message_copy = deepcopy(message) @@ -90,7 +94,7 @@ class StatefulTransform(object): out_message.tnfm_id = self.namestring out_message.tnfm_value = tnfm_value yield out_message - + # Our expectation is that the transform simply updated the # message it was passed. Useful for chaining together # multiple transforms, e.g. TransactionSimulator/PerformanceTracker. diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index f979e063..071ce5fc 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -49,7 +49,9 @@ def roundrobin(sources, namestrings): for namestring, source in mapping.iteritems(): try: message = source.next() - yield message + # allow sources to yield None to avoid blocking. 
+ if message: + yield message except StopIteration: yield done_message(namestring) del mapping[namestring] From b67cbb2aab6937e41927b15c49ef9422a503e9cf Mon Sep 17 00:00:00 2001 From: fawce Date: Sat, 4 Aug 2012 15:26:48 -0400 Subject: [PATCH 34/73] updated tests to compare with/without processes --- tests/test_components.py | 157 ++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 60 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 54049723..33255883 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -7,7 +7,6 @@ from unittest2 import TestCase from collections import defaultdict from zipline.gens.composites import date_sorted_sources, merged_transforms -from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform from zipline.gens.tradesimulation import TradeSimulationClient as tsc @@ -31,7 +30,9 @@ from zipline.protocol import ( FEED_UNFRAME, MERGE_FRAME, MERGE_UNFRAME, - SIMULATION_STYLE + SIMULATION_STYLE, + PERF_FRAME, + BT_UPDATE_UNFRAME ) from zipline.gens.tradegens import SpecificEquityTrades @@ -57,6 +58,34 @@ class ComponentTestCase(TestCase): self.ctx = zmq.Context() setup_logger(self) + count = 250 + filter = [2,3] + #Set up source a. One minute between events. + args_a = tuple() + kwargs_a = { + 'count' : 2*count, + 'sids' : [1,2,3], + 'start' : datetime(2002,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(hours = 6), + 'filter' : filter + } + self.source_a = SpecificEquityTrades(*args_a, **kwargs_a) + + #Set up source b. Two minutes between events. 
+ args_b = tuple() + kwargs_b = { + 'count' : count, + 'sids' : [2,3,4], + 'start' : datetime(2002,1,3,14, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 5), + 'filter' : filter + } + self.source_b = SpecificEquityTrades(*args_b, **kwargs_b) + + self.environment = create_trading_environment(year = 2002) + + + def tearDown(self): teardown_logger(self) @@ -187,47 +216,24 @@ class ComponentTestCase(TestCase): def test_full(self): monitor = create_monitor(allocator) - filter = [2,3] - #Set up source a. One minute between events. - args_a = tuple() - kwargs_a = { - 'count' : 325, - 'sids' : [1,2,3], - 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 6), - 'filter' : filter - } - source_a = SpecificEquityTrades(*args_a, **kwargs_a) - - #Set up source b. Two minutes between events. - args_b = tuple() - kwargs_b = { - 'count' : 7500, - 'sids' : [2,3,4], - 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 5), - 'filter' : filter - } - source_b = SpecificEquityTrades(*args_b, **kwargs_b) - # ------------------------ # Run sources in dedicated processes comp_a = Component( - source_a, + self.source_a, monitor, allocator.lease(1)[0], DATASOURCE_FRAME, DATASOURCE_UNFRAME, - source_a.get_hash() + self.source_a.get_hash() ) comp_b = Component( - source_b, + self.source_b, monitor, allocator.lease(1)[0], DATASOURCE_FRAME, DATASOURCE_UNFRAME, - source_b.get_hash() + self.source_b.get_hash() ) # Date sort the sources, and run the sort in a dedicated @@ -272,13 +278,23 @@ class ComponentTestCase(TestCase): ) algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) - environment = create_trading_environment(year = 2012) + style = SIMULATION_STYLE.FIXED_SLIPPAGE - trading_client = tsc(algo, environment, style) + trading_client = tsc(algo, self.environment, style) + tsc_gen = trading_client.simulate(merged) + + tsc_comp = Component( + tsc_gen, + monitor, + allocator.lease(1)[0], + PERF_FRAME, + BT_UPDATE_UNFRAME, + 
"tsc" + ) launch_monitor(monitor) - for message in trading_client.simulate(merged): + for message in tsc_comp: log.info(pf(message)) @@ -289,48 +305,69 @@ class ComponentTestCase(TestCase): merged.proc.join() return + def test_single_thread(self): + #Set up source c. Three minutes between events. + + sorted = date_sorted_sources(self.source_a, self.source_b) + + passthrough = StatefulTransform(Passthrough) + mavg_price = StatefulTransform(MovingAverage, timedelta(minutes = 20), ['price']) + + merged = merged_transforms(sorted, passthrough, mavg_price) + + algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) + style = SIMULATION_STYLE.FIXED_SLIPPAGE + + trading_client = tsc(algo, self.environment, style) + for message in trading_client.simulate(merged): + log.info(pf(message)) def test_compound(self): monitor = create_monitor(allocator) - filter = [2,3] - #Set up source a. One minute between events. - args_a = tuple() - kwargs_a = { - 'count' : 325, - 'sids' : [1,2,3], - 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 6), - 'filter' : filter - } - source_a = SpecificEquityTrades(*args_a, **kwargs_a) - - #Set up source b. Two minutes between events. 
- args_b = tuple() - kwargs_b = { - 'count' : 7500, - 'sids' : [2,3,4], - 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), - 'delta' : timedelta(minutes = 5), - 'filter' : filter - } - source_b = SpecificEquityTrades(*args_b, **kwargs_b) - - sorted_out = date_sorted_sources(source_a, source_b) + sorted_out = date_sorted_sources(self.source_a, self.source_b) sorted = Component( sorted_out, monitor, allocator.lease(1)[0], FEED_FRAME, - FEED_UNFRAME + FEED_UNFRAME, + "feed" ) + passthrough = StatefulTransform(Passthrough) + mavg_price = StatefulTransform( + MovingAverage, + timedelta(minutes = 20), + ['price'] + ) + + merged_gen = merged_transforms(sorted, passthrough, mavg_price) + + merged = Component( + merged_gen, + monitor, + allocator.lease(1)[0], + MERGE_FRAME, + MERGE_UNFRAME, + "merge" + ) + + algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) + style = SIMULATION_STYLE.FIXED_SLIPPAGE + + trading_client = tsc(algo, self.environment, style) + tsc_gen = trading_client.simulate(merged) + + launch_monitor(monitor) - - for event in sorted: - log.info(event) + for message in tsc_gen: + log.info(pf(message)) + # wait for processes to finish sorted.proc.join() + merged.proc.join() + return From 4655e643a42603a735c591b03795a2c31bda7eea Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Mon, 6 Aug 2012 11:05:25 -0400 Subject: [PATCH 35/73] api changes and refactor of sorting tests --- tests/test_feed.py | 233 --------------------------------- tests/test_sorting.py | 257 +++++++++++++++++++++++++++++++++++++ zipline/gens/composites.py | 51 +++++--- zipline/gens/examples.py | 70 +++++++--- zipline/gens/merge.py | 7 +- zipline/gens/sort.py | 3 +- zipline/gens/transform.py | 46 +++++-- zipline/gens/utils.py | 8 ++ 8 files changed, 397 insertions(+), 278 deletions(-) delete mode 100644 tests/test_feed.py create mode 100644 tests/test_sorting.py diff --git a/tests/test_feed.py b/tests/test_feed.py deleted file mode 100644 index 21e8afb3..00000000 --- 
a/tests/test_feed.py +++ /dev/null @@ -1,233 +0,0 @@ -from unittest2 import TestCase -from itertools import cycle, chain -from datetime import datetime, timedelta -from collections import deque - -from zipline import ndict -from zipline.gens.sort import \ - date_sort, \ - ready, \ - done, \ - queue_is_ready,\ - queue_is_done -from zipline.gens.utils import hash_args, alternate -from zipline.gens.tradegens import date_gen, SpecificEquityTrades -from zipline.gens.composites import date_sorted_sources - -import zipline.protocol as zp - -class HelperTestCase(TestCase): - - def setUp(self): - pass - - def tearDown(self): - pass - - def test_individual_queue_logic(self): - queue = deque() - # Empty queues are neither done nor ready. - assert not queue_is_ready(queue) - assert not queue_is_done(queue) - - queue.append(to_dt('foo')) - assert queue_is_ready(queue) - assert not queue_is_done(queue) - - - queue.appendleft(to_dt('DONE')) - assert queue_is_ready(queue) - - # Checking done when we have a message after done will trip an assert. - self.assertRaises(AssertionError, queue_is_done, queue) - - queue.pop() - assert queue_is_ready(queue) - assert queue_is_done(queue) - - def test_pop_logic(self): - sources = {} - ids = ['a', 'b', 'c'] - for id in ids: - sources[id] = deque() - - assert not ready(sources) - assert not done(sources) - - # All sources must have a message to be ready/done - sources['a'].append(to_dt("datetime")) - assert not ready(sources) - assert not done(sources) - sources['a'].pop() - - for id in ids: - sources[id].append(to_dt("datetime")) - - assert ready(sources) - assert not done(sources) - - for id in ids: - sources[id].appendleft(to_dt("DONE")) - - # ["DONE", message] will trip an assert in queue_is_done. 
- assert ready(sources) - self.assertRaises(AssertionError, done, sources) - - for id in ids: - sources[id].pop() - - assert ready(sources) - assert done(sources) - -class DateSortTestCase(TestCase): - - def setUp(self): - pass - - def tearDown(self): - pass - - def run_date_sort(self, events, expected, source_ids): - """ - Take a list of events, their source_ids, and an expected sorting. - Assert that date_sort's output agrees with expected. - """ - sort_gen = date_sort(events, source_ids) - l = list(sort_gen) - assert l == expected - - def test_single_source(self): - source_ids = ['a'] - # 100 events, increasing by a minute at a time. - type = zp.DATASOURCE_TYPE.TRADE - dates = list(date_gen(count = 100)) - dates.append("DONE") - - # [('a', date1, type), ('a', date2, type), ... ('a', "DONE", type)] - event_args = zip(cycle(source_ids), iter(dates), cycle([type])) - - # Turn event_args into proper events. - events = [mock_data_unframe(*args) for args in event_args] - - # We don't expected Feed to yield the last event. - expected = events[:-1] - - event_gen = (e for e in events) - - self.run_date_sort(event_gen, expected, source_ids) - - def test_multi_source(self): - source_ids = ['a', 'b'] - type = zp.DATASOURCE_TYPE.TRADE - - # Set up source 'a'. Outputs 20 events with 2 minute deltas. - delta_a = timedelta(minutes = 2) - dates_a = list(date_gen(delta = delta_a, count = 20)) - dates_a.append("DONE") - - events_a_args = zip(cycle(['a']), iter(dates_a), cycle([type])) - events_a = [mock_data_unframe(*args) for args in events_a_args] - - # Set up source 'b'. Outputs 10 events with 1 minute deltas. - delta_b = timedelta(minutes = 1) - dates_b = list(date_gen(delta = delta_b, count = 10)) - dates_b.append("DONE") - - events_b_args = zip(cycle(['b']), iter(dates_b), cycle([type])) - events_b = [mock_data_unframe(*args) for args in events_b_args] - - # The expected output is all non-DONE events in both a and b, - # sorted first by dt and then by source_id. 
- non_dones = events_a[:-1] + events_b[:-1] - expected = sorted(non_dones, compare_by_dt_source_id) - - # Alternating between a and b. - interleaved = alternate(iter(events_a), iter(events_b)) - self.run_date_sort(interleaved, expected, source_ids) - - # All of a, then all of b. - - sequential = chain(iter(events_a), iter(events_b)) - self.run_date_sort(sequential, expected, source_ids) - - def test_sorted_sources(self): - - filter = [1,2] - #Set up source a. One hour between events. - args_a = tuple() - kwargs_a = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(hours = 1), - 'filter' : filter - } - #Set up source b. One day between events. - args_b = tuple() - kwargs_b = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(days = 1), - 'filter' : filter - } - #Set up source c. One minute between events. - args_c = tuple() - kwargs_c = {'sids' : [1,2,3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = 1), - 'filter' : filter - } - # Set up source d. This should produce no events because the - # internal sids don't match the filter. - args_d = tuple() - kwargs_d = {'sids' : [3,4], - 'start' : datetime(2012,6,6,0), - 'delta' : timedelta(minutes = 1), - 'filter' : filter - } - - sources = (SpecificEquityTrades,) * 4 - source_args = (args_a, args_b, args_c, args_d) - source_kwargs = (kwargs_a, kwargs_b, kwargs_c, kwargs_d) - - # Generate our expected source_ids. - zip_args = zip(source_args, source_kwargs) - expected_ids = ["SpecificEquityTrades" + hash_args(*args, **kwargs) - for args, kwargs in zip_args] - - # Pipe our sources into sort. - sort_out = date_sorted_sources(sources, source_args, source_kwargs) - - # Read all the values from sort and assert that they arrive in - # the correct sorting with the expected hash values. - to_list = list(sort_out) - copy = to_list[:] - for e in to_list: - # All events should match one of our expected source_ids. 
- assert e.source_id in expected_ids - # But none of them should match source_d. - assert e.source_id != hash_args(*args_d, **kwargs_d) - - expected = sorted(copy, compare_by_dt_source_id) - assert to_list == expected - -def mock_data_unframe(source_id, dt, type): - event = ndict() - event.source_id = source_id - event.dt = dt - event.type = type - return event - -def to_dt(val): - return ndict({'dt': val}) - -def compare_by_dt_source_id(x,y): - if x.dt < y.dt: - return -1 - elif x.dt > y.dt: - return 1 - - elif x.source_id < y.source_id: - return -1 - elif x.source_id > y.source_id: - return 1 - - else: - return 0 diff --git a/tests/test_sorting.py b/tests/test_sorting.py new file mode 100644 index 00000000..966dec3f --- /dev/null +++ b/tests/test_sorting.py @@ -0,0 +1,257 @@ +import pytz + +from unittest2 import TestCase +from itertools import cycle, chain, izip, izip_longest +from datetime import datetime, timedelta +from collections import deque + +from zipline import ndict +from zipline.gens.sort import \ + date_sort, \ + ready, \ + done, \ + queue_is_ready,\ + queue_is_done +from zipline.gens.utils import hash_args, alternate, done_message +from zipline.gens.tradegens import date_gen, SpecificEquityTrades +from zipline.gens.composites import date_sorted_sources + +import zipline.protocol as zp + +class HelperTestCase(TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_individual_queue_logic(self): + queue = deque() + # Empty queues are neither done nor ready. + assert not queue_is_ready(queue) + assert not queue_is_done(queue) + + queue.append(to_dt('foo')) + assert queue_is_ready(queue) + assert not queue_is_done(queue) + + + queue.appendleft(to_dt('DONE')) + assert queue_is_ready(queue) + + # Checking done when we have a message after done will trip an assert. 
+ self.assertRaises(AssertionError, queue_is_done, queue) + + queue.pop() + assert queue_is_ready(queue) + assert queue_is_done(queue) + + def test_pop_logic(self): + sources = {} + ids = ['a', 'b', 'c'] + for id in ids: + sources[id] = deque() + + assert not ready(sources) + assert not done(sources) + + # All sources must have a message to be ready/done + sources['a'].append(to_dt("datetime")) + assert not ready(sources) + assert not done(sources) + sources['a'].pop() + + for id in ids: + sources[id].append(to_dt("datetime")) + + assert ready(sources) + assert not done(sources) + + for id in ids: + sources[id].appendleft(to_dt("DONE")) + + # ["DONE", message] will trip an assert in queue_is_done. + assert ready(sources) + self.assertRaises(AssertionError, done, sources) + + for id in ids: + sources[id].pop() + + assert ready(sources) + assert done(sources) + +class DateSortTestCase(TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def run_date_sort(self, event_stream, expected, source_ids): + """ + Take a list of events, their source_ids, and an expected sorting. + Assert that date_sort's output agrees with expected. + """ + sort_out = date_sort(event_stream, source_ids) + for m1, m2 in izip_longest(sort_out, expected): + assert m1 == m2 + + def test_single_source(self): + + # Just using the built-in defaults. See + # zipline/gens/tradegens.py + source = SpecificEquityTrades() + expected = list(source) + source.rewind() + # The raw source doesn't handle done messaging, so we need to + # append a done message for sort to work properly. 
+ with_done = chain(source, [done_message(source.get_hash())]) + self.run_date_sort(with_done, expected, [source.get_hash()]) + + def test_multi_source(self): + + filter = [2,3] + args_a = tuple() + kwargs_a = { + 'count' : 100, + 'sids' : [1,2,3], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 6), + 'filter' : filter + } + source_a = SpecificEquityTrades(*args_a, **kwargs_a) + + args_b = tuple() + kwargs_b = { + 'count' : 100, + 'sids' : [2,3,4], + 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), + 'delta' : timedelta(minutes = 5), + 'filter' : filter + } + source_b = SpecificEquityTrades(*args_b, **kwargs_b) + + all_events = list(chain(source_a, source_b)) + + # The expected output is all events, sorted by dt with + # source_id as a tiebreaker. + expected = sorted(all_events, comp) + source_ids = [source_a.get_hash(), source_b.get_hash()] + + # Generating the events list consumes the sources. Rewind them + # for testing. + source_a.rewind() + source_b.rewind() + + # Append a done message to each source. + with_done_a = chain(source_a, [done_message(source_a.get_hash())]) + with_done_b = chain(source_b, [done_message(source_b.get_hash())]) + + interleaved = alternate(with_done_a, with_done_b) + + # Test sort with alternating messages from source_a and + # source_b. + self.run_date_sort(interleaved, expected, source_ids) + + source_a.rewind() + source_b.rewind() + with_done_a = chain(source_a, [done_message(source_a.get_hash())]) + with_done_b = chain(source_b, [done_message(source_b.get_hash())]) + + sequential = chain(with_done_a, with_done_b) + + # Test sort with all messages from a, followed by all messages + # from b. + + self.run_date_sort(sequential, expected, source_ids) + + + def test_sort_composite(self): + + filter = [1,2] + + #Set up source a. One hour between events. 
+ args_a = tuple() + kwargs_a = { + 'count' : 100, + 'sids' : [1], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(hours = 1), + 'filter' : filter + } + source_a = SpecificEquityTrades(*args_a, **kwargs_a) + + #Set up source b. One day between events. + args_b = tuple() + kwargs_b = { + 'count' : 50, + 'sids' : [2], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(days = 1), + 'filter' : filter + } + source_b = SpecificEquityTrades(*args_b, **kwargs_b) + + #Set up source c. One minute between events. + args_c = tuple() + kwargs_c = { + 'count' : 150, + 'sids' : [1,2], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 1), + 'filter' : filter + } + source_c = SpecificEquityTrades(*args_c, **kwargs_c) + # Set up source d. This should produce no events because the + # internal sids don't match the filter. + args_d = tuple() + kwargs_d = { + 'count' : 50, + 'sids' : [3], + 'start' : datetime(2012,6,6,0), + 'delta' : timedelta(minutes = 1), + 'filter' : filter + } + source_d = SpecificEquityTrades(*args_d, **kwargs_d) + sources = [source_a, source_b, source_c, source_d] + hashes = [source.get_hash() for source in sources] + + sort_out = date_sorted_sources(*sources) + + # Read all the values from sort and assert that they arrive in + # the correct sorting with the expected hash values. + to_list = list(sort_out) + copy = to_list[:] + + # We should have 300 events (100 from a, 150 from b, 50 from c) + assert len(to_list) == 300 + + for e in to_list: + # All events should match one of our expected source_ids. + assert e.source_id in hashes + # But none of them should match source_d. + assert e.source_id != source_d.get_hash() + + # The events should be sorted by dt, with source_id as tiebreaker. 
+ expected = sorted(copy, comp) + + assert to_list == expected + +def compare_by_dt_source_id(x,y): + if x.dt < y.dt: + return -1 + elif x.dt > y.dt: + return 1 + + elif x.source_id < y.source_id: + return -1 + elif x.source_id > y.source_id: + return 1 + + else: + return 0 + +#Alias for ease of use +comp = compare_by_dt_source_id diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 2716ee32..4b5cd5ac 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -13,8 +13,9 @@ TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) def date_sorted_sources(*sources): """ - Takes an iterable of SortBundles, generating namestrings and initialized datasources - for each before piping them into a date_sort. + Takes an iterable of SortBundles, generating namestrings and + initialized datasources for each before piping them into a + date_sort. """ for source in sources: @@ -28,21 +29,21 @@ def date_sorted_sources(*sources): # one element at a time from each. stream_in = roundrobin(sources, names) - # Guarantee the flat stream will be sorted by date, using source_id as - # tie-breaker, which is fully deterministic (given deterministic string - # representation for all args/kwargs) + # Guarantee the flat stream will be sorted by date, using + # source_id as tie-breaker, which is fully deterministic (given + # deterministic string representation for all args/kwargs) return date_sort(stream_in, names) def merged_transforms(sorted_stream, *transforms): """ - A generator that takes the expected output of a date_sort, pipes it - through a given set of transforms, and runs the results throught a - merge to output a unified stream. tnfms should be a list of - pointers to generator functions. tnfm_args should be a list of - tuples, representing the arguments to be passed to each transform. - tnfm_kwargs should be a list of dictionaries representing keyword - arguments to each transform. 
+ A generator that takes the expected output of a date_sort, pipes + it through a given set of transforms, and runs the results + through a merge to output a unified stream. tnfms should be a + list of pointers to generator functions. tnfm_args should be a + list of tuples, representing the arguments to be passed to each + transform. tnfm_kwargs should be a list of dictionaries + representing keyword arguments to each transform. """ for transform in transforms: assert isinstance(transform, StatefulTransform) @@ -62,15 +63,35 @@ def merged_transforms(sorted_stream, *transforms): # Roundrobin the outputs of our transforms to create a single flat # stream. to_merge = roundrobin(tnfm_gens, namestrings) - # Pipe the stream into merge. merged = merge(to_merge, namestrings) # Return the merged events. return merged -def zipline(sources, transforms, endpoint): - assert isinstance(sources, (list, tuple)) +def sequential_transforms(stream_in, *transforms): + """ + Apply each transform in transforms sequentially to each event in stream_in. + Each transform application will add a new entry indexed to the transform's + hash string. + """ + assert isinstance(transforms, (list, tuple)) + for tnfm in transforms: + tnfm.forward_all = False + tnfm.update_in_place = False + tnfm.append_value = True + + # Recursively apply all transforms to the stream. 
+ stream_out = reduce(lambda stream, tnfm: tnfm.transform(stream), + transforms, + stream_in) + return stream_out + + + + + + diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index a6a95f59..f3a0dd0b 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,14 +1,16 @@ import pytz +import time from time import sleep from pprint import pprint as pp from datetime import datetime, timedelta +from itertools import izip from zipline.utils.factory import create_trading_environment from zipline.test_algorithms import TestAlgorithm from zipline.gens.composites import SourceBundle, TransformBundle, \ - date_sorted_sources, merged_transforms + date_sorted_sources, merged_transforms, sequential_transforms from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform from zipline.gens.tradesimulation import TradeSimulationClient as tsc @@ -18,43 +20,81 @@ import zipline.protocol as zp if __name__ == "__main__": filter = [2,3] - #Set up source a. One minute between events. + #Set up source a. Six minutes between events. args_a = tuple() kwargs_a = { - 'count' : 325, + 'count' : 1000, 'sids' : [1,2,3], 'start' : datetime(2012,1,3,15, tzinfo = pytz.utc), - 'delta' : timedelta(hours = 6), + 'delta' : timedelta(minutes = 6), 'filter' : filter } source_a = SpecificEquityTrades(*args_a, **kwargs_a) + source_a_prime = SpecificEquityTrades(*args_a, **kwargs_a) - #Set up source b. Two minutes between events. + #Set up source b. Five minutes between events. args_b = tuple() kwargs_b = { - 'count' : 7500, + 'count' : 1000, 'sids' : [2,3,4], 'start' : datetime(2012,1,3,14, tzinfo = pytz.utc), 'delta' : timedelta(minutes = 5), 'filter' : filter } source_b = SpecificEquityTrades(*args_b, **kwargs_b) - - #Set up source c. Three minutes between events. 
+ source_b_prime = SpecificEquityTrades(*args_b, **kwargs_b) sorted = date_sorted_sources(source_a, source_b) + sorted_prime = date_sorted_sources( + source_a_prime, + source_b_prime + ) passthrough = StatefulTransform(Passthrough) - mavg_price = StatefulTransform(MovingAverage, timedelta(minutes = 20), ['price']) + mavg_price = StatefulTransform( + MovingAverage, + timedelta(minutes = 20), + ['price'] + ) + + passthrough_prime = StatefulTransform(Passthrough) + mavg_price_prime = StatefulTransform( + MovingAverage, + timedelta(minutes = 20), + ['price'] + ) merged = merged_transforms(sorted, passthrough, mavg_price) + start = time.time() + for message in merged: + assert 1 + 1 == 2 + stop = time.time() + merge_time = stop - start + print "Merge time: %s" % str(merge_time) + + sequential = sequential_transforms( + sorted_prime, + passthrough_prime, + mavg_price_prime + ) - algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) - environment = create_trading_environment(year = 2012) - style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE + start = time.time() + for message in sequential: + assert 1 + 1 == 2 + stop = time.time() + seq_time = stop - start + print "Sequential time: %s" % str(seq_time) + print "Merge/Seq: %s" % (str(merge_time/seq_time)) - trading_client = tsc(algo, environment, style) + +# merged = merged_transforms(sorted, passthrough, mavg_price) - for message in trading_client.simulate(merged): - pp(message) + # algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) +# environment = create_trading_environment(year = 2012) +# style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE + +# trading_client = tsc(algo, environment, style) + +# for message in trading_client.simulate(merged): +# pp(message) diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index f6434918..c4afb1b4 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -11,8 +11,8 @@ from itertools import repeat def merge(stream_in, tnfm_ids): """ - A generator that takes a generator and a list of 
source_ids. We - maintain an internal queue for each id in source_ids. Once we + A generator that takes a generator and a list of transform ids. We + maintain an internal queue for each id in tnfm_ids. Once we have a message from every queue, we pop an event from each queue and merge them together into an event. We raise an error if we do not receive the same number of events from all sources. @@ -54,9 +54,8 @@ def merge(stream_in, tnfm_ids): yield done_message('Merge') def merge_one(sources): - dict_primer = zip(sources.keys(), repeat(None)) - event_fields = ndict() + event_fields = ndict() for key, queue in sources.iteritems(): # Add transform value to the transforms dict. diff --git a/zipline/gens/sort.py b/zipline/gens/sort.py index f6ff7a5e..3ff5ee3f 100644 --- a/zipline/gens/sort.py +++ b/zipline/gens/sort.py @@ -14,7 +14,6 @@ def date_sort(stream_in, source_ids): have messages pending from all sources, we pull the earliest message and yield it. """ - assert isinstance(source_ids, (list, tuple)) # Set up an internal queue for each expected source. @@ -41,7 +40,7 @@ def date_sort(stream_in, source_ids): message = pop_oldest(sources) assert_sort_protocol(message) yield message - + # We should have only a done message left in each queue. for queue in sources.itervalues(): assert len(queue) == 1, "Bad queue in date_sort on exit: %s" % queue diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 2883733a..36e15689 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -56,9 +56,12 @@ class StatefulTransform(object): self.forward_all = tnfm_class.__dict__.get('FORWARDER', False) self.update_in_place = tnfm_class.__dict__.get('UPDATER', False) + self.append_value = tnfm_class.__dict__.get('APPENDER', False) - # You can't be both a forwarded and an updater. - assert not all([self.forward_all, self.update_in_place]) + # Only one special behavior mode can be set.
+ assert sum(map(int, [self.forward_all, + self.update_in_place, + self.append_value])) <= 1 # Create an instance of our transform class. self.state = tnfm_class(*args, **kwargs) @@ -75,11 +78,15 @@ class StatefulTransform(object): def _gen(self, stream_in): # IMPORTANT: Messages may contain pointers that are shared with # other streams, so we only manipulate copies. + for message in stream_in: + # allow upstream generators to yield None to avoid # blocking. if message == None: continue + + #TODO: refactor this to avoid unnecessary copying. assert_sort_unframe_protocol(message) message_copy = deepcopy(message) @@ -87,22 +94,43 @@ class StatefulTransform(object): # Same shared pointer issue here as above. tnfm_value = self.state.update(deepcopy(message_copy)) - # If we want to keep all original values, plus append tnfm_id - # and tnfm_value. Used for Passthrough. + # FORWARDER flag means we want to keep all original + # values, plus append tnfm_id and tnfm_value. Used for + # preserving the original event fields when our output + # will be fed into a merge. if self.forward_all: out_message = message_copy out_message.tnfm_id = self.namestring out_message.tnfm_value = tnfm_value yield out_message - # Our expectation is that the transform simply updated the - # message it was passed. Useful for chaining together - # multiple transforms, e.g. TransactionSimulator/PerformanceTracker. + # UPDATER flag should be used for transforms that + # side-effectfully modify the event they are passed. + # Updated messages are passed along exactly as they are + # returned to us by our state class. Useful for chaining + # specific transforms that won't be fed to a merge. (See + # the implementation of TradeSimulationClient for example + # usage of this flag with PerformanceTracker and + # TransactionSimulator.) elif self.update_in_place: yield tnfm_value + + # APPENDER flag should be used to add a single new + # key-value pair to the event.
The new key is this + # transform's namestring, and its value is the value + # returned by state.update(event). This is almost + # identical to the behavior of FORWARDER, except we + # compress the two calculated values (tnfm_id, and + # tnfm_value) into a single field. + elif self.append_value: + out_message = message_copy + out_message[self.namestring] = tnfm_value + yield out_message - # Otherwise send tnfm_id, tnfm_value, and the message - # date. Useful for transforms being piped to a merge. + # If no flags are set, we create a new message containing + # just the tnfm_id, the event's datetime, and the + # calculated tnfm_value. This is the default behavior for + # a transform being fed into a merge. else: out_message = ndict() out_message.tnfm_id = self.namestring diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index 071ce5fc..83372753 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -66,6 +66,14 @@ def hash_args(*args, **kwargs): hasher.update(combined) return hasher.hexdigest() +def sum_true(bool_iterable): + """ + Takes an iterable of boolean values and returns the number of + those values that are True. + """ + return sum(map(int, bool_iterable)) + + def assert_datasource_protocol(event): """Assert that an event meets the protocol for datasource outputs.""" From 107369696597c38e266263bc097608ecc3edb87d Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Mon, 6 Aug 2012 13:11:16 -0400 Subject: [PATCH 36/73] moving toward abstract base for event window tnfms --- zipline/gens/sort.py | 2 +- zipline/gens/transform.py | 85 +++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 49 deletions(-) diff --git a/zipline/gens/sort.py b/zipline/gens/sort.py index 3ff5ee3f..9755da74 100644 --- a/zipline/gens/sort.py +++ b/zipline/gens/sort.py @@ -27,7 +27,7 @@ def date_sort(stream_in, source_ids): # Incoming messages should be the output of DATASOURCE_UNFRAME.
assert_datasource_unframe_protocol(message), \ "Bad message in date_sort: %s" % message - + # Only allow messages from sources we expect. assert message.source_id in sources, "Unexpected source: %s" % message diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 36e15689..ee8cd649 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -174,20 +174,39 @@ class MovingAverage(object): window.update(event) return window.get_averages() -class EventWindow(object): - """ - Maintains a list of events that are within a certain timedelta - of the most recent tick. The expected use of this class is to - track events associated with a single sid. We provide simple - functionality for averages, but anything more complicated - should be handled by a containing class. +class EventWindow: """ + Abstract base class for transform classes that calculate iterative + metrics on events within a given timedelta. Maintains a list of + events that are within a certain timedelta of the most recent + tick. Calls self.handle_add(event) for each event added to the + window. Calls self.handle_remove(event) for each event removed + from the window. Subclass these methods along with init(*args, + **kwargs) to calculate metrics over the window. - def __init__(self, delta, fields): + See zipline/gens/mavg.py and zipline/gens/vwap.py for example + implementations of moving average and volume-weighted average + price. + """ + # Mark this as an abstract base class. 
+ __metaclass__ = ABCMeta + + def __init__(self, delta, *args, **kwargs): self.ticks = deque() self.delta = delta - self.fields = fields - self.totals = defaultdict(float) + self.init(*args, **kwargs) + + @abstractmethod + def init(self): + raise NotImplementedError() + + @abstractmethod + def handle_add(self, event): + raise NotImplementedError() + + @abstractmethod + def handle_remove(self, event): + raise NotImplementedError() def __len__(self): return len(self.ticks) @@ -196,44 +215,19 @@ class EventWindow(object): self.assert_well_formed(event) # Add new event and increment totals. self.ticks.append(event) - for field in self.fields: - self.totals[field] += event[field] + self.handle_add(event) - # We return a list of all out-of-range events we removed. - out_of_range = [] - - # Clear out expired events, decrementing totals. + # Clear out expired event. + # # newest oldest # | | # V V - - while (self.ticks[-1].dt - self.ticks[0].dt) >= self.delta: - # popleft removes and returns ticks[0] + while (self.ticks[-1].dt - self.ticks[0].dt) > self.delta: + # popleft removes and returns the oldest tick in self.ticks popped = self.ticks.popleft() - # Decrement totals - for field in self.fields: - self.totals[field] -= popped[field] - # Add the popped element to the list of dropped events. - out_of_range.append(popped) - - return out_of_range - - def average(self, field): - assert field in self.fields - if len(self.ticks) == 0: - return 0.0 - else: - return self.totals[field] / len(self.ticks) - - def get_averages(self): - """ - Return an ndict of all our tracked averages. - """ - out = ndict() - # out.ticks = len(self.ticks) - for field in self.fields: - out[field] = self.average(field) - return out + # Subclasses should override handle_remove to define + # behavior for removing ticks. 
+ self.handle_remove(popped) def assert_well_formed(self, event): assert isinstance(event, ndict), "Bad event in EventWindow:%s" % event @@ -243,8 +237,3 @@ class EventWindow(object): # Something is wrong if new event is older than previous. assert event.dt >= self.ticks[-1].dt, \ "Events arrived out of order in EventWindow: %s -> %s" % (event, self.ticks[0]) - for field in self.fields: - assert event.has_key(field), \ - "Event missing [%s] in EventWindow" % field - assert isinstance(event[field], Number), \ - "Got %s for %s in EventWindow" % (event[field], field) From 06dc6f7acb84cb4e73440bb39a5bdf762997ab12 Mon Sep 17 00:00:00 2001 From: fawce Date: Mon, 6 Aug 2012 13:11:20 -0400 Subject: [PATCH 37/73] beginning refactor to use single threaded simulator. --- zipline/__init__.py | 6 - zipline/gens/examples.py | 16 +-- zipline/gens/tradegens.py | 3 +- zipline/gens/utils.py | 12 ++ zipline/lines.py | 272 ++++++-------------------------------- zipline/utils/factory.py | 49 +++---- 6 files changed, 76 insertions(+), 282 deletions(-) diff --git a/zipline/__init__.py b/zipline/__init__.py index a84cd345..31272fcb 100644 --- a/zipline/__init__.py +++ b/zipline/__init__.py @@ -6,15 +6,9 @@ Zipline # it is a place to expose the public interfaces. import protocol # namespace -from core.monitor import Monitor -from lines import SimulatedTrading -from core.host import ComponentHost from utils.protocol_utils import ndict __all__ = [ - SimulatedTrading, - Monitor, - ComponentHost, protocol, ndict ] diff --git a/zipline/gens/examples.py b/zipline/gens/examples.py index a6a95f59..2f003230 100644 --- a/zipline/gens/examples.py +++ b/zipline/gens/examples.py @@ -1,6 +1,5 @@ import pytz -from time import sleep from pprint import pprint as pp from datetime import datetime, timedelta @@ -16,7 +15,7 @@ from zipline.gens.tradesimulation import TradeSimulationClient as tsc import zipline.protocol as zp if __name__ == "__main__": - + filter = [2,3] #Set up source a. 
One minute between events. args_a = tuple() @@ -42,19 +41,18 @@ if __name__ == "__main__": #Set up source c. Three minutes between events. - sorted = date_sorted_sources(source_a, source_b) - + sorted = date_sorted_sources(source_a, source_b) + passthrough = StatefulTransform(Passthrough) mavg_price = StatefulTransform(MovingAverage, timedelta(minutes = 20), ['price']) - + merged = merged_transforms(sorted, passthrough, mavg_price) - + algo = TestAlgorithm(2, 10, 100, sid_filter = [2,3]) environment = create_trading_environment(year = 2012) style = zp.SIMULATION_STYLE.FIXED_SLIPPAGE - + trading_client = tsc(algo, environment, style) - + for message in trading_client.simulate(merged): pp(message) - diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index b1a0ed96..5c1bacd7 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -6,8 +6,7 @@ import random from itertools import chain, cycle, ifilter, izip from datetime import datetime, timedelta -from zipline.utils.factory import create_trade -from zipline.gens.utils import hash_args +from zipline.gens.utils import hash_args, create_trade def date_gen(start = datetime(2006, 6, 6, 12), delta = timedelta(minutes = 1), diff --git a/zipline/gens/utils.py b/zipline/gens/utils.py index 071ce5fc..45e31de4 100644 --- a/zipline/gens/utils.py +++ b/zipline/gens/utils.py @@ -66,6 +66,18 @@ def hash_args(*args, **kwargs): hasher.update(combined) return hasher.hexdigest() +def create_trade(sid, price, amount, datetime, source_id = "test_factory"): + row = ndict({ + 'source_id' : source_id, + 'type' : DATASOURCE_TYPE.TRADE, + 'sid' : sid, + 'dt' : datetime, + 'price' : price, + 'volume' : amount + }) + return row + + def assert_datasource_protocol(event): """Assert that an event meets the protocol for datasource outputs.""" diff --git a/zipline/lines.py b/zipline/lines.py index a5a3858e..fbc4d155 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -60,110 +60,41 @@ before invoking simulate. 
+---------------------------------+ """ -import inspect import logbook -#import zipline.utils.factory as factory - -from zipline.components import DataSource -from zipline.transforms import BaseTransform from zipline.test_algorithms import TestAlgorithm -from zipline.components import TradeSimulationClient -from zipline.core.process import ProcessSimulator -from zipline.core.monitor import Monitor from zipline.finance.trading import SIMULATION_STYLE +from zipline.utils import factory +import pytz + +from pprint import pprint as pp +from datetime import datetime, timedelta + +from zipline.utils.factory import create_trading_environment +from zipline.test_algorithms import TestAlgorithm + +from zipline.gens.composites import SourceBundle, TransformBundle, \ + date_sorted_sources, merged_transforms +from zipline.gens.tradegens import SpecificEquityTrades +from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform +from zipline.gens.tradesimulation import TradeSimulationClient as tsc + +import zipline.protocol as zp + log = logbook.Logger('Lines') class SimulatedTrading(object): - """ - Zipline with:: - - _no_ data sources. - - Trade simulation client, which is available to send callbacks on - events and also accept orders to be simulated. - - An order data source, which will receive orders from the trade - simulation client, and feed them into the event stream to be - serialized and order alongside all other data source events. - - transaction simulation transformation, which receives the order - events and estimates a theoretical execution price and volume. + @staticmethod + def create_simulation(sources, transforms, algorithm, environment, style): - All components in this zipline are subject to heartbeat checks and - a control monitor, which can kill the entire zipline in the event of - exceptions in one of the components or an external request to end the - simulation. 
- """ + sorted = date_sorted_sources(*sources) + passthrough = StatefulTransform(Passthrough) - def __init__(self, **config): - """ - :param config: a dict with the following required properties:: - - - algorithm: a class that follows the algorithm protocol. See - :py:meth:`zipline.finance.trading.TradeSimulationClient.add_algorithm - for details. - - trading_environment: an instance of - :py:class:`zipline.trading.TradingEnvironment` - - allocator: an instance of - :py:class:`zipline.simulator.AddressAllocator` - - simulation_style: optional parameter that configures the - :py:class:`zipline.finance.trading.TransactionSimulator`. Expects - a SIMULATION_STYLE as defined in :py:mod:`zipline.finance.trading` - """ - assert isinstance(config, dict) - self.algorithm = config['algorithm'] - self.allocator = config['allocator'] - self.trading_environment = config['trading_environment'] - self.sim_style = config.get('simulation_style') - self.send_sighup = config.get('send_sighup', False) - - - self.leased_sockets = [] - self.sim_context = None - - sockets = self.allocate_sockets(7) - addresses = { - 'sync_address' : sockets[0], - 'data_address' : sockets[1], - 'feed_address' : sockets[2], - 'merge_address' : sockets[3], - # TODO: this refers to the results of the merge, a - # horribly confusing name for the socket. 
- 'results_address' : sockets[4], - } - - self.monitor = Monitor( - # pub socket - sockets[5], - # route socket - sockets[6], - # exception socket to match tradesimclient's result - # socket, because we want to relay exceptions to the - # same listener - config['results_socket'], - send_sighup=self.send_sighup - ) - - self.started = False - - self.sim = ProcessSimulator(addresses) - - self.clients = {} - - self.trading_client = TradeSimulationClient( - self.trading_environment, - self.sim_style, - config['results_socket'], - self.algorithm - ) - self.add_client(self.trading_client) - - # setup all sources - self.sources = {} - - #setup transforms - self.transforms = {} - - self.sim.register_monitor( self.monitor ) + merged = merged_transforms(sorted, passthrough, *transforms) + trading_client = tsc(algorithm, environment, style) + return trading_client.simluate(merged) @staticmethod @@ -173,7 +104,6 @@ class SimulatedTrading(object): - environment - a \ :py:class:`zipline.finance.trading.TradingEnvironment` - - allocator - a :py:class:`zipline.simulator.AddressAllocator` - sid - an integer, which will be used as the security ID. - order_count - the number of orders the test algo will place, defaults to 100 @@ -188,10 +118,11 @@ class SimulatedTrading(object): - simulation_style: optional parameter that configures the :py:class:`zipline.finance.trading.TransactionSimulator`. Expects a SIMULATION_STYLE as defined in :py:mod:`zipline.finance.trading` + - transforms: optional parameter that provides a list + of StatefulTransform objects. 
""" assert isinstance(config, dict) - allocator = config['allocator'] sid = config['sid'] #-------------------- @@ -236,6 +167,12 @@ class SimulatedTrading(object): trade_count, trading_environment ) + + #------------------- + # Transforms + #------------------- + transforms = config.get('transforms', []) + #------------------- # Create the Algo #------------------- @@ -248,149 +185,20 @@ class SimulatedTrading(object): order_count ) - if config.has_key('results_socket'): - results_socket = config['results_socket'] - else: - results_socket = None #------------------- # Simulation #------------------- - zipline = SimulatedTrading(**{ - 'algorithm' : test_algo, - 'trading_environment' : trading_environment, - 'allocator' : allocator, - 'simulation_style' : simulation_style, - 'results_socket' : results_socket, - }) + + sim = SimulatedTrading.create_simulation( + [trade_source], + transforms, + test_algo, + trading_environment, + simulation_style) #------------------- - zipline.add_source(trade_source) + return sim - return zipline - - def add_source(self, source): - """ - Adds the source to the zipline, sets the sid filter of the - source to the algorithm's sid filter. 
- """ - assert isinstance(source, DataSource) - self.check_started() - source.set_filter('sid', self.algorithm.get_sid_filter()) - self.sim.register_components([source]) - - # ``id`` is name of source_id, ``get_id`` is the class name - self.sources[source.get_id] = source - - def add_transform(self, transform): - assert isinstance(transform, BaseTransform) - self.check_started() - self.sim.register_components([transform]) - self.transforms[transform.get_id] = transform - - def add_client(self, client): - assert isinstance(client, TradeSimulationClient) - self.check_started() - self.sim.register_components([client]) - self.clients[client.get_id] = client - - def check_started(self): - if self.started: - raise ZiplineException("TradeSimulation", "You cannot add \ - components after the simulation has begun.") - - def get_cumulative_performance(self): - return self.trading_client.perf.cumulative_performance.to_dict() - - def allocate_sockets(self, n): - """ - Allocate sockets local to this line, track them so - we can gc after test run. - """ - - assert isinstance(n, int) - assert n > 0 - - leased = self.allocator.lease(n) - self.leased_sockets.extend(leased) - - return leased - - @property - def components(self): - """ - Return the component instances inside of this topology - """ - - base = set(self.sim.components.values()) - transforms = set(self.transforms.values()) - sources = set(self.sources.values()) - - return base | transforms | sources - - @property - def topology(self): - """ - Returns the Component names in the topology of the - backtest. - """ - - # A complete topology is the union of three classes of - # components added individually to the simulation client - # at various places. - # - # base : ['FEED', 'MERGE', 'TRADING_CLIENT', 'PASSTHROUGH'] - # transforms : ['vwap__01', ... ] - # sources : ['MongoTradeHistory', ... 
] - - base = set(self.sim.components.keys()) - transforms = set(self.transforms.keys()) - sources = set(self.sources.keys()) - - return base | transforms | sources - - def setup_monitor(self): - """ - Prepare the monitor to manage the topology specified - by this line. - """ - self.monitor.manage(self.topology) - - def simulate(self, blocking=True): - self.setup_monitor() - - self.started = True - self.sim_context = self.sim.simulate() - - # If we're using a threaded simulator block on the pool - # of thread since we're only ever in a test and we don't - # generally monitor the state of the system as a hold at - # the supervisory layer - - # TODO: better way of identifying concurrency substrate - if blocking: - for process in self.sim.subprocesses: - process.join() - - @property - def is_success(self): - # TODO: other assertions? - if self.sim.did_clean_shutdown(): - return True - else: - return False - - #-------------------------------- - # Component property accessors - #-------------------------------- - - def get_positions(self): - """ - returns current positions as a dict. draws from the cumulative - performance period in the performance tracker. 
- """ - perf = self.trading_client.perf.cumulative_performance - positions = perf.get_positions() - return positions class ZiplineException(Exception): def __init__(self, zipline_name, msg): diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index 004f542a..e001cf07 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -12,7 +12,9 @@ from datetime import datetime, timedelta import zipline.finance.risk as risk import zipline.protocol as zp -from zipline.finance.sources import SpecificEquityTrades, RandomEquityTrades +from zipline.finance.sources import RandomEquityTrades +from zipline.gens.tradegens import SpecificEquityTrades +from zipline.gens.utils import create_trade from zipline.finance.trading import TradingEnvironment # TODO @@ -69,16 +71,6 @@ def create_trading_environment(year=2006): return trading_environment -def create_trade(sid, price, amount, datetime, source_id = "test_factory"): - row = zp.ndict({ - 'source_id' : source_id, - 'type' : zp.DATASOURCE_TYPE.TRADE, - 'sid' : sid, - 'dt' : datetime, - 'price' : price, - 'volume' : amount - }) - return row def get_next_trading_dt(current, interval, trading_calendar): next = current @@ -220,29 +212,20 @@ def create_minutely_trade_source(sids, trade_count, trading_environment): ) def create_trade_source(sids, trade_count, trade_time_increment, trading_environment): - trade_history = [] - price = [10.1] * trade_count - volume = [100] * trade_count + #Set up source a. One minute between events. + args = tuple() + kwargs = { + 'count' : trade_count, + 'sids' : sids, + 'start' : trading_environment.first_open, + 'delta' : trade_time_increment, + 'filter' : sids + } + source = SpecificEquityTrades(*args, **kwargs) - for sid in sids: - start_date = trading_environment.first_open + # TODO: do we need to set the trading environment's end to same dt as + # the last trade in the history? 
+ #trading_environment.period_end = trade_history[-1].dt - generated_trades = create_trade_history( - sid, - price, - volume, - trade_time_increment, - trading_environment - ) - - trade_history.extend(generated_trades) - - trade_history = sorted(trade_history, key=attrgetter('dt')) - - #set the trading environment's end to same dt as the last trade in the - #history. - trading_environment.period_end = trade_history[-1].dt - - source = SpecificEquityTrades(trade_history) return source From 45bfb3e8cdc9833124813ed6eeeb837924f02b1d Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Mon, 6 Aug 2012 13:38:45 -0400 Subject: [PATCH 38/73] moving average using eventwindow abc --- zipline/gens/mavg.py | 97 +++++++++++++++++++++++++++++++++++++++ zipline/gens/transform.py | 1 + 2 files changed, 98 insertions(+) create mode 100644 zipline/gens/mavg.py diff --git a/zipline/gens/mavg.py b/zipline/gens/mavg.py new file mode 100644 index 00000000..54826d23 --- /dev/null +++ b/zipline/gens/mavg.py @@ -0,0 +1,97 @@ + +from numbers import Number +from datetime import datetime, timedelta +from collections import defaultdict + +from zipline import ndict +from zipline.gens.transform import EventWindow + +class MovingAverage(object): + """ + Class that maintains a dictionary from sids to EventWindows + Upon receipt of each message we update the + corresponding window and return the calculated average. + """ + + def __init__(self, delta, fields): + self.delta = delta + self.fields = fields + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. + self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """Factory method for self.sid_windows.""" + return MovingAverageEventWindow(self.delta, self.fields) + + def update(self, event): + """ + Update the event window for this event's sid. Return an ndict from + tracked fields to averages. 
+ """ + assert isinstance(event, ndict),"Bad event in MovingAverage: %s" % event + assert event.has_key('sid'), "No sid in MovingAverage: %s" % event + assert event.has_key('dt'), "No dt in MovingAverage: %s" % event + + # This will create a new EventWindow if this is the first + # message for this sid. + window = self.sid_windows[event.sid] + window.update(event) + return window.get_averages() + +class MovingAverageEventWindow(EventWindow): + """ + Calculates a moving average over all specified fields. + """ + def init(self, fields): + # Subclass initializer. Superclass also expects a timedelta. + self.fields = fields + self.totals = defaultdict(float) + + def handle_add(self, event): + # Sanity check on the event. + self.assert_all_fields(event) + # Increment our running totals with data from the event. + for field in self.fields: + self.totals[field] += event[field] + + def handle_remove(self, event): + # Decrement our running totals with data from the event. + for field in self.fields: + self.totals[field] -= event[field] + + def average(self, field): + """ + Calculate the average value of our ticks over a given field. + """ + # Sanity check. + assert field in self.fields + + # Averages are 0 by convention if we have no ticks. + if len(self.ticks) == 0: + return 0.0 + + # Calculate and return the average. len(self.ticks) is O(1). + else: + return self.totals[field] / len(self.ticks) + + def get_averages(self): + """ + Return an ndict of all our tracked averages. + """ + out = ndict() + + for field in self.fields: + out[field] = self.average(field) + return out + + def assert_all_fields(self, event): + """ + We only track events with all the fields we care about. 
+ """ + for field in self.fields: + assert event.has_key(field), \ + "Event missing [%s] in MovingAverageEventWindow" % field + assert isinstance(event[field], Number), \ + "Got %s for %s in MovingAverageEventWindow" % (event[field], field) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index ee8cd649..249b44f2 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -7,6 +7,7 @@ from copy import deepcopy from datetime import datetime from collections import deque, defaultdict from numbers import Number +from abc import ABCMeta, abstractmethod from zipline import ndict from zipline.gens.utils import assert_sort_unframe_protocol, \ From 10ff5effbf58010285e82e857a815106b22cde35 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Mon, 6 Aug 2012 13:54:07 -0400 Subject: [PATCH 39/73] moved MovingAverage state class to its own file --- zipline/gens/mavg.py | 4 +++- zipline/gens/transform.py | 36 ------------------------------------ 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/zipline/gens/mavg.py b/zipline/gens/mavg.py index 54826d23..4d477c9d 100644 --- a/zipline/gens/mavg.py +++ b/zipline/gens/mavg.py @@ -44,8 +44,10 @@ class MovingAverageEventWindow(EventWindow): """ Calculates a moving average over all specified fields. """ + # Subclass initializer. The superclass also requires a timedelta + # argument, so instantiation should look like: + # mavg = MovingAverageEventWindow(timedelta(minutes=1), ['price']) def init(self, fields): - # Subclass initializer. Superclass also expects a timedelta. 
self.fields = fields self.totals = defaultdict(float) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 249b44f2..bdee3270 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -139,42 +139,6 @@ class StatefulTransform(object): out_message.dt = message_copy.dt yield out_message -class MovingAverage(object): - """ - Class that maintains a dictionary from sids to EventWindows - Upon receipt of each message we update the - corresponding window and return the calculated average. - """ - FORWARDER = False - - def __init__(self, delta, fields): - self.delta = delta - self.fields = fields - - # No way to pass arguments to the defaultdict factory, so we - # need to define a method to generate the correct EventWindows. - self.sid_windows = defaultdict(self.create_window) - - def create_window(self): - """Factory method for self.sid_windows.""" - return EventWindow(self.delta, self.fields) - - def update(self, event): - """ - Update the event window for this event's sid. Return an ndict from - tracked fields to averages. - """ - - assert isinstance(event, ndict),"Bad event in MovingAverage: %s" % event - assert event.has_key('sid'), "No sid in MovingAverage: %s" % event - assert event.has_key('dt'), "No dt in MovingAverage: %s" % event - - # This will create a new EventWindow if this is the first - # message for this sid. 
- window = self.sid_windows[event.sid] - window.update(event) - return window.get_averages() - class EventWindow: """ Abstract base class for transform classes that calculate iterative From e061cb3a077b01a839fc414be32e7ab44f80612f Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Mon, 6 Aug 2012 15:35:56 -0400 Subject: [PATCH 40/73] new-style vwap --- zipline/gens/mavg.py | 52 ++++++++++++++----------- zipline/gens/tradegens.py | 4 +- zipline/gens/transform.py | 11 ++---- zipline/gens/vwap.py | 82 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 32 deletions(-) create mode 100644 zipline/gens/vwap.py diff --git a/zipline/gens/mavg.py b/zipline/gens/mavg.py index 4d477c9d..1978f92e 100644 --- a/zipline/gens/mavg.py +++ b/zipline/gens/mavg.py @@ -1,4 +1,3 @@ - from numbers import Number from datetime import datetime, timedelta from collections import defaultdict @@ -8,15 +7,15 @@ from zipline.gens.transform import EventWindow class MovingAverage(object): """ - Class that maintains a dictionary from sids to EventWindows - Upon receipt of each message we update the - corresponding window and return the calculated average. + Class that maintains a dictionary from sids to + MovingAverageEventWindows. For each sid, we maintain moving + averages over any number of distinct fields (For example, we can + maintain a sid's average volume as well as its average price.) """ def __init__(self, delta, fields): self.delta = delta self.fields = fields - # No way to pass arguments to the defaultdict factory, so we # need to define a method to generate the correct EventWindows. self.sid_windows = defaultdict(self.create_window) @@ -27,13 +26,9 @@ class MovingAverage(object): def update(self, event): """ - Update the event window for this event's sid. Return an ndict from - tracked fields to averages. + Update the event window for this event's sid. Return an ndict + from tracked fields to moving averages. 
""" - assert isinstance(event, ndict),"Bad event in MovingAverage: %s" % event - assert event.has_key('sid'), "No sid in MovingAverage: %s" % event - assert event.has_key('dt'), "No dt in MovingAverage: %s" % event - # This will create a new EventWindow if this is the first # message for this sid. window = self.sid_windows[event.sid] @@ -42,22 +37,34 @@ class MovingAverage(object): class MovingAverageEventWindow(EventWindow): """ - Calculates a moving average over all specified fields. + Iteratively calculates moving averages for a particular sid over a + given time window. We can maintain averages for arbitrarily many + fields on a single sid. (For example, we might track average + price as well as average volume for a single sid.) The expected + functionality of this class is to be instantiated inside a + MovingAverage transform. """ - # Subclass initializer. The superclass also requires a timedelta - # argument, so instantiation should look like: - # mavg = MovingAverageEventWindow(timedelta(minutes=1), ['price']) - def init(self, fields): + + def __init__(self, delta, fields): + + # Call the superclass constructor to set up base EventWindow + # infrastructure. + EventWindow.__init__(self, delta) + + # We maintain a dictionary of totals for each of our tracked + # fields. self.fields = fields self.totals = defaultdict(float) + # Subclass customization for adding new events. def handle_add(self, event): # Sanity check on the event. - self.assert_all_fields(event) + self.assert_required_fields(event) # Increment our running totals with data from the event. for field in self.fields: self.totals[field] += event[field] + # Subclass customization for removing expired events. def handle_remove(self, event): # Decrement our running totals with data from the event. for field in self.fields: @@ -65,12 +72,12 @@ class MovingAverageEventWindow(EventWindow): def average(self, field): """ - Calculate the average value of our ticks over a given field. 
+ Calculate the average value of our ticks over a single field. """ # Sanity check. assert field in self.fields - # Averages are 0 by convention if we have no ticks. + # Averages are None by convention if we have no ticks. if len(self.ticks) == 0: return 0.0 @@ -82,15 +89,14 @@ class MovingAverageEventWindow(EventWindow): """ Return an ndict of all our tracked averages. """ - out = ndict() - + out = ndict() for field in self.fields: out[field] = self.average(field) return out - def assert_all_fields(self, event): + def assert_required_fields(self, event): """ - We only track events with all the fields we care about. + We only allow events with all of our tracked fields. """ for field in self.fields: assert event.has_key(field), \ diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index b1a0ed96..7b86da05 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -25,9 +25,9 @@ def mock_prices(count, rand = False): """ if rand: - return (random.uniform(0.0, 10.0) for i in xrange(count)) + return (random.uniform(1.0, 10.0) for i in xrange(count)) else: - return (float(i % 11) for i in xrange(1,count+1)) + return (float(i % 10) + 1.0 for i in xrange(count)) def mock_volumes(count, rand = False): """ diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index bdee3270..a191e50b 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -156,15 +156,10 @@ class EventWindow: # Mark this as an abstract base class. __metaclass__ = ABCMeta - def __init__(self, delta, *args, **kwargs): + def __init__(self, delta): self.ticks = deque() self.delta = delta - self.init(*args, **kwargs) - @abstractmethod - def init(self): - raise NotImplementedError() - @abstractmethod def handle_add(self, event): raise NotImplementedError() @@ -193,7 +188,9 @@ class EventWindow: # Subclasses should override handle_remove to define # behavior for removing ticks. 
self.handle_remove(popped) - + + # All event windows expect to receive events with datetime fields + # that arrive in sorted order. def assert_well_formed(self, event): assert isinstance(event, ndict), "Bad event in EventWindow:%s" % event assert event.has_key('dt'), "Missing dt in EventWindow:%s" % event diff --git a/zipline/gens/vwap.py b/zipline/gens/vwap.py new file mode 100644 index 00000000..4e8a2f8b --- /dev/null +++ b/zipline/gens/vwap.py @@ -0,0 +1,82 @@ +from numbers import Number +from datetime import datetime, timedelta +from collections import defaultdict + +from zipline import ndict +from zipline.gens.transform import EventWindow + +class VWAP(object): + """ + Class that maintains a dictionary from sids to VWAPEventWindows. + """ + def __init__(self, delta): + self.delta = delta + + # No way to pass arguments to the defaultdict factory, so we + # need to define a method to generate the correct EventWindows. + self.sid_windows = defaultdict(self.create_window) + + def create_window(self): + """Factory method for self.sid_windows.""" + return VWAPEventWindow(self.delta) + + def update(self, event): + """ + Update the event window for this event's sid. Returns the + current vwap for the sid. + """ + # This will create a new EventWindow if this is the first + # message for this sid. + window = self.sid_windows[event.sid] + window.update(event) + return window.get_vwap() + + +class VWAPEventWindow(EventWindow): + """ + Iteratively maintains a vwap for a single sid over a given + timedelta. + """ + def __init__(self, delta): + EventWindow.__init__(self, delta) + self.flux = 0.0 + self.totalvolume = 0.0 + + # Subclass customization for adding new events. + def handle_add(self, event): + # Sanity check on the event. + self.assert_required_fields(event) + self.flux += event.volume * event.price + self.totalvolume += event.volume + + # Subclass customization for removing expired events. 
+ def handle_remove(self, event): + self.flux -= event.volume * event.price + self.totalvolume -= event.volume + + def get_vwap(self): + """ + Return the calculated vwap for this sid. + """ + # By convention, vwap is None if we have no events. + if len(self.ticks) == 0 + return None + else: + return (self.flux / self.totalvolume) + + # We need numerical price and volume to calculate a vwap. + def assert_required_fields(self, event): + assert isinstance(event.price, Number) + assert isinstance(event.volume, Number) + +if __name__ == "__main__": + + from zipline.gens.tradegens import SpecificEquityTrades + from zipline.gens.transform import StatefulTransform + + source = SpecificEquityTrades() + vwap = StatefulTransform(VWAP, timedelta(minutes = 10)) + + out = vwap.transform(source) + + From ed206de84a6344fe180a957b618ecaf23aef834a Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 10:32:10 -0400 Subject: [PATCH 41/73] abstract eventwindow and trading calendar utility --- tests/test_sorting.py | 4 +- tests/test_transforms.py | 78 +++++--- zipline/finance/returns.py | 34 ++-- zipline/gens/tradegens.py | 6 +- zipline/gens/transform.py | 1 + zipline/gens/vwap.py | 14 +- zipline/utils/date_utils.py | 2 +- zipline/utils/tradingcalendar.py | 333 +++++++++++++++++++++++++++++++ 8 files changed, 410 insertions(+), 62 deletions(-) create mode 100644 zipline/utils/tradingcalendar.py diff --git a/tests/test_sorting.py b/tests/test_sorting.py index 966dec3f..bec97e31 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -249,9 +249,11 @@ def compare_by_dt_source_id(x,y): return -1 elif x.source_id > y.source_id: return 1 - else: return 0 #Alias for ease of use comp = compare_by_dt_source_id + +def to_dt(msg): + return ndict({'dt': msg}) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 1fe1ce3c..b9420633 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -5,9 +5,12 @@ from unittest2 import TestCase from 
zipline.utils.test_utils import setup_logger, teardown_logger import zipline.utils.factory as factory -from zipline.finance.vwap import DailyVWAP, VWAPTransform + +from zipline.gens.tradegens import SpecificEquityTrades +from zipline.gens.transform import StatefulTransform +from zipline.gens.vwap import VWAP +from zipline.gens.mavg import MovingAverage from zipline.finance.returns import ReturnsFromPriorClose -from zipline.finance.movingaverage import MovingAverage from zipline.lines import SimulatedTrading from zipline.core.devsimulator import AddressAllocator @@ -25,7 +28,7 @@ class ZiplineWithTransformsTestCase(TestCase): 'sid' : 133, 'devel' : True } - setup_logger(self, '/var/log/qexec/qexed.log') + setup_logger(self, '/var/log/qexec/qexec.log') def tearDown(self): teardown_logger(self) @@ -48,25 +51,34 @@ class FinanceTransformsTestCase(TestCase): self.trading_environment = factory.create_trading_environment() setup_logger(self, '/var/log/qexec/qexec.log') - def tearDown(self): - self.log_handler.pop_application() - - def test_vwap(self): - trade_history = factory.create_trade_history( 133, - [10.0, 10.0, 10.0, 11.0], + [10.0, 10.0, 11.0, 11.0], [100, 100, 100, 300], timedelta(days=1), self.trading_environment ) + self.source = SpecificEquityTrades(event_list=trade_history) - vwap = DailyVWAP(days=2) - for trade in trade_history: - vwap.update(trade) + def tearDown(self): + self.log_handler.pop_application() - self.assertEqual(vwap.vwap, 10.75) + def test_vwap(self): + vwap = StatefulTransform(VWAP, timedelta(days = 2)) + transformed = list(vwap.transform(self.source)) + # Output values + tnfm_vals = [message.tnfm_value for message in transformed] + # "Hand calculated" values. + expected = [(10.0 * 100) / 100.0, + ((10.0 * 100) + (10.0 * 100)) / (200.0), + ((10.0 * 100) + (10.0 * 100) + (11.0 * 100)) / (300.0), + # First event should get droppped here. + ((10.0 * 100) + (11.0 * 100) + (11.0 * 300)) / (500.0)] + + # Output should match the expected. 
+ assert tnfm_vals == expected + def test_returns(self): trade_history = factory.create_trade_history( @@ -86,17 +98,29 @@ class FinanceTransformsTestCase(TestCase): def test_moving_average(self): - trade_history = factory.create_trade_history( - 133, - [10.0, 10.0, 10.0, 11.0], - [100, 100, 100, 300], - timedelta(days=1), - self.trading_environment - ) - - ma = MovingAverage(days=2) - for trade in trade_history: - ma.update(trade) - - - self.assertEqual(ma.average, 10.5) + + mavg = StatefulTransform( + MovingAverage, + timedelta(days = 2), + ['price', 'volume'] + ) + + transformed = list(mavg.transform(self.source)) + # Output values. + tnfm_prices = [message.tnfm_value.price for message in transformed] + tnfm_volumes = [message.tnfm_value.volume for message in transformed] + # "Hand-calculated" values + expected_prices = [((10.0) / 1.0), + ((10.0 + 10.0) / 2.0), + ((10.0 + 10.0 + 11.0) / 3.0), + # First event should get dropped here. + ((10.0 + 11.0 + 11.0) / 3.0)] + expected_volumes = [((100.0) / 1.0), + ((100.0 + 100.0) / 2.0), + ((100.0 + 100.0 + 100.0) / 3.0), + # First event should get dropped here. 
+ ((100.0 + 100.0 + 300.0) / 3.0)] + + assert tnfm_prices == expected_prices + assert tnfm_volumes == expected_volumes + diff --git a/zipline/finance/returns.py b/zipline/finance/returns.py index 5585f325..6e390364 100644 --- a/zipline/finance/returns.py +++ b/zipline/finance/returns.py @@ -1,26 +1,24 @@ from collections import defaultdict from zipline.transforms.base import BaseTransform -class ReturnsTransform(BaseTransform): - - def init(self, name): - self.state = {} - self.state['name'] = name - self.by_sid = defaultdict(self._create) - - @property - def get_id(self): - return self.state['name'] - - - def transform(self, event): - cur = self.by_sid[event.sid] - cur.update(event) - self.state['value'] = cur.returns - return self.state +class Returns(object): + """ + Class that maintains a dictionary from sids to the event + representing the most recent closing price. + """ + def __init__(self, days == 1): + self.days = days + self.mapping = defaultdict(self._create) + + def update(self, event): + """ + Update and return the calculated returns for this event's sid. + """ + sid_returns = self.mapping[event.sid].update(event) + return sid_returns def _create(self): - return ReturnsFromPriorClose() + return ReturnsFromPriorClose(days) class ReturnsFromPriorClose(object): """ diff --git a/zipline/gens/tradegens.py b/zipline/gens/tradegens.py index 7b86da05..9a0f7406 100644 --- a/zipline/gens/tradegens.py +++ b/zipline/gens/tradegens.py @@ -3,13 +3,15 @@ Tools to generate trade events without a backing store. 
Useful for testing and zipline development """ import random +import pytz + from itertools import chain, cycle, ifilter, izip from datetime import datetime, timedelta from zipline.utils.factory import create_trade from zipline.gens.utils import hash_args -def date_gen(start = datetime(2006, 6, 6, 12), +def date_gen(start = datetime(2006, 6, 6, 12, tzinfo=pytz.utc), delta = timedelta(minutes = 1), count = 100): """ @@ -71,7 +73,7 @@ class SpecificEquityTrades(object): # Unpack config dictionary with default values. self.count = kwargs.get('count', 500) self.sids = kwargs.get('sids', [1, 2]) - self.start = kwargs.get('start', datetime(2012, 6, 6, 0)) + self.start = kwargs.get('start', datetime(2008, 6, 6, 15, tzinfo = pytz.utc)) self.delta = kwargs.get('delta', timedelta(minutes = 1)) # Default to None for event_list and filter. diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index a191e50b..202acc0f 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -10,6 +10,7 @@ from numbers import Number from abc import ABCMeta, abstractmethod from zipline import ndict +from zipline.utils.tradingcalendar import trading_days_between from zipline.gens.utils import assert_sort_unframe_protocol, \ assert_transform_protocol, hash_args diff --git a/zipline/gens/vwap.py b/zipline/gens/vwap.py index 4e8a2f8b..029284c1 100644 --- a/zipline/gens/vwap.py +++ b/zipline/gens/vwap.py @@ -59,7 +59,7 @@ class VWAPEventWindow(EventWindow): Return the calculated vwap for this sid. """ # By convention, vwap is None if we have no events. 
- if len(self.ticks) == 0 + if len(self.ticks) == 0: return None else: return (self.flux / self.totalvolume) @@ -68,15 +68,3 @@ class VWAPEventWindow(EventWindow): def assert_required_fields(self, event): assert isinstance(event.price, Number) assert isinstance(event.volume, Number) - -if __name__ == "__main__": - - from zipline.gens.tradegens import SpecificEquityTrades - from zipline.gens.transform import StatefulTransform - - source = SpecificEquityTrades() - vwap = StatefulTransform(VWAP, timedelta(minutes = 10)) - - out = vwap.transform(source) - - diff --git a/zipline/utils/date_utils.py b/zipline/utils/date_utils.py index a1fbfad1..2819d4e9 100644 --- a/zipline/utils/date_utils.py +++ b/zipline/utils/date_utils.py @@ -95,7 +95,7 @@ HOLIDAYS = { 'july_4th' : datetime(2008 , 7 , 4 ), 'labor_day' : datetime(2008 , 9 , 1 ), 'tgiving' : datetime(2008 , 11 , 27), - 'christmas' : datetime(2008 , 5 , 25), + 'christmas' : datetime(2008 , 12 , 25), } # Create a rule to recur every weekday starting today diff --git a/zipline/utils/tradingcalendar.py b/zipline/utils/tradingcalendar.py new file mode 100644 index 00000000..c7aa9152 --- /dev/null +++ b/zipline/utils/tradingcalendar.py @@ -0,0 +1,333 @@ +import pytz + +from datetime import datetime, timedelta +from dateutil import rrule +from zipline.utils.date_utils import utcnow + +def market_opens(start, end, inclusive=False): + """ + Returns all market opens between the start date and the end date. + Must use utc-stamped datetimes. + """ + return opens.between(start, end, inc=inclusive) + +def market_closes(start, end, inclusive=False): + """ + Returns all market closes between the start date and the end date. + Must use utc-stamped datetimes. + """ + return closes.between(start, end, inc=inclusive) + +def trading_days_between(start, end): + """ + Calculate the number of "complete" trading days between two + events. 
We define this as the number of market opens that + occurred between start and end, with the caveat that we subtract 1 + from this total if end falls on the same day as the last market + open and end occurs earlier in its own day than start. This + reflects the fact that we haven't completed a full day + corresponding to the last market open. + + Examples: + + 1.) + start = Tuesday, Aug 7, 2012, 1:00 pm + end = Wednesday, Aug 8, 2012, 1:30 pm + + There is one market open between these dates, on the morning of + Wednesday the 8th. This falls on the same calendar day as end, + but end is later in the day than start, so we count this as a full + day. The correct output is 1. + + 2.) + start = Tuesday, Aug 7, 2012, 1:30 pm + end = Wednesday, Aug 8, 2012, 1:00 pm + + There is one market open between these dayes, on the morning of + Wednesday the 8th. This falls on the same calendar day as end, + and end is earlier in the day than start, so we do not count this + day as completed. The correct output is 0. + + 3.) + start = Tuesday, Aug 7, 2012, 1:00 pm + end = Saturday, Aug 11, 2012, 1:30 pm + + There are 3 market opens between these dates, occurring on + Wednesday, Thursday, and Friday. The last open is not on + the same day as end, so we simply return 3 + + 4.) + start = Tuesday, Aug 7, 2012, 1:30 pm + end = Monday, Aug, 13, 2012, 1:00 pm + + There are 4 market opens between these dates, occurring on + Wednesday, Thursday, Friday, and the following Monday. The + last open occurs on the same calendar day as end, and end + is earlier in the day than start, so we do not count the + last market day as completed. The correct output is 3 days. + """ + # Calculate the number of opens between the events. + opens = (market_opens(start, end)) + days_between = len(opens) + if days_between == 0: + return days_between + + # If end falls on the same day as an open, subtract 1 from the + # total if end is earlier in its respective day than start. 
+ last_open = opens[-1] + if last_open.date() == end.date() and earlier_in_day(end, start): + days_between -=1 + + return days_between + +def earlier_in_day(d1, d2): + """ + Return true if d1 falls earlier in its own day than d2. + """ + d1 = d1.replace(year = d2.year, day = d2.day) + return d1 < d2 + +WEEKDAYS = [rrule.MO, rrule.TU, rrule.WE, rrule.TH, rrule.FR] + +# Recurrence rule that generates all market opens since Jan 1, 1970. +# This does not exclude holidays. +market_opens_with_holidays = rrule.rrule( + rrule.DAILY, + byweekday=WEEKDAYS, + byhour = 14, + byminute = 30, + cache = True, + dtstart=datetime(1970, 1, 1, tzinfo = pytz.utc), +) + +# Recurrence rule that generates all market closes since Jan 1, 1970. +# This does not exclude holidays. +market_closes_with_holidays = rrule.rrule( + rrule.DAILY, + byweekday=WEEKDAYS, + byhour = 21, + byminute = 0, + cache = True, + dtstart=datetime(1970, 1, 1, tzinfo = pytz.utc), +) + +# Recurrence rules for excluding the market open/close on new years. +new_years_opens = rrule.rrule( + rrule.MONTHLY, + byyearday = 1, + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +new_years_closes = rrule.rrule( + rrule.MONTHLY, + byyearday = 1, + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rules for excluding MLK day. It is always the third +# monday in January. +mlk_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 1, + byweekday = (rrule.MO(3)), + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +mlk_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 1, + byweekday = (rrule.MO(+3)), + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rules for generating the market open/close for +# presidents' day. Presidents' day always occurs on the third monday +# of February. 
+presidents_day_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 2, + byweekday = (rrule.MO(3)), + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +presidents_day_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 2, + byweekday = (rrule.MO(3)), + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rules for generating the market open/close for good +# friday. Good friday always falls 2 days before easter, which +# thankfully is a built-in refernce in this module. +good_friday_opens = rrule.rrule( + rrule.DAILY, + byeaster = -2, + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +good_friday_closes = rrule.rrule( + rrule.DAILY, + byeaster = -2, + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rules for generating the market open/close for memorial +# day. Memorial day always occurs on the last monday of May. +memorial_day_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 5, + byweekday = (rrule.MO(-1)), + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +memorial_day_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 5, + byweekday = (rrule.MO(-1)), + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rules for generating the market open/close for July 4th. +july_4th_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 6, + bymonthday = 4, + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +july_4th_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 6, + bymonthday = 4, + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rule for generating the market open/close for labor day. 
+# Labor day is always the first monday of September. +labor_day_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 9, + byweekday = (rrule.MO(1)), + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +labor_day_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 9, + byweekday = (rrule.MO(1)), + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence rule for generating the market open/close for +# thanksgiving. Thanksgiving always falls on the fourth thursday in +# November. (Who decides how these holidays work!?!) +thanksgiving_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 11, + byweekday = (rrule.TH(-1)), + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +thanksgiving_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 11, + byweekday = (rrule.TH(-1)), + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) + +# Recurrence relation for generating the market open/close for +# christmas. Christmas always occurs on december 25th. + +christmas_opens = rrule.rrule( + rrule.MONTHLY, + bymonth = 12, + bymonthday = 25, + byhour = 14, + byminute = 30, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +christmas_closes = rrule.rrule( + rrule.MONTHLY, + bymonth = 12, + bymonthday = 25, + byhour = 21, + byminute = 0, + cache = True, + dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) +) +# All NYSE observed holidays. 
+holiday_opens = [ + new_years_opens, + mlk_opens, + presidents_day_opens, + good_friday_opens, + memorial_day_opens, + july_4th_opens, + labor_day_opens, + thanksgiving_opens, + christmas_opens +] +holiday_closes = [ + new_years_closes, + mlk_closes, + presidents_day_closes, + good_friday_closes, + memorial_day_closes, + july_4th_closes, + labor_day_closes, + thanksgiving_closes, + christmas_closes +] + +# Valid market opens are given by all market opens minus holidays. +opens = rrule.rruleset() +opens.rrule(market_opens_with_holidays) +for holiday_rule in holiday_opens: + opens.exrule(holiday_rule) + +closes = rrule.rruleset() +closes.rrule(market_closes_with_holidays) +for holiday_rule in holiday_closes: + closes.exrule(holiday_rule) From 3f4d772e4c04e4c6cee7c29d8769a65f13712d2c Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 11:15:14 -0400 Subject: [PATCH 42/73] first draft of lines returning new SimulatedTrading object. --- tests/test_components.py | 15 ++- tests/test_exception_handling.py | 10 +- zipline/lines.py | 193 ++++++++++++++++++++++++++----- 3 files changed, 176 insertions(+), 42 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 33255883..7ab0d3b2 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -8,7 +8,8 @@ from collections import defaultdict from zipline.gens.composites import date_sorted_sources, merged_transforms from zipline.core.devsimulator import AddressAllocator -from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform +from zipline.gens.transform import Passthrough, StatefulTransform +from zipline.gens.mavg import MovingAverage from zipline.gens.tradesimulation import TradeSimulationClient as tsc from zipline.utils.factory import create_trading_environment @@ -113,7 +114,8 @@ class ComponentTestCase(TestCase): monitor, socket_uri, DATASOURCE_FRAME, - DATASOURCE_UNFRAME + DATASOURCE_UNFRAME, + "source_a" ) launch_monitor(monitor) @@ -171,7 +173,8 @@ 
class ComponentTestCase(TestCase): monitor, socket_uris[0], DATASOURCE_FRAME, - DATASOURCE_UNFRAME + DATASOURCE_UNFRAME, + trade_gen_a.get_hash() ) comp_b = Component( @@ -179,7 +182,8 @@ class ComponentTestCase(TestCase): monitor, socket_uris[1], DATASOURCE_FRAME, - DATASOURCE_UNFRAME + DATASOURCE_UNFRAME, + trade_gen_b.get_hash() ) comp_c = Component( @@ -187,7 +191,8 @@ class ComponentTestCase(TestCase): monitor, socket_uris[2], DATASOURCE_FRAME, - DATASOURCE_UNFRAME + DATASOURCE_UNFRAME, + trade_gen_c.get_hash() ) sources = [comp_a, comp_b, comp_c] diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index 144568d1..f278ef34 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -26,11 +26,11 @@ class ExceptionTestCase(TestCase): def setUp(self): self.zipline_test_config = { - 'allocator' : allocator, - 'sid' : 133, - 'devel' : False, - 'results_socket' : allocator.lease(1)[0], - 'simulation_style' : SIMULATION_STYLE.FIXED_SLIPPAGE + 'allocator' : allocator, + 'sid' : 133, + 'devel' : False, + 'results_socket_uri' : allocator.lease(1)[0], + 'simulation_style' : SIMULATION_STYLE.FIXED_SLIPPAGE } self.ctx = zmq.Context() setup_logger(self) diff --git a/zipline/lines.py b/zipline/lines.py index fbc4d155..6b96d1ec 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -59,44 +59,178 @@ before invoking simulate. | __init__. 
| +---------------------------------+ """ - -import logbook +import sys +import zmq +import multiprocessing from zipline.test_algorithms import TestAlgorithm from zipline.finance.trading import SIMULATION_STYLE +from zipline.utils.log_utils import ZeroMQLogHandler, stdout_only_pipe from zipline.utils import factory -import pytz -from pprint import pprint as pp -from datetime import datetime, timedelta - -from zipline.utils.factory import create_trading_environment from zipline.test_algorithms import TestAlgorithm -from zipline.gens.composites import SourceBundle, TransformBundle, \ +from zipline.gens.composites import \ date_sorted_sources, merged_transforms -from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.transform import MovingAverage, Passthrough, StatefulTransform +from zipline.gens.transform import Passthrough, StatefulTransform from zipline.gens.tradesimulation import TradeSimulationClient as tsc +from logbook import Logger, NestedSetup, Processor import zipline.protocol as zp -log = logbook.Logger('Lines') +log = Logger('Lines') + +class CancelSignal(Exception): + def __init__(self): + pass class SimulatedTrading(object): - @staticmethod - def create_simulation(sources, transforms, algorithm, environment, style): + def __init__(self, + sources, + transforms, + algorithm, + environment, + style, + results_socket_uri, + context, + sim_id): - sorted = date_sorted_sources(*sources) - passthrough = StatefulTransform(Passthrough) + self.date_sorted = date_sorted_sources(*sources) + self.transforms = transforms + self.transforms.extend(StatefulTransform(Passthrough)) + self.merged = merged_transforms(self.date_sorted, *self.transforms) + self.trading_client = tsc(algorithm, environment, style) + self.gen = self.trading_client.simluate(self.merged) + self.results_uri = results_socket_uri + self.results_socket = None + self.context = context + self.sim_id = sim_id - merged = merged_transforms(sorted, passthrough, *transforms) - 
trading_client = tsc(algorithm, environment, style) - return trading_client.simluate(merged) + # optional process if we fork simulate into an + # independent process. + self.proc = None + def simulate(self, blocking=True): + + # for non-blocking, + if blocking: + self.run_gen() + else: + return self.fork_and_sim() + + def fork_and_sim(self): + self.proc = multiprocessing.Process(self.run_gen) + self.proc.start() + return self.proc + + def run_gen(self): + + self.open() + if self.zmq_out: + + def inject_event_data(record): + # Record the simulation time. + record.extra['algo_dt'] = self.current_dt + + data_injector = Processor(inject_event_data) + log_pipeline = NestedSetup([self.zmq_out,data_injector]) + with log_pipeline.threadbound(), self.stdout_capture(self.logger, ''): + self.drain_gen() + # if no log socket, just run the algo normally + else: + self.drain_gen() + + def stream_results(self): + assert self.results_socket, \ + "Results socket must exist to stream results" + try: + for event in self.gen: + if event.has_key('daily_perf'): + msg = zp.PERF_FRAME(event) + else: + msg = zp.RISK_FRAME(event) + self.results_socket.send(msg) + self.signal_done() + except Exception as exc: + self.handle_exception(exc) + finally: + self.close() + + def close(self): + log.info("Closing Simulation") + + def cancel(self): + if self.proc and self.proc.is_alive(): + self.proc.terminate() + else: + self.gen.throw(CancelSignal()) + + def handle_exception(self, exc): + if isinstance(exc, CancelSignal): + # signal from monitor of an orderly shutdown, + # do nothing. + pass + else: + self.signal_exception(exc) + + def signal_exception(self, exc=None): + """ + All exceptions inside any component should boil back to + this handler. + + Will inform the system that the component has failed and how it + has failed. 
+ """ + exc_type, exc_value, exc_traceback = sys.exc_info() + + log.exception("Unexpected error in run for {id}.".format(id=self.sim_id)) + + try: + log.info('{id} sending exception to monitor'\ + .format(id=self.sim_id)) + msg = zp.EXCEPTION_FRAME( + exc_traceback, + exc_type.__name__, + exc_value.message + ) + + exception_frame = zp.CONTROL_FRAME( + zp.CONTROL_PROTOCOL.EXCEPTION, + msg + ) + self.results_socket.send(exception_frame) + + except: + log.exception("Exception while reporting simulation exception.") + + + def open(self): + if not self.context: + self.context = zmq.Context() + if self.results_uri: + sock = self.context.socket(zmq.PUSH) + sock.connect(self.results_uri) + self.results_socket = sock + self.sockets.append(sock) + self.results_socket = sock + + self.setup_logging() + + def setup_logging(self, socket = None): + sock = socket or self.results_socket + + self.zmq_out = ZeroMQLogHandler( + socket = sock, + ) + + + # This is a class, which is instantiated later + # in run_algorithm. The class provides a generator. 
+ self.stdout_capture = stdout_only_pipe + @staticmethod def create_test_zipline(**config): """ @@ -154,6 +288,10 @@ class SimulatedTrading(object): if not simulation_style: simulation_style = SIMULATION_STYLE.FIXED_SLIPPAGE + zmq_context = config.get('zmq_context', None) + simulation_id = config.get('simumlation_id', 'test_simulation') + results_socket_uri = config.get('results_socket_uri', None) + #------------------- # Trade Source #------------------- @@ -189,24 +327,15 @@ class SimulatedTrading(object): # Simulation #------------------- - sim = SimulatedTrading.create_simulation( + sim = SimulatedTrading( [trade_source], transforms, test_algo, trading_environment, - simulation_style) + simulation_style, + zmq_context, + results_socket_uri, + simulation_id) #------------------- return sim - - -class ZiplineException(Exception): - def __init__(self, zipline_name, msg): - self.name = zipline_name - self.message = msg - - def __str__(self): - return "Unexpected exception {line}: {msg}".format( - line=self.name, - msg=self.message - ) From 804bcb4e0c10afb4e3344da6cee47ccab62bd5ee Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 13:16:42 -0400 Subject: [PATCH 43/73] exceptions tests passing --- tests/test_exception_handling.py | 116 ++++++++++++++----------------- zipline/lines.py | 36 +++++----- zipline/utils/test_utils.py | 9 ++- 3 files changed, 72 insertions(+), 89 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index f278ef34..53d24732 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -52,18 +52,14 @@ class ExceptionTestCase(TestCase): **self.zipline_test_config ) output, _ = drain_zipline(self, zipline) - self.assertEqual(len(output), 1) + self.assertEqual(len(output), 2) self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) del payload['date'] check(self, payload, INITIALIZE_TB) - 
self.assertTrue(zipline.sim.ready()) - self.assertFalse(zipline.sim.exception) - def test_exception_in_handle_data(self): - # Simulation # ---------- self.zipline_test_config['algorithm'] = \ @@ -77,15 +73,12 @@ class ExceptionTestCase(TestCase): ) output, _ = drain_zipline(self, zipline) - - self.assertEqual(len(output), 1) + self.assertEqual(len(output), 3) self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) del payload['date'] check(self, payload, HANDLE_DATA_TB) - self.assertTrue(zipline.sim.ready()) - self.assertFalse(zipline.sim.exception) def test_zerodivision_exception_in_handle_data(self): @@ -101,14 +94,12 @@ class ExceptionTestCase(TestCase): ) output, _ = drain_zipline(self, zipline) - self.assertEqual(len(output), 5) + self.assertEqual(len(output), 6) self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) del payload['date'] check(self, payload, ZERO_DIV_TB) - self.assertTrue(zipline.sim.ready()) - self.assertFalse(zipline.sim.exception) # TODO: # - define more zipline failure modes: exception in other @@ -120,21 +111,12 @@ class ExceptionTestCase(TestCase): INITIALIZE_TB =\ {'message': 'Algo exception in initialize', 'name': 'Exception', - 'stack': [{'filename': '/zipline/core/component.py', 'line': 'self._run()', 'lineno': 210, 'method': 'run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.loop()', 'lineno': 201, 'method': '_run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.do_work()', 'lineno': 241, 'method': 'loop'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.initialize_algo()', - 'lineno': 91, - 'method': 'do_work'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.do_op(self.algorithm.initialize)', - 'lineno': 74, - 'method': 'initialize_algo'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'callable_op(*args, 
**kwargs)', - 'lineno': 194, - 'method': 'do_op'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo,', 'lineno': 93, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'self.algo.initialize()', + 'lineno': 123, + 'method': '__init__'}, {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in initialize")', 'lineno': 166, @@ -143,25 +125,27 @@ INITIALIZE_TB =\ HANDLE_DATA_TB =\ {'message': 'Algo exception in handle_data', 'name': 'Exception', - 'stack': [{'filename': '/zipline/core/component.py', 'line': 'self._run()', 'lineno': 210, 'method': 'run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.loop()', 'lineno': 201, 'method': '_run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.do_work()', 'lineno': 241, 'method': 'loop'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.process_event(event)', - 'lineno': 110, - 'method': 'do_work'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.run_algorithm()', - 'lineno': 158, - 'method': 'process_event'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.do_op(self.algorithm.handle_data, data)', - 'lineno': 180, - 'method': 'run_algorithm'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'callable_op(*args, **kwargs)', - 'lineno': 194, - 'method': 'do_op'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'for message in algo_results:', + 'lineno': 100, + 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'return self.__generator.next()', + 'lineno': 144, + 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', + 
'line': 'self.update_current_snapshot(event)', + 'lineno': 199, + 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'self.simulate_current_snapshot()', + 'lineno': 221, + 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'self.algo.handle_data(self.universe)', + 'lineno': 246, + 'method': 'simulate_current_snapshot'}, {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in handle_data")', 'lineno': 187, @@ -170,23 +154,25 @@ HANDLE_DATA_TB =\ ZERO_DIV_TB= \ {'message': 'integer division or modulo by zero', 'name': 'ZeroDivisionError', - 'stack': [{'filename': '/zipline/core/component.py', 'line': 'self._run()', 'lineno': 210, 'method': 'run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.loop()', 'lineno': 201, 'method': '_run'}, - {'filename': '/zipline/core/component.py', 'line': 'self.do_work()', 'lineno': 241, 'method': 'loop'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.process_event(event)', - 'lineno': 110, - 'method': 'do_work'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.run_algorithm()', - 'lineno': 158, - 'method': 'process_event'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'self.do_op(self.algorithm.handle_data, data)', - 'lineno': 180, - 'method': 'run_algorithm'}, - {'filename': '/zipline/components/tradesimulation.py', - 'line': 'callable_op(*args, **kwargs)', - 'lineno': 194, - 'method': 'do_op'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'for message in algo_results:', + 'lineno': 100, + 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'return self.__generator.next()', + 'lineno': 144, + 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 
'self.update_current_snapshot(event)', + 'lineno': 199, + 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'self.simulate_current_snapshot()', + 'lineno': 221, + 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', + 'line': 'self.algo.handle_data(self.universe)', + 'lineno': 246, + 'method': 'simulate_current_snapshot'}, {'filename': '/zipline/test_algorithms.py', 'line': '5/0', 'lineno': 218, 'method': 'handle_data'}]} diff --git a/zipline/lines.py b/zipline/lines.py index 6b96d1ec..b4db5de6 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -99,10 +99,10 @@ class SimulatedTrading(object): self.date_sorted = date_sorted_sources(*sources) self.transforms = transforms - self.transforms.extend(StatefulTransform(Passthrough)) + self.transforms.append(StatefulTransform(Passthrough)) self.merged = merged_transforms(self.date_sorted, *self.transforms) self.trading_client = tsc(algorithm, environment, style) - self.gen = self.trading_client.simluate(self.merged) + self.gen = self.trading_client.simulate(self.merged) self.results_uri = results_socket_uri self.results_socket = None self.context = context @@ -111,6 +111,7 @@ class SimulatedTrading(object): # optional process if we fork simulate into an # independent process. self.proc = None + self.logger = Logger(sim_id) def simulate(self, blocking=True): @@ -122,7 +123,7 @@ class SimulatedTrading(object): return self.fork_and_sim() def fork_and_sim(self): - self.proc = multiprocessing.Process(self.run_gen) + self.proc = multiprocessing.Process(target=self.run_gen) self.proc.start() return self.proc @@ -133,15 +134,16 @@ class SimulatedTrading(object): def inject_event_data(record): # Record the simulation time. 
- record.extra['algo_dt'] = self.current_dt + #record.extra['algo_dt'] = self.current_dt + pass data_injector = Processor(inject_event_data) log_pipeline = NestedSetup([self.zmq_out,data_injector]) with log_pipeline.threadbound(), self.stdout_capture(self.logger, ''): - self.drain_gen() + self.stream_results() # if no log socket, just run the algo normally else: - self.drain_gen() + self.stream_results() def stream_results(self): assert self.results_socket, \ @@ -153,7 +155,8 @@ class SimulatedTrading(object): else: msg = zp.RISK_FRAME(event) self.results_socket.send(msg) - self.signal_done() + + self.signal_done() except Exception as exc: self.handle_exception(exc) finally: @@ -186,10 +189,8 @@ class SimulatedTrading(object): """ exc_type, exc_value, exc_traceback = sys.exc_info() - log.exception("Unexpected error in run for {id}.".format(id=self.sim_id)) - try: - log.info('{id} sending exception to monitor'\ + log.exception('{id} sending exception to result stream.'\ .format(id=self.sim_id)) msg = zp.EXCEPTION_FRAME( exc_traceback, @@ -197,11 +198,7 @@ class SimulatedTrading(object): exc_value.message ) - exception_frame = zp.CONTROL_FRAME( - zp.CONTROL_PROTOCOL.EXCEPTION, - msg - ) - self.results_socket.send(exception_frame) + self.results_socket.send(msg) except: log.exception("Exception while reporting simulation exception.") @@ -214,9 +211,6 @@ class SimulatedTrading(object): sock = self.context.socket(zmq.PUSH) sock.connect(self.results_uri) self.results_socket = sock - self.sockets.append(sock) - self.results_socket = sock - self.setup_logging() def setup_logging(self, socket = None): @@ -231,6 +225,10 @@ class SimulatedTrading(object): # in run_algorithm. The class provides a generator. 
self.stdout_capture = stdout_only_pipe + def join(self): + if self.proc: + self.proc.join() + @staticmethod def create_test_zipline(**config): """ @@ -333,8 +331,8 @@ class SimulatedTrading(object): test_algo, trading_environment, simulation_style, - zmq_context, results_socket_uri, + zmq_context, simulation_id) #------------------- diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index eda5a133..03442002 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -65,10 +65,10 @@ def drain_zipline(test, zipline): assert test.ctx, "method expects a valid zmq context" assert test.zipline_test_config, "method expects a valid test config" assert isinstance(test.zipline_test_config, dict) - assert test.zipline_test_config['results_socket'], \ + assert test.zipline_test_config['results_socket_uri'], \ "need to specify a socket address for logs/perf/risk" test.receiver = create_receiver( - test.zipline_test_config['results_socket'], + test.zipline_test_config['results_socket_uri'], test.ctx ) # Bind and connect are asynch, so allow time for bind before @@ -76,13 +76,12 @@ def drain_zipline(test, zipline): time.sleep(1) # start the simulation - zipline.simulate(blocking=False) + zipline.simulate(blocking=True) output, transaction_count = drain_receiver(test.receiver) # some processes will exit after the message stream is # finished. We block here to avoid collisions with subsequent # ziplines. - for process in zipline.sim.subprocesses: - process.join() + zipline.join() return output, transaction_count From aeb50da170fa412979c84aed92a0c8a73400355f Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 14:42:43 -0400 Subject: [PATCH 44/73] fixes for unit tests, back to 50/51 passing. 
--- tests/test_exception_handling.py | 2 -- tests/test_finance.py | 25 ++++++++----------------- tests/test_monitor.py | 6 ++++-- tests/test_perf_tracking.py | 5 ++++- tests/test_protocol.py | 4 ---- tests/test_transforms.py | 21 ++++++++++----------- zipline/finance/returns.py | 7 +++---- zipline/finance/trading.py | 4 +++- zipline/lines.py | 7 ++++++- zipline/protocol.py | 6 ++++++ zipline/utils/factory.py | 2 +- zipline/utils/test_utils.py | 25 +++++++++++-------------- 12 files changed, 56 insertions(+), 58 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index 53d24732..6a091106 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -26,9 +26,7 @@ class ExceptionTestCase(TestCase): def setUp(self): self.zipline_test_config = { - 'allocator' : allocator, 'sid' : 133, - 'devel' : False, 'results_socket_uri' : allocator.lease(1)[0], 'simulation_style' : SIMULATION_STYLE.FIXED_SLIPPAGE } diff --git a/tests/test_finance.py b/tests/test_finance.py index d108e019..e5f26240 100644 --- a/tests/test_finance.py +++ b/tests/test_finance.py @@ -11,7 +11,6 @@ from collections import defaultdict from nose.tools import timed import zipline.utils.factory as factory -import zipline.protocol as zp from zipline.test_algorithms import TestAlgorithm from zipline.finance.trading import TradingEnvironment @@ -19,10 +18,9 @@ from zipline.core.devsimulator import AddressAllocator from zipline.lines import SimulatedTrading from zipline.finance.performance import PerformanceTracker from zipline.utils.protocol_utils import ndict -from zipline.finance.trading import TransactionSimulator, SIMULATION_STYLE +from zipline.finance.trading import TransactionSimulator from zipline.utils.test_utils import \ drain_zipline, \ - check, \ setup_logger, \ teardown_logger,\ assert_single_position @@ -39,10 +37,8 @@ class FinanceTestCase(TestCase): def setUp(self): self.zipline_test_config = { - 'allocator' : allocator, - 'sid' 
: 133, - #'devel' : True, - 'results_socket' : allocator.lease(1)[0] + 'sid' : 133, + 'results_socket_uri' : allocator.lease(1)[0] } self.ctx = zmq.Context() @@ -60,7 +56,7 @@ class FinanceTestCase(TestCase): trading_environment ) prev = None - for trade in trade_source.event_list: + for trade in trade_source: if prev: self.assertTrue(trade.dt > prev.dt) prev = trade @@ -123,7 +119,6 @@ class FinanceTestCase(TestCase): self.zipline_test_config['order_count'] = 100 self.zipline_test_config['trade_count'] = 200 zipline = SimulatedTrading.create_test_zipline(**self.zipline_test_config) - assert_single_position(self, zipline) #@timed(DEFAULT_TIMEOUT) @@ -148,9 +143,6 @@ class FinanceTestCase(TestCase): ) output, transaction_count = drain_zipline(self, zipline) - self.assertTrue(zipline.sim.ready()) - self.assertFalse(zipline.sim.exception) - #check that the algorithm received no events self.assertEqual( 0, @@ -301,12 +293,12 @@ class FinanceTestCase(TestCase): # if present, expect transaction amounts to match orders exactly. 
complete_fill = params.get('complete_fill') + sid = 1 trading_environment = factory.create_trading_environment() - trade_sim = TransactionSimulator() + trade_sim = TransactionSimulator([sid]) price = [10.1] * trade_count volume = [100] * trade_count start_date = trading_environment.first_open - sid = 1 generated_trades = factory.create_trade_history( sid, @@ -330,7 +322,7 @@ class FinanceTestCase(TestCase): 'dt' : order_date }) - trade_sim.add_open_order(order) + trade_sim.place_order(order) order_date = order_date + order_interval # move after market orders to just after market next @@ -353,14 +345,13 @@ class FinanceTestCase(TestCase): self.assertEqual(order.amount, order_amount * alternator**i) - tracker = PerformanceTracker(trading_environment) + tracker = PerformanceTracker(trading_environment, [sid]) # this approximates the loop inside TradingSimulationClient transactions = [] for trade in generated_trades: if trade_delay: trade.dt = trade.dt + trade_delay - txn = trade_sim.apply_trade_to_open_orders(trade) if txn: transactions.append(txn) diff --git a/tests/test_monitor.py b/tests/test_monitor.py index 3d063954..76bb6184 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -14,13 +14,15 @@ class TestMonitor(TestCase): def test_init(self): pub_socket = 'tcp://127.0.0.1:5000' route_socket = 'tcp://127.0.0.1:5001' + exception_socket = 'tcp://127.0.0.1:5002' - mon = Monitor(pub_socket, route_socket) + mon = Monitor(pub_socket, route_socket, exception_socket) mon.manage([]) def test_init_topology(self): pub_socket = 'tcp://127.0.0.1:5000' route_socket = 'tcp://127.0.0.1:5001' + exception_socket = 'tcp://127.0.0.1:5002' - mon = Monitor(pub_socket, route_socket, ) + mon = Monitor(pub_socket, route_socket, exception_socket) mon.manage([ 'a', 'b', 'c', 'd' ]) diff --git a/tests/test_perf_tracking.py b/tests/test_perf_tracking.py index 1a77818c..2f9c1df8 100644 --- a/tests/test_perf_tracking.py +++ b/tests/test_perf_tracking.py @@ -543,7 +543,10 @@ shares 
in position" self.trading_environment.capital_base = 1000.0 self.trading_environment.frame_index = ['sid', 'volume', 'dt', \ 'price', 'changed'] - perf_tracker = perf.PerformanceTracker(self.trading_environment) + perf_tracker = perf.PerformanceTracker( + self.trading_environment, + [sid, sid2] + ) for event in trade_history: #create a transaction for all but diff --git a/tests/test_protocol.py b/tests/test_protocol.py index c90b09dc..d1606ed4 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -1,8 +1,6 @@ """ Test the FRAME/UNFRAME functions in the sequence expected from ziplines. """ -import pytz - from unittest2 import TestCase from datetime import datetime, timedelta from collections import defaultdict @@ -10,10 +8,8 @@ from collections import defaultdict from nose.tools import timed import zipline.utils.factory as factory -from zipline.utils import logger import zipline.protocol as zp -from zipline.finance.sources import SpecificEquityTrades DEFAULT_TIMEOUT = 5 # seconds diff --git a/tests/test_transforms.py b/tests/test_transforms.py index b9420633..27de0626 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -37,7 +37,7 @@ class ZiplineWithTransformsTestCase(TestCase): zipline = SimulatedTrading.create_test_zipline( **self.zipline_test_config ) - vwap = VWAPTransform("vwap_10", daycount=10) + vwap = VWAP("vwap_10", daycount=10) zipline.add_transform(vwap) zipline.simulate(blocking=True) @@ -49,7 +49,7 @@ class FinanceTransformsTestCase(TestCase): def setUp(self): self.trading_environment = factory.create_trading_environment() - setup_logger(self, '/var/log/qexec/qexec.log') + setup_logger(self) trade_history = factory.create_trade_history( 133, @@ -74,11 +74,11 @@ class FinanceTransformsTestCase(TestCase): ((10.0 * 100) + (10.0 * 100)) / (200.0), ((10.0 * 100) + (10.0 * 100) + (11.0 * 100)) / (300.0), # First event should get droppped here. 
- ((10.0 * 100) + (11.0 * 100) + (11.0 * 300)) / (500.0)] + ((10.0 * 100) + (11.0 * 100) + (11.0 * 300)) / (500.0)] # Output should match the expected. assert tnfm_vals == expected - + def test_returns(self): trade_history = factory.create_trade_history( @@ -98,13 +98,13 @@ class FinanceTransformsTestCase(TestCase): def test_moving_average(self): - + mavg = StatefulTransform( - MovingAverage, - timedelta(days = 2), + MovingAverage, + timedelta(days = 2), ['price', 'volume'] - ) - + ) + transformed = list(mavg.transform(self.source)) # Output values. tnfm_prices = [message.tnfm_value.price for message in transformed] @@ -120,7 +120,6 @@ class FinanceTransformsTestCase(TestCase): ((100.0 + 100.0 + 100.0) / 3.0), # First event should get dropped here. ((100.0 + 100.0 + 300.0) / 3.0)] - + assert tnfm_prices == expected_prices assert tnfm_volumes == expected_volumes - diff --git a/zipline/finance/returns.py b/zipline/finance/returns.py index 6e390364..2973029f 100644 --- a/zipline/finance/returns.py +++ b/zipline/finance/returns.py @@ -1,15 +1,14 @@ from collections import defaultdict -from zipline.transforms.base import BaseTransform class Returns(object): """ Class that maintains a dictionary from sids to the event representing the most recent closing price. """ - def __init__(self, days == 1): + def __init__(self, days = 1): self.days = days self.mapping = defaultdict(self._create) - + def update(self, event): """ Update and return the calculated returns for this event's sid. 
@@ -18,7 +17,7 @@ class Returns(object): return sid_returns def _create(self): - return ReturnsFromPriorClose(days) + return ReturnsFromPriorClose(self.days) class ReturnsFromPriorClose(object): """ diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index 7bd8c7c3..baa21e58 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -31,6 +31,8 @@ class TransactionSimulator(object): self.open_orders[sid] = [] def place_order(self, order): + # initialized filled field. + order.filled = 0 self.open_orders[order.sid].append(order) def update(self, event): @@ -39,7 +41,7 @@ class TransactionSimulator(object): if event.type == zp.DATASOURCE_TYPE.TRADE: event.TRANSACTION = self.apply_trade_to_open_orders(event) return event - + def simulate_buy_all(self, event): txn = self.create_transaction( event.sid, diff --git a/zipline/lines.py b/zipline/lines.py index b4db5de6..4a54fb5d 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -162,8 +162,13 @@ class SimulatedTrading(object): finally: self.close() + def signal_done(self): + # notify monitor we're done + done_frame = zp.DONE_FRAME('succes') + self.results_socket.send(done_frame) + def close(self): - log.info("Closing Simulation") + log.info("Closing Simulation: {id}".format(id=self.sim_id)) def cancel(self): if self.proc and self.proc.is_alive(): diff --git a/zipline/protocol.py b/zipline/protocol.py index 7aa503d7..bbf80f98 100644 --- a/zipline/protocol.py +++ b/zipline/protocol.py @@ -570,6 +570,12 @@ def CANCEL_FRAME(date): return BT_UPDATE_FRAME('CANCEL', result) +def DONE_FRAME(msg): + assert isinstance(msg, basestring), \ + "Done message must be a string." 
+ + return BT_UPDATE_FRAME('DONE', msg) + def BT_UPDATE_FRAME(prefix, payload): """ diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index e001cf07..1b881329 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -12,7 +12,7 @@ from datetime import datetime, timedelta import zipline.finance.risk as risk import zipline.protocol as zp -from zipline.finance.sources import RandomEquityTrades +from zipline.gens.tradegens import RandomEquityTrades from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.utils import create_trade from zipline.finance.trading import TradingEnvironment diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 03442002..036ebe02 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -76,7 +76,7 @@ def drain_zipline(test, zipline): time.sleep(1) # start the simulation - zipline.simulate(blocking=True) + zipline.simulate(blocking=False) output, transaction_count = drain_receiver(test.receiver) # some processes will exit after the message stream is # finished. 
We block here to avoid collisions with subsequent @@ -96,16 +96,15 @@ def drain_receiver(receiver): transaction_count = 0 while True: msg = receiver.recv() - if msg == str(zp.CONTROL_PROTOCOL.DONE): + update = zp.BT_UPDATE_UNFRAME(msg) + output.append(update) + if update['prefix'] == 'PERF': + transaction_count += \ + len(update['payload']['daily_perf']['transactions']) + elif update['prefix'] == 'EXCEPTION': + break + elif update['prefix'] == 'DONE': break - else: - update = zp.BT_UPDATE_UNFRAME(msg) - output.append(update) - if update['prefix'] == 'PERF': - transaction_count += \ - len(update['payload']['daily_perf']['transactions']) - elif update['prefix'] == 'EXCEPTION': - break receiver.close() del receiver @@ -116,9 +115,6 @@ def drain_receiver(receiver): def assert_single_position(test, zipline): output, transaction_count = drain_zipline(test, zipline) - test.assertTrue(zipline.sim.ready()) - test.assertFalse(zipline.sim.exception) - test.assertEqual( test.zipline_test_config['order_count'], transaction_count @@ -127,7 +123,8 @@ def assert_single_position(test, zipline): # the final message is the risk report, the second to # last is the final day's results. Positions is a list of # dicts. 
- closing_positions = output[-2]['payload']['daily_perf']['positions'] + perfs = [x for x in output if x['prefix'] == 'PERF'] + closing_positions = perfs[-2]['payload']['daily_perf']['positions'] test.assertEqual( len(closing_positions), From 1f0e760856659a01ae74846a0cd3080185d65d4a Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 14:50:09 -0400 Subject: [PATCH 45/73] finish eventwindow ABS, and speedups for tradingcalendar --- tests/test_transforms.py | 6 +-- zipline/gens/mavg.py | 36 ++++++++++++---- zipline/gens/tradesimulation.py | 2 +- zipline/gens/transform.py | 58 ++++++++++++++++++++++---- zipline/gens/vwap.py | 28 ++++++++++--- zipline/utils/tradingcalendar.py | 71 +++++++++++++++++++++----------- 6 files changed, 152 insertions(+), 49 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index b9420633..2a13e4d7 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -100,9 +100,9 @@ class FinanceTransformsTestCase(TestCase): def test_moving_average(self): mavg = StatefulTransform( - MovingAverage, - timedelta(days = 2), - ['price', 'volume'] + MovingAverage, + fields = ['price', 'volume'], + delta = timedelta(days = 2), ) transformed = list(mavg.transform(self.source)) diff --git a/zipline/gens/mavg.py b/zipline/gens/mavg.py index 1978f92e..21aa0bd0 100644 --- a/zipline/gens/mavg.py +++ b/zipline/gens/mavg.py @@ -13,17 +13,39 @@ class MovingAverage(object): maintain a sid's average volume as well as its average price.) """ - def __init__(self, delta, fields): - self.delta = delta + def __init__(self, fields, market_aware, days = None, delta = None): + self.fields = fields + self.market_aware = market_aware + + self.delta = delta + self.days = days + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.days and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. 
+ else: + assert self.delta and not self.days, \ + "Non-market-aware mode requires a timedelta." + # No way to pass arguments to the defaultdict factory, so we # need to define a method to generate the correct EventWindows. self.sid_windows = defaultdict(self.create_window) def create_window(self): - """Factory method for self.sid_windows.""" - return MovingAverageEventWindow(self.delta, self.fields) - + """ + Factory method for self.sid_windows. + """ + return MovingAverageEventWindow( + self.fields, + self.market_aware, + self.days, + self.delta + ) + def update(self, event): """ Update the event window for this event's sid. Return an ndict @@ -45,11 +67,11 @@ class MovingAverageEventWindow(EventWindow): MovingAverage transform. """ - def __init__(self, delta, fields): + def __init__(self, fields, market_aware, days, delta): # Call the superclass constructor to set up base EventWindow # infrastructure. - EventWindow.__init__(self, delta) + EventWindow.__init__(self, market_aware, days, delta) # We maintain a dictionary of totals for each of our tracked # fields. diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 972f44e8..522d80da 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -74,7 +74,7 @@ class TradeSimulationClient(object): # Pipe the events with transactions to perf. This will remove # the txn field added by TransactionSimulator and replace it # with a portfolio object to be passed to the user's - # algorithm. Also adds a PERF_MESSAGE field which is usually + # algorithm. Also adds a perf_message field which is usually # none, but contains an update message once per day. perf_tracker = StatefulTransform( PerformanceTracker, diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 202acc0f..09f074ae 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -2,9 +2,10 @@ Generator versions of transforms. 
""" import types +import pytz from copy import deepcopy -from datetime import datetime +from datetime import datetime, timedelta from collections import deque, defaultdict from numbers import Number from abc import ABCMeta, abstractmethod @@ -149,6 +150,9 @@ class EventWindow: window. Calls self.handle_remove(event) for each event removed from the window. Subclass these methods along with init(*args, **kwargs) to calculate metrics over the window. + + The market_aware flag is used to toggle whether the eventwindow + calculates See zipline/gens/mavg.py and zipline/gens/vwap.py for example implementations of moving average and volume-weighted average @@ -157,10 +161,31 @@ class EventWindow: # Mark this as an abstract base class. __metaclass__ = ABCMeta - def __init__(self, delta): + def __init__(self, market_aware, days = None, delta = None): + + self.market_aware = market_aware + self.days = days + self.delta = delta + self.ticks = deque() - self.delta = delta + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.days and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.days, \ + "Non-market-aware mode requires a timedelta." + # Set the behavior for dropping events from the back of the + # event window. + if self.market_aware: + self.drop_condition = self.out_of_market_window + else: + self.drop_condition = self.out_of_timedelta + @abstractmethod def handle_add(self, event): raise NotImplementedError() @@ -174,22 +199,36 @@ class EventWindow: def update(self, event): self.assert_well_formed(event) + # Add new event and increment totals. self.ticks.append(event) + + # Subclasses should override handle_add to define behavior for + # adding new ticks. self.handle_add(event) - # Clear out expired event. + # Clear out any expired events. 
drop_condition changes depending + # on whether or not we are running in market_aware mode. # - # newest oldest - # | | - # V V - while (self.ticks[-1].dt - self.ticks[0].dt) > self.delta: + # oldest newest + # | | + # V V + while self.drop_condition(self.ticks[0].dt, self.ticks[-1].dt): + # popleft removes and returns the oldest tick in self.ticks popped = self.ticks.popleft() + # Subclasses should override handle_remove to define # behavior for removing ticks. self.handle_remove(popped) + def out_of_market_window(self, oldest, newest): + return trading_days_between(oldest, newest) >= self.days + + def out_of_delta(self, oldest, newest): + return (newest - oldest) >= self.delta + + # All event windows expect to receive events with datetime fields # that arrive in sorted order. def assert_well_formed(self, event): @@ -200,3 +239,6 @@ class EventWindow: # Something is wrong if new event is older than previous. assert event.dt >= self.ticks[-1].dt, \ "Events arrived out of order in EventWindow: %s -> %s" % (event, self.ticks[0]) + + + diff --git a/zipline/gens/vwap.py b/zipline/gens/vwap.py index 029284c1..5a0947d8 100644 --- a/zipline/gens/vwap.py +++ b/zipline/gens/vwap.py @@ -9,16 +9,33 @@ class VWAP(object): """ Class that maintains a dictionary from sids to VWAPEventWindows. """ - def __init__(self, delta): + def __init__(self, market_aware, delta=None, days=None): + + self.market_aware = market_aware self.delta = delta + self.days = days + + # Market-aware mode only works with full-day windows. + if self.market_aware: + assert self.days and not self.delta,\ + "Market-aware mode only works with full-day windows." + + # Non-market-aware mode requires a timedelta. + else: + assert self.delta and not self.days, \ + "Non-market-aware mode requires a timedelta." # No way to pass arguments to the defaultdict factory, so we # need to define a method to generate the correct EventWindows. 
self.sid_windows = defaultdict(self.create_window) - + def create_window(self): """Factory method for self.sid_windows.""" - return VWAPEventWindow(self.delta) + return VWAPEventWindow( + self.market_aware, + days = self.days, + delta = self.delta + ) def update(self, event): """ @@ -31,14 +48,13 @@ class VWAP(object): window.update(event) return window.get_vwap() - class VWAPEventWindow(EventWindow): """ Iteratively maintains a vwap for a single sid over a given timedelta. """ - def __init__(self, delta): - EventWindow.__init__(self, delta) + def __init__(self, market_aware, days=None, delta=None): + EventWindow.__init__(self, market_aware, days, delta) self.flux = 0.0 self.totalvolume = 0.0 diff --git a/zipline/utils/tradingcalendar.py b/zipline/utils/tradingcalendar.py index c7aa9152..7742b714 100644 --- a/zipline/utils/tradingcalendar.py +++ b/zipline/utils/tradingcalendar.py @@ -84,8 +84,7 @@ def earlier_in_day(d1, d2): """ Return true if d1 falls earlier in its own day than d2. """ - d1 = d1.replace(year = d2.year, day = d2.day) - return d1 < d2 + return d1.time() < d2.time() WEEKDAYS = [rrule.MO, rrule.TU, rrule.WE, rrule.TH, rrule.FR] @@ -97,7 +96,8 @@ market_opens_with_holidays = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart=datetime(1970, 1, 1, tzinfo = pytz.utc), + dtstart=datetime(2000, 1, 1, tzinfo = pytz.utc), + until=datetime(2014 , 1, 1, tzinfo = pytz.utc) ) # Recurrence rule that generates all market closes since Jan 1, 1970. @@ -108,7 +108,8 @@ market_closes_with_holidays = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart=datetime(1970, 1, 1, tzinfo = pytz.utc), + dtstart=datetime(2001, 1, 1, tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for excluding the market open/close on new years. 
@@ -118,7 +119,8 @@ new_years_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) new_years_closes = rrule.rrule( rrule.MONTHLY, @@ -126,7 +128,8 @@ new_years_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for excluding MLK day. It is always the third @@ -138,7 +141,8 @@ mlk_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) mlk_closes = rrule.rrule( rrule.MONTHLY, @@ -147,7 +151,8 @@ mlk_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for generating the market open/close for @@ -160,7 +165,8 @@ presidents_day_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) presidents_day_closes = rrule.rrule( rrule.MONTHLY, @@ -169,7 +175,8 @@ presidents_day_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for generating the market open/close for good @@ -181,7 +188,8 @@ good_friday_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + 
until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) good_friday_closes = rrule.rrule( rrule.DAILY, @@ -189,7 +197,8 @@ good_friday_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for generating the market open/close for memorial @@ -201,7 +210,8 @@ memorial_day_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) memorial_day_closes = rrule.rrule( rrule.MONTHLY, @@ -210,7 +220,8 @@ memorial_day_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rules for generating the market open/close for July 4th. @@ -221,7 +232,8 @@ july_4th_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) july_4th_closes = rrule.rrule( rrule.MONTHLY, @@ -230,7 +242,8 @@ july_4th_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rule for generating the market open/close for labor day. 
@@ -242,7 +255,8 @@ labor_day_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) labor_day_closes = rrule.rrule( rrule.MONTHLY, @@ -251,7 +265,8 @@ labor_day_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence rule for generating the market open/close for @@ -264,7 +279,8 @@ thanksgiving_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) thanksgiving_closes = rrule.rrule( rrule.MONTHLY, @@ -273,7 +289,8 @@ thanksgiving_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) # Recurrence relation for generating the market open/close for @@ -286,7 +303,8 @@ christmas_opens = rrule.rrule( byhour = 14, byminute = 30, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) christmas_closes = rrule.rrule( rrule.MONTHLY, @@ -295,8 +313,10 @@ christmas_closes = rrule.rrule( byhour = 21, byminute = 0, cache = True, - dtstart = datetime(1970, 1,1,tzinfo = pytz.utc) + dtstart = datetime(2000, 1,1,tzinfo = pytz.utc), + until=datetime(2014, 1, 1, tzinfo = pytz.utc) ) + # All NYSE observed holidays. holiday_opens = [ new_years_opens, @@ -322,12 +342,15 @@ holiday_closes = [ ] # Valid market opens are given by all market opens minus holidays. 
-opens = rrule.rruleset() +opens = rrule.rruleset(cache=True) opens.rrule(market_opens_with_holidays) for holiday_rule in holiday_opens: opens.exrule(holiday_rule) +open_count = opens.count() -closes = rrule.rruleset() +closes = rrule.rruleset(cache=True) closes.rrule(market_closes_with_holidays) for holiday_rule in holiday_closes: closes.exrule(holiday_rule) +close_count = closes.count() + From c45b49a315a501bcac3bd9c9818cb842f4376b79 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 15:11:22 -0400 Subject: [PATCH 46/73] fix method name typo in eventwindow --- zipline/gens/transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 09f074ae..5651eea9 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -184,7 +184,7 @@ class EventWindow: if self.market_aware: self.drop_condition = self.out_of_market_window else: - self.drop_condition = self.out_of_timedelta + self.drop_condition = self.out_of_delta @abstractmethod def handle_add(self, event): From 1959a8388cc9c078850f4c8fdf21f1cdb7c931ef Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 15:42:33 -0400 Subject: [PATCH 47/73] re-commit for fawce --- zipline/gens/transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index 5651eea9..ee3d1621 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -213,6 +213,7 @@ class EventWindow: # oldest newest # | | # V V + import nose.tools; nose.tools.set_trace() while self.drop_condition(self.ticks[0].dt, self.ticks[-1].dt): # popleft removes and returns the oldest tick in self.ticks From acc88793ade4ac063ab1e8b151a2431ae79837e6 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 15:46:56 -0400 Subject: [PATCH 48/73] patching test to match new MovingAverage init. 
--- tests/test_components.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 83b96f0c..f351dac6 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -351,8 +351,9 @@ class ComponentTestCase(TestCase): passthrough = StatefulTransform(Passthrough) mavg_price = StatefulTransform( MovingAverage, - timedelta(minutes = 20), - ['price'] + ['price'], + market_aware = False, + delta=timedelta(minutes = 20) ) merged_gen = merged_transforms(sorted, passthrough, mavg_price) From 172ed2aafec5786d46423298b0b5da827c14b898 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 15:49:04 -0400 Subject: [PATCH 49/73] removed the set_trace --- zipline/gens/transform.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index ee3d1621..79719263 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -62,8 +62,8 @@ class StatefulTransform(object): self.append_value = tnfm_class.__dict__.get('APPENDER', False) # You only one special behavior mode can be set. - assert sum(map(int, [self.forward_all, - self.update_in_place, + assert sum(map(int, [self.forward_all, + self.update_in_place, self.append_value])) <= 1 # Create an instance of our transform class. @@ -81,14 +81,14 @@ class StatefulTransform(object): def _gen(self, stream_in): # IMPORTANT: Messages may contain pointers that are shared with # other streams, so we only manipulate copies. - + for message in stream_in: # allow upstream generators to yield None to avoid # blocking. if message == None: continue - + #TODO: refactor this to avoid unnecessary copying. assert_sort_unframe_protocol(message) @@ -117,7 +117,7 @@ class StatefulTransform(object): # TransactionSimulator. elif self.update_in_place: yield tnfm_value - + # APPENDER flag should be used to add a single new # key-value pair to the event. 
The new key is this # transform's namestring, and it's value is the value @@ -149,8 +149,8 @@ class EventWindow: tick. Calls self.handle_add(event) for each event added to the window. Calls self.handle_remove(event) for each event removed from the window. Subclass these methods along with init(*args, - **kwargs) to calculate metrics over the window. - + **kwargs) to calculate metrics over the window. + The market_aware flag is used to toggle whether the eventwindow calculates @@ -178,7 +178,7 @@ class EventWindow: else: assert self.delta and not self.days, \ "Non-market-aware mode requires a timedelta." - + # Set the behavior for dropping events from the back of the # event window. if self.market_aware: @@ -213,16 +213,15 @@ class EventWindow: # oldest newest # | | # V V - import nose.tools; nose.tools.set_trace() while self.drop_condition(self.ticks[0].dt, self.ticks[-1].dt): - + # popleft removes and returns the oldest tick in self.ticks popped = self.ticks.popleft() # Subclasses should override handle_remove to define # behavior for removing ticks. self.handle_remove(popped) - + def out_of_market_window(self, oldest, newest): return trading_days_between(oldest, newest) >= self.days @@ -240,6 +239,3 @@ class EventWindow: # Something is wrong if new event is older than previous. 
assert event.dt >= self.ticks[-1].dt, \ "Events arrived out of order in EventWindow: %s -> %s" % (event, self.ticks[0]) - - - From 1d9da39fbba527dd0f98d094b41e651a41e210d8 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 16:54:55 -0400 Subject: [PATCH 50/73] new-style returns --- tests/test_transforms.py | 71 ++++++++++------------------- zipline/finance/returns.py | 47 ------------------- zipline/gens/returns.py | 77 ++++++++++++++++++++++++++++++++ zipline/gens/transform.py | 3 +- zipline/utils/tradingcalendar.py | 4 +- 5 files changed, 105 insertions(+), 97 deletions(-) delete mode 100644 zipline/finance/returns.py create mode 100644 zipline/gens/returns.py diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 2a13e4d7..7d417ba6 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -10,41 +10,12 @@ from zipline.gens.tradegens import SpecificEquityTrades from zipline.gens.transform import StatefulTransform from zipline.gens.vwap import VWAP from zipline.gens.mavg import MovingAverage -from zipline.finance.returns import ReturnsFromPriorClose +from zipline.gens.returns import Returns from zipline.lines import SimulatedTrading from zipline.core.devsimulator import AddressAllocator allocator = AddressAllocator(1000) -class ZiplineWithTransformsTestCase(TestCase): - leased_sockets = defaultdict(list) - - def setUp(self): - # skip ahead 100 spots - allocator.lease(100) - self.trading_environment = factory.create_trading_environment() - self.zipline_test_config = { - 'allocator' : allocator, - 'sid' : 133, - 'devel' : True - } - setup_logger(self, '/var/log/qexec/qexec.log') - - def tearDown(self): - teardown_logger(self) - - def test_vwap_tnfm(self): - zipline = SimulatedTrading.create_test_zipline( - **self.zipline_test_config - ) - vwap = VWAPTransform("vwap_10", daycount=10) - zipline.add_transform(vwap) - - zipline.simulate(blocking=True) - - self.assertTrue(zipline.sim.ready()) - 
self.assertFalse(zipline.sim.exception) - class FinanceTransformsTestCase(TestCase): def setUp(self): @@ -64,7 +35,12 @@ class FinanceTransformsTestCase(TestCase): self.log_handler.pop_application() def test_vwap(self): - vwap = StatefulTransform(VWAP, timedelta(days = 2)) + + vwap = StatefulTransform( + VWAP, + market_aware = False, + delta = timedelta(days = 2) + ) transformed = list(vwap.transform(self.source)) # Output values @@ -72,35 +48,32 @@ class FinanceTransformsTestCase(TestCase): # "Hand calculated" values. expected = [(10.0 * 100) / 100.0, ((10.0 * 100) + (10.0 * 100)) / (200.0), - ((10.0 * 100) + (10.0 * 100) + (11.0 * 100)) / (300.0), - # First event should get droppped here. - ((10.0 * 100) + (11.0 * 100) + (11.0 * 300)) / (500.0)] + # We should drop the first event here. + ((10.0 * 100) + (11.0 * 100)) / (200.0), + # We should drop the second event here. + ((11.0 * 100) + (11.0 * 300)) / (400.0)] # Output should match the expected. assert tnfm_vals == expected - def test_returns(self): trade_history = factory.create_trade_history( 133, [10.0, 10.0, 10.0, 11.0], [100, 100, 100, 300], timedelta(days=1), - self.trading_environment ) - - returns = ReturnsFromPriorClose() + returns = StatefulTransform( + Returns for trade in trade_history: returns.update(trade) - - self.assertEqual(returns.returns, .1) - def test_moving_average(self): - + mavg = StatefulTransform( MovingAverage, + market_aware = False, fields = ['price', 'volume'], delta = timedelta(days = 2), ) @@ -109,17 +82,21 @@ class FinanceTransformsTestCase(TestCase): # Output values. tnfm_prices = [message.tnfm_value.price for message in transformed] tnfm_volumes = [message.tnfm_value.volume for message in transformed] + # "Hand-calculated" values expected_prices = [((10.0) / 1.0), ((10.0 + 10.0) / 2.0), - ((10.0 + 10.0 + 11.0) / 3.0), # First event should get dropped here. - ((10.0 + 11.0 + 11.0) / 3.0)] + ((10.0 + 11.0) / 2.0), + # Second event should get dropped here. 
+ ((11.0 + 11.0) / 2.0)] + expected_volumes = [((100.0) / 1.0), ((100.0 + 100.0) / 2.0), - ((100.0 + 100.0 + 100.0) / 3.0), - # First event should get dropped here. - ((100.0 + 100.0 + 300.0) / 3.0)] + # First event should get dropped here. + ((100.0 + 100.0) / 2.0), + # Second event should get dropped here. + ((100.0 + 300.0) / 2.0)] assert tnfm_prices == expected_prices assert tnfm_volumes == expected_volumes diff --git a/zipline/finance/returns.py b/zipline/finance/returns.py deleted file mode 100644 index 6e390364..00000000 --- a/zipline/finance/returns.py +++ /dev/null @@ -1,47 +0,0 @@ -from collections import defaultdict -from zipline.transforms.base import BaseTransform - -class Returns(object): - """ - Class that maintains a dictionary from sids to the event - representing the most recent closing price. - """ - def __init__(self, days == 1): - self.days = days - self.mapping = defaultdict(self._create) - - def update(self, event): - """ - Update and return the calculated returns for this event's sid. - """ - sid_returns = self.mapping[event.sid].update(event) - return sid_returns - - def _create(self): - return ReturnsFromPriorClose(days) - -class ReturnsFromPriorClose(object): - """ - Calculates a security's returns since the previous close, using the - current price. - """ - - def __init__(self): - self.last_close = None - self.last_event = None - self.returns = 0.0 - - def update(self, event): - if self.last_close: - change = event.price - self.last_close.price - self.returns = change / self.last_close.price - - if self.last_event: - if self.last_event.dt.day != event.dt.day: - # the current event is from the day after - # the last event. 
Therefore the last event was - # the last close - self.last_close = self.last_event - - # the current event is now the last_event - self.last_event = event diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py new file mode 100644 index 00000000..8775a125 --- /dev/null +++ b/zipline/gens/returns.py @@ -0,0 +1,77 @@ +from collections import defaultdict +from zipline.transforms.base import BaseTransform +from zipline.utils.tradingcalendar import market_closes + +class Returns(object): + """ + Class that maintains a dictionary from sids to the sid's + closing price N trading days ago. + """ + def __init__(self, days): + self.days = days + self.mapping = defaultdict(self._create) + + def update(self, event): + """ + Update and return the calculated returns for this event's sid. + """ + assert event.has_key('dt') + assert event.has_key('price') + + tracker = self.mapping[event.sid] + tracker.update(event) + + return tracker.get_returns() + + def _create(self): + return ReturnsFromPriorClose(days) + +class ReturnsFromPriorClose(object): + """ + Records the last N closing events for a given security as well as the + last event for the security. When we get an event for a new day, we + treat the last event seen as the close for the previous day. + """ + + def __init__(self, days): + self.closes = deque() + self.last_event = None + self.returns = None + self.days = days + + def get_returns(self): + return self.returns + + def update(self, event): + + if self.last_event: + + # Day has changed since the last event we saw. Treat + # the last event as the closing price for its day and + # clear out the oldest close if it has expired. + if self.last_event.dt.date() != event.dt.date(): + + self.closes.append(self.last_event) + + # We keep an event for the end of each trading day, so + # if the number of stored events is greater than the + # number of days we want to track, the oldest close + # is expired and should be discarded. 
+ if len(self.closes) > self.days: + # Pop the oldest event. + self.closes.popleft() + + # We only generate a return value once we've seen enough days + # to give a sensible value. Would be nice if we could query + # db for closes prior to our initial event, but that would + # require giving this transform database creds, which we want + # to avoid. + + if len(self.closes) == self.days: + change = event.price - self.closes[0].price + self.returns = change / self.last_close.price + + + # the current event is now the last_event + self.last_event = event + diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index ee3d1621..fdf545a1 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -26,6 +26,7 @@ class Passthrough(object): def update(self, event): pass +# Deprecated def functional_transform(stream_in, func, *args, **kwargs): """ Generic transform generator that takes each message from an in-stream @@ -213,7 +214,6 @@ class EventWindow: # oldest newest # | | # V V - import nose.tools; nose.tools.set_trace() while self.drop_condition(self.ticks[0].dt, self.ticks[-1].dt): # popleft removes and returns the oldest tick in self.ticks @@ -229,7 +229,6 @@ class EventWindow: def out_of_delta(self, oldest, newest): return (newest - oldest) >= self.delta - # All event windows expect to receive events with datetime fields # that arrive in sorted order. 
def assert_well_formed(self, event): diff --git a/zipline/utils/tradingcalendar.py b/zipline/utils/tradingcalendar.py index 7742b714..f760e51e 100644 --- a/zipline/utils/tradingcalendar.py +++ b/zipline/utils/tradingcalendar.py @@ -346,11 +346,13 @@ opens = rrule.rruleset(cache=True) opens.rrule(market_opens_with_holidays) for holiday_rule in holiday_opens: opens.exrule(holiday_rule) -open_count = opens.count() closes = rrule.rruleset(cache=True) closes.rrule(market_closes_with_holidays) for holiday_rule in holiday_closes: closes.exrule(holiday_rule) + +# This runs the calendar to load all data into a cache. +open_count = opens.count() close_count = closes.count() From b4e0639d53b95038d97cfe296ba85b8f9934d7ed Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 17:24:53 -0400 Subject: [PATCH 51/73] pushing returns for fawce --- zipline/gens/returns.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index 8775a125..1fcfabaa 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -1,6 +1,5 @@ -from collections import defaultdict +from collections import defaultdict, deque from zipline.transforms.base import BaseTransform -from zipline.utils.tradingcalendar import market_closes class Returns(object): """ @@ -17,14 +16,15 @@ class Returns(object): """ assert event.has_key('dt') assert event.has_key('price') - + + import nose.tools; nose.tools.set_trace() tracker = self.mapping[event.sid] tracker.update(event) return tracker.get_returns() def _create(self): - return ReturnsFromPriorClose(days) + return ReturnsFromPriorClose(self.days) class ReturnsFromPriorClose(object): """ @@ -57,7 +57,7 @@ class ReturnsFromPriorClose(object): # if the number of stored events is greater than the # number of days we want to track, the oldest close # is expired and should be discarded. 
- if len(self.closes) > self.days: + while len(self.closes) > self.days: # Pop the oldest event. self.closes.popleft() @@ -68,8 +68,9 @@ class ReturnsFromPriorClose(object): # to avoid. if len(self.closes) == self.days: - change = event.price - self.closes[0].price - self.returns = change / self.last_close.price + last_close = self.closes[0] + change = event.price - last_close + self.returns = change / last_close # the current event is now the last_event From 8ab12cc8f89a836cfdb52f1d5d026c55b9b8e498 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 17:48:35 -0400 Subject: [PATCH 52/73] tests for returns --- tests/test_transforms.py | 87 ++++++++++++++++++++++++++++------------ zipline/gens/returns.py | 3 +- 2 files changed, 62 insertions(+), 28 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 0835cb71..a7beac1f 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -46,29 +46,61 @@ class FinanceTransformsTestCase(TestCase): # Output values tnfm_vals = [message.tnfm_value for message in transformed] # "Hand calculated" values. - expected = [(10.0 * 100) / 100.0, - ((10.0 * 100) + (10.0 * 100)) / (200.0), - # We should drop the first event here. - ((10.0 * 100) + (11.0 * 100)) / (200.0), - # We should drop the second event here. - ((11.0 * 100) + (11.0 * 300)) / (400.0)] + expected = [ + (10.0 * 100) / 100.0, + ((10.0 * 100) + (10.0 * 100)) / (200.0), + # We should drop the first event here. + ((10.0 * 100) + (11.0 * 100)) / (200.0), + # We should drop the second event here. + ((11.0 * 100) + (11.0 * 300)) / (400.0) + ] # Output should match the expected. assert tnfm_vals == expected def test_returns(self): + # Daily returns. + returns = StatefulTransform(Returns, 1) + + transformed = list(returns.transform(self.source)) + tnfm_vals = [message.tnfm_value for message in transformed] + + # No returns for the first event because we don't have a + # previous close. 
+ expected = [None, 0.0, 0.1, 0.0] + + assert tnfm_vals == expected + + + # Two-day returns. An extra kink here is that the + # factory will automatically skip a weekend for the + # last event. Results shouldn't notice this blip. + trade_history = factory.create_trade_history( 133, - [10.0, 10.0, 10.0, 11.0], - [100, 100, 100, 300], + [10.0, 15.0, 13.0, 12.0, 13.0], + [100, 100, 100, 300, 100], timedelta(days=1), + self.trading_environment ) - returns = StatefulTransform( - Returns - for trade in trade_history: - returns.update(trade) - self.assertEqual(returns.returns, .1) + self.source = SpecificEquityTrades(event_list=trade_history) + returns = StatefulTransform(Returns, 2) + + transformed = list(returns.transform(self.source)) + tnfm_vals = [message.tnfm_value for message in transformed] + + expected = [ + None, + None, + (13.0 - 10.0) / 10.0, + (12.0 - 15.0) / 15.0, + (13.0 - 13.0) / 13.0 + ] + + import nose.tools; nose.tools.set_trace() + assert tnfm_vals == expected + def test_moving_average(self): mavg = StatefulTransform( @@ -83,19 +115,22 @@ class FinanceTransformsTestCase(TestCase): tnfm_volumes = [message.tnfm_value.volume for message in transformed] # "Hand-calculated" values - expected_prices = [((10.0) / 1.0), - ((10.0 + 10.0) / 2.0), - # First event should get dropped here. - ((10.0 + 11.0) / 2.0), - # Second event should get dropped here. - ((11.0 + 11.0) / 2.0)] - - expected_volumes = [((100.0) / 1.0), - ((100.0 + 100.0) / 2.0), - # First event should get dropped here. - ((100.0 + 100.0) / 2.0), - # Second event should get dropped here. - ((100.0 + 300.0) / 2.0)] + expected_prices = [ + ((10.0) / 1.0), + ((10.0 + 10.0) / 2.0), + # First event should get dropped here. + ((10.0 + 11.0) / 2.0), + # Second event should get dropped here. + ((11.0 + 11.0) / 2.0) + ] + expected_volumes = [ + ((100.0) / 1.0), + ((100.0 + 100.0) / 2.0), + # First event should get dropped here. + ((100.0 + 100.0) / 2.0), + # Second event should get dropped here. 
+ ((100.0 + 300.0) / 2.0) + ] assert tnfm_prices == expected_prices assert tnfm_volumes == expected_volumes diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index 1fcfabaa..902693ff 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -17,7 +17,6 @@ class Returns(object): assert event.has_key('dt') assert event.has_key('price') - import nose.tools; nose.tools.set_trace() tracker = self.mapping[event.sid] tracker.update(event) @@ -68,7 +67,7 @@ class ReturnsFromPriorClose(object): # to avoid. if len(self.closes) == self.days: - last_close = self.closes[0] + last_close = self.closes[0].price change = event.price - last_close self.returns = change / last_close From e8f17083f7cfa8b39aba1c29ad061462df138824 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 17:49:56 -0400 Subject: [PATCH 53/73] merge --- zipline/gens/returns.py | 16 +++++++--------- zipline/lines.py | 19 +++++++++++++++++-- zipline/utils/test_utils.py | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index 1fcfabaa..6b04bc43 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -9,18 +9,17 @@ class Returns(object): def __init__(self, days): self.days = days self.mapping = defaultdict(self._create) - + def update(self, event): """ Update and return the calculated returns for this event's sid. """ assert event.has_key('dt') assert event.has_key('price') - - import nose.tools; nose.tools.set_trace() + tracker = self.mapping[event.sid] tracker.update(event) - + return tracker.get_returns() def _create(self): @@ -29,7 +28,7 @@ class Returns(object): class ReturnsFromPriorClose(object): """ Records the last N closing events for a given security as well as the - last event for the security. When we get an event for a new day, we + last event for the security. When we get an event for a new day, we treat the last event seen as the close for the previous day. 
""" @@ -43,11 +42,11 @@ class ReturnsFromPriorClose(object): return self.returns def update(self, event): - + if self.last_event: # Day has changed since the last event we saw. Treat - # the last event as the closing price for its day and + # the last event as the closing price for its day and # clear out the oldest close if it has expired. if self.last_event.dt.date() != event.dt.date(): @@ -68,11 +67,10 @@ class ReturnsFromPriorClose(object): # to avoid. if len(self.closes) == self.days: - last_close = self.closes[0] + last_close = self.closes[0] change = event.price - last_close self.returns = change / last_close # the current event is now the last_event self.last_event = event - diff --git a/zipline/lines.py b/zipline/lines.py index 4a54fb5d..5fe87a0d 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -61,7 +61,10 @@ before invoking simulate. """ import sys import zmq +import os +from signal import SIGHUP import multiprocessing +from setproctitle import setproctitle from zipline.test_algorithms import TestAlgorithm from zipline.finance.trading import SIMULATION_STYLE @@ -111,15 +114,17 @@ class SimulatedTrading(object): # optional process if we fork simulate into an # independent process. 
self.proc = None + self.send_sighup = False self.logger = Logger(sim_id) - def simulate(self, blocking=True): + def simulate(self, blocking=True, send_sighup=False): # for non-blocking, if blocking: self.run_gen() else: + self.send_sighup = send_sighup return self.fork_and_sim() def fork_and_sim(self): @@ -128,7 +133,7 @@ class SimulatedTrading(object): return self.proc def run_gen(self): - + setproctitle(self.sim_id) self.open() if self.zmq_out: @@ -169,6 +174,10 @@ class SimulatedTrading(object): def close(self): log.info("Closing Simulation: {id}".format(id=self.sim_id)) + if self.proc and self.send_sighup: + ppid = os.getppid() + log.warning("Sending SIGHUP") + os.kill(ppid, SIGHUP) def cancel(self): if self.proc and self.proc.is_alive(): @@ -234,6 +243,12 @@ class SimulatedTrading(object): if self.proc: self.proc.join() + def get_pids(self): + if self.proc: + return [self.proc.pid] + else: + return [] + @staticmethod def create_test_zipline(**config): """ diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 036ebe02..e94c49ee 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -113,6 +113,7 @@ def drain_receiver(receiver): def assert_single_position(test, zipline): + import nose.tools; nose.tools.set_trace() output, transaction_count = drain_zipline(test, zipline) test.assertEqual( From d2d9ef135992ca88426b91ce275cb1c0c0f062b0 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 17:55:27 -0400 Subject: [PATCH 54/73] removed a few debug calls --- tests/test_transforms.py | 33 ++++++++++++++++----------------- zipline/gens/returns.py | 5 ----- zipline/utils/test_utils.py | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index a7beac1f..9050086e 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -35,9 +35,9 @@ class FinanceTransformsTestCase(TestCase): self.log_handler.pop_application() def test_vwap(self): - + vwap 
= StatefulTransform( - VWAP, + VWAP, market_aware = False, delta = timedelta(days = 2) ) @@ -53,29 +53,29 @@ class FinanceTransformsTestCase(TestCase): ((10.0 * 100) + (11.0 * 100)) / (200.0), # We should drop the second event here. ((11.0 * 100) + (11.0 * 300)) / (400.0) - ] + ] # Output should match the expected. assert tnfm_vals == expected - + def test_returns(self): # Daily returns. returns = StatefulTransform(Returns, 1) - + transformed = list(returns.transform(self.source)) tnfm_vals = [message.tnfm_value for message in transformed] # No returns for the first event because we don't have a # previous close. expected = [None, 0.0, 0.1, 0.0] - + assert tnfm_vals == expected # Two-day returns. An extra kink here is that the # factory will automatically skip a weekend for the # last event. Results shouldn't notice this blip. - + trade_history = factory.create_trade_history( 133, [10.0, 15.0, 13.0, 12.0, 13.0], @@ -86,29 +86,28 @@ class FinanceTransformsTestCase(TestCase): self.source = SpecificEquityTrades(event_list=trade_history) returns = StatefulTransform(Returns, 2) - + transformed = list(returns.transform(self.source)) tnfm_vals = [message.tnfm_value for message in transformed] - + expected = [ None, - None, + None, (13.0 - 10.0) / 10.0, (12.0 - 15.0) / 15.0, (13.0 - 13.0) / 13.0 ] - - import nose.tools; nose.tools.set_trace() + assert tnfm_vals == expected - + def test_moving_average(self): mavg = StatefulTransform( - MovingAverage, + MovingAverage, market_aware = False, fields = ['price', 'volume'], - delta = timedelta(days = 2), - ) + delta = timedelta(days = 2), + ) transformed = list(mavg.transform(self.source)) # Output values. tnfm_prices = [message.tnfm_value.price for message in transformed] @@ -131,6 +130,6 @@ class FinanceTransformsTestCase(TestCase): # Second event should get dropped here. 
((100.0 + 300.0) / 2.0) ] - + assert tnfm_prices == expected_prices assert tnfm_volumes == expected_volumes diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index dd83fe7f..055b9735 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -16,11 +16,6 @@ class Returns(object): """ assert event.has_key('dt') assert event.has_key('price') -<<<<<<< HEAD - -======= - ->>>>>>> 28e6dc15b0b5fc9767ea298c7a2d9cacc05b842e tracker = self.mapping[event.sid] tracker.update(event) diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index e94c49ee..036ebe02 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -113,7 +113,6 @@ def drain_receiver(receiver): def assert_single_position(test, zipline): - import nose.tools; nose.tools.set_trace() output, transaction_count = drain_zipline(test, zipline) test.assertEqual( From 74859751fff87c106f0f2644c2830eee372f6e09 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 18:25:22 -0400 Subject: [PATCH 55/73] whitespace --- zipline/finance/trading.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index baa21e58..1d82bc66 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -155,7 +155,6 @@ class TransactionSimulator(object): } return zp.ndict(txn) - class TradingEnvironment(object): def __init__( From 0848ef9f9b3c48665b744cb28cf3a2f417020019 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 20:17:41 -0400 Subject: [PATCH 56/73] tweaks to patch leaky abstractions --- zipline/gens/merge.py | 4 ++++ zipline/gens/returns.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index c4afb1b4..0689d507 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -66,6 +66,10 @@ def merge_one(sources): # Merge any remaining fields into the event dict. 
event_fields.merge(message) + + # alias dt with datetime, per algoscript api + event_fields['datetime'] = event_fields['dt'] + return event_fields diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index 055b9735..ccfc7b00 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -34,7 +34,7 @@ class ReturnsFromPriorClose(object): def __init__(self, days): self.closes = deque() self.last_event = None - self.returns = None + self.returns = 0.0 self.days = days def get_returns(self): From 0489bc42169afc0af1393fbe2fcc721443938147 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Tue, 7 Aug 2012 21:46:56 -0400 Subject: [PATCH 57/73] tests for eventwindow --- tests/test_transforms.py | 155 +++++++++++++++++++++++++++++++++++--- zipline/gens/returns.py | 2 +- zipline/gens/transform.py | 3 +- zipline/lines.py | 2 +- 4 files changed, 147 insertions(+), 15 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 9050086e..491df1b5 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1,20 +1,152 @@ -from datetime import timedelta +import pytz + +from datetime import timedelta, datetime from collections import defaultdict from unittest2 import TestCase -from zipline.utils.test_utils import setup_logger, teardown_logger +from zipline import ndict -import zipline.utils.factory as factory +from zipline.lines import SimulatedTrading + +from zipline.utils.test_utils import setup_logger, teardown_logger +from zipline.utils.date_utils import utcnow from zipline.gens.tradegens import SpecificEquityTrades -from zipline.gens.transform import StatefulTransform +from zipline.gens.transform import StatefulTransform, EventWindow from zipline.gens.vwap import VWAP from zipline.gens.mavg import MovingAverage from zipline.gens.returns import Returns -from zipline.lines import SimulatedTrading -from zipline.core.devsimulator import AddressAllocator -allocator = AddressAllocator(1000) +import zipline.utils.factory as 
factory + +def to_dt(msg): + return ndict({'dt': msg}) + +class NoopEventWindow(EventWindow): + """ + A no-op EventWindow subclass for testing the base EventWindow logic. + Keeps lists of all added and dropped events. + """ + def __init__(self, market_aware, days, delta): + EventWindow.__init__(self, market_aware, days, delta) + + self.added = [] + self.removed = [] + + def handle_add(self, event): + self.added.append(event) + + def handle_remove(self, event): + self.removed.append(event) + +class EventWindowTestCase(TestCase): + + def setUp(self): + setup_logger(self) + + # Constants calling before open, during the day, and after + # close on a valid trading day. + self.pre_open = datetime(2012, 8, 7, 13, tzinfo = pytz.utc) + self.mid_day = datetime(2012, 8, 7, 15, tzinfo = pytz.utc) + self.post_close = datetime(2012, 8, 7, 22, tzinfo = pytz.utc) + + # Constants calling before open, during the day, and after + # close on a saturday. + self.pre_open_saturday = datetime(2012, 8, 11, 13, tzinfo = pytz.utc) + self.mid_day_saturday = datetime(2012, 8, 11, 15, tzinfo = pytz.utc) + self.post_close_saturday = datetime(2012, 8, 11, 22, tzinfo = pytz.utc) + + # Constants calling before open, during the day, and after + # close on a holiday. + self.pre_open_holiday = datetime(2012, 12, 25, 13, tzinfo = pytz.utc) + self.mid_day_holiday = datetime(2012, 12, 25, tzinfo = pytz.utc) + self.post_close_holiday = datetime(2012, 12, 25, 22, tzinfo = pytz.utc) + + def test_event_window_with_timedelta(self): + + # Keep all events within a 5 minute window. + window = NoopEventWindow( + market_aware = False, + delta = timedelta(minutes = 5), + days = None + ) + now = utcnow() + + # 15 dates, increasing in 1 minute increments. + dates = [now + i * timedelta(minutes = 1) + for i in xrange(15)] + + # Turn the dates into the format required by EventWindow. 
+ dt_messages = [to_dt(date) for date in dates] + + # Run all messages through the window and assert that we're adding + # and removing messages appropriately. We start the enumeration at 1 + # for convenience. + for num, message in enumerate(dt_messages, 1): + window.update(message) + + # Assert that we've added the correct number of events. + assert len(window.added) == num + + # Assert that we removed only events that fall outside (or + # on the boundary of) the delta. + for dropped in window.removed: + assert message.dt - dropped.dt >= timedelta(minutes = 5) + + def test_market_aware_window(self): + window = NoopEventWindow( + market_aware = True, + delta = None, + days = 1 + ) + dates = ([self.pre_open]*3) + dates += ([self.mid_day]*3) + dates += ([self.post_close]*3) + dates += [self.pre_open + timedelta(days = 1, seconds = 1)] + events = [to_dt(date) for date in dates] + + # Run the events. + for event in events: + window.update(event) + + # We should have removed the pre_open events on the first day. + # The rest should be intact. + + assert window.added == events + assert window.removed == events[0:3] + assert list(window.ticks) == events[3:] + + def test_market_aware_window_weekend(self): + window = NoopEventWindow( + market_aware = True, + delta = None, + days = 2 + ) + dates = [self.pre_open_saturday - timedelta(days = 1, seconds=1)] + dates += [self.mid_day_saturday - timedelta(days = 1, seconds=1)] + dates += [self.post_close_saturday - timedelta(days = 1, seconds=1)] + dates += [self.mid_day_saturday + timedelta(days = 1)] + + events = [to_dt(date) for date in dates] + + # Run the events. + for event in events: + window.update(event) + + # We shouldn't remove any events. + assert window.added == events + assert window.removed == [] + assert list(window.ticks) == events + + extra = to_dt(self.mid_day_saturday + timedelta(days = 2)) + window.update(extra) + + # We should remove only the first event. 
+ assert window.removed == [events[0]] + assert list(window.ticks) == events[1:] + [extra] + + def tearDown(self): + setup_logger(self) class FinanceTransformsTestCase(TestCase): @@ -67,11 +199,10 @@ class FinanceTransformsTestCase(TestCase): # No returns for the first event because we don't have a # previous close. - expected = [None, 0.0, 0.1, 0.0] + expected = [0.0, 0.0, 0.1, 0.0] assert tnfm_vals == expected - - + # Two-day returns. An extra kink here is that the # factory will automatically skip a weekend for the # last event. Results shouldn't notice this blip. @@ -91,8 +222,8 @@ class FinanceTransformsTestCase(TestCase): tnfm_vals = [message.tnfm_value for message in transformed] expected = [ - None, - None, + 0.0, + 0.0, (13.0 - 10.0) / 10.0, (12.0 - 15.0) / 15.0, (13.0 - 13.0) / 13.0 diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index 055b9735..ccfc7b00 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -34,7 +34,7 @@ class ReturnsFromPriorClose(object): def __init__(self, days): self.closes = deque() self.last_event = None - self.returns = None + self.returns = 0.0 self.days = days def get_returns(self): diff --git a/zipline/gens/transform.py b/zipline/gens/transform.py index dbb7c75b..651c337d 100644 --- a/zipline/gens/transform.py +++ b/zipline/gens/transform.py @@ -125,7 +125,8 @@ class StatefulTransform(object): # returned by state.update(event). This is almost # identical to the behavior of FORWARDER, except we # compress the two calculated values (tnfm_id, and - # tnfm_value) into a single field. + # tnfm_value) into a single field. This mode is used by + # the sequential_transforms composite. 
elif self.append_value: out_message = message_copy out_message[self.namestring] = tnfm_value diff --git a/zipline/lines.py b/zipline/lines.py index 5fe87a0d..0fa990ca 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -307,7 +307,7 @@ class SimulatedTrading(object): simulation_style = SIMULATION_STYLE.FIXED_SLIPPAGE zmq_context = config.get('zmq_context', None) - simulation_id = config.get('simumlation_id', 'test_simulation') + simulation_id = config.get('simulation_id', 'test_simulation') results_socket_uri = config.get('results_socket_uri', None) #------------------- From 1e5191b60cbfa2e9fb28b6e1f38c9063908409a6 Mon Sep 17 00:00:00 2001 From: fawce Date: Tue, 7 Aug 2012 22:26:16 -0400 Subject: [PATCH 58/73] needed to join on monitor process, fixed. --- tests/test_components.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index f351dac6..de6c0248 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -118,14 +118,14 @@ class ComponentTestCase(TestCase): "source_a" ) - launch_monitor(monitor) + mon_proc = launch_monitor(monitor) for event in comp_a: log.info(event) # wait for the sending process to exit comp_a.proc.join() - + mon_proc.join() def test_sort(self): monitor = create_monitor(allocator) @@ -199,7 +199,7 @@ class ComponentTestCase(TestCase): sorted_out = date_sorted_sources(*sources) - launch_monitor(monitor) + mon_proc = launch_monitor(monitor) prev = None sort_count = 0 @@ -216,6 +216,7 @@ class ComponentTestCase(TestCase): comp_a.proc.join() comp_b.proc.join() comp_c.proc.join() + mon_proc.join() def test_full(self): @@ -247,13 +248,6 @@ class ComponentTestCase(TestCase): sorted_out = date_sorted_sources(*sources) - #launch_monitor(monitor) - #import nose.tools; nose.tools.set_trace() - #for feed_msg in sorted_out: - # log.info(pf(feed_msg)) - - #return - sorted = Component( sorted_out, monitor, @@ -299,7 +293,7 @@ class 
ComponentTestCase(TestCase): "tsc" ) - launch_monitor(monitor) + mon_proc = launch_monitor(monitor) for message in tsc_comp: log.info(pf(message)) @@ -309,6 +303,8 @@ class ComponentTestCase(TestCase): comp_b.proc.join() sorted.proc.join() merged.proc.join() + tsc_comp.proc.join() + mon_proc.join() return def test_single_thread(self): @@ -374,7 +370,7 @@ class ComponentTestCase(TestCase): tsc_gen = trading_client.simulate(merged) - launch_monitor(monitor) + mon_proc = launch_monitor(monitor) for message in tsc_gen: log.info(pf(message)) @@ -382,4 +378,5 @@ class ComponentTestCase(TestCase): # wait for processes to finish sorted.proc.join() merged.proc.join() + mon_proc.join() return From 35e0433a6eadce8071de5a62882b2c186dc05c4a Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 00:46:19 -0400 Subject: [PATCH 59/73] added LOG as valid prefix. --- zipline/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipline/protocol.py b/zipline/protocol.py index bbf80f98..b5bf67ae 100644 --- a/zipline/protocol.py +++ b/zipline/protocol.py @@ -131,7 +131,7 @@ from utils.date_utils import EPOCH, UN_EPOCH, epoch_now # Control Protocol # ----------------------- -PRODUCTION_PREFIXES = ['PERF', 'RISK', 'EXCEPTION', 'CANCEL'] +PRODUCTION_PREFIXES = ['PERF', 'RISK', 'EXCEPTION','CANCEL','DONE', 'LOG'] INVALID_CONTROL_FRAME = FrameExceptionFactory('CONTROL') From d2809946bf749552ed549a5fea5a91e9149cf56b Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 09:20:35 -0400 Subject: [PATCH 60/73] updated exception test tracebacks --- tests/test_exception_handling.py | 74 +++++++------------------------- zipline/gens/composites.py | 12 +++++- zipline/lines.py | 7 +-- 3 files changed, 30 insertions(+), 63 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index 6a091106..1b7eb521 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -19,7 +19,6 @@ EXTENDED_TIMEOUT = 
90 allocator = AddressAllocator(1000) - class ExceptionTestCase(TestCase): leased_sockets = defaultdict(list) @@ -104,73 +103,32 @@ class ExceptionTestCase(TestCase): # components, exception in Monitor, etc. write tests # for those scenarios. - - INITIALIZE_TB =\ {'message': 'Algo exception in initialize', 'name': 'Exception', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo,', 'lineno': 93, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.algo.initialize()', - 'lineno': 123, - 'method': '__init__'}, - {'filename': '/zipline/test_algorithms.py', - 'line': 'raise Exception("Algo exception in initialize")', - 'lineno': 166, - 'method': 'initialize'}]} + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.initialize()', 'lineno': 123, 'method': '__init__'}, + {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in initialize")', 'lineno': 166, 'method': 'initialize'}]} HANDLE_DATA_TB =\ {'message': 'Algo exception in handle_data', 'name': 'Exception', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'for message in algo_results:', - 'lineno': 100, - 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'return self.__generator.next()', - 'lineno': 144, - 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.update_current_snapshot(event)', - 'lineno': 199, - 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.simulate_current_snapshot()', - 'lineno': 221, - 'method': 
'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.algo.handle_data(self.universe)', - 'lineno': 246, - 'method': 'simulate_current_snapshot'}, - {'filename': '/zipline/test_algorithms.py', - 'line': 'raise Exception("Algo exception in handle_data")', - 'lineno': 187, - 'method': 'handle_data'}]} + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in algo_results:', 'lineno': 100, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 144, 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 199, 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 221, 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 246, 'method': 'simulate_current_snapshot'}, + {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in handle_data")', 'lineno': 187, 'method': 'handle_data'}]} ZERO_DIV_TB= \ {'message': 'integer division or modulo by zero', 'name': 'ZeroDivisionError', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 152, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'for message in algo_results:', - 'lineno': 100, - 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'return self.__generator.next()', - 'lineno': 144, - 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.update_current_snapshot(event)', - 'lineno': 199, - 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 
'self.simulate_current_snapshot()', - 'lineno': 221, - 'method': 'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', - 'line': 'self.algo.handle_data(self.universe)', - 'lineno': 246, - 'method': 'simulate_current_snapshot'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in algo_results:', 'lineno': 100, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 144, 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 199, 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 221, 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 246, 'method': 'simulate_current_snapshot'}, {'filename': '/zipline/test_algorithms.py', 'line': '5/0', 'lineno': 218, 'method': 'handle_data'}]} diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 4b5cd5ac..10e3cd2e 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -85,9 +85,17 @@ def sequential_transforms(stream_in, *transforms): stream_out = reduce(lambda stream, tnfm: tnfm.transform(stream), transforms, stream_in) - return stream_out - + dt_aliased = alias_dt(stream_out) + return dt_aliased + +def alias_dt(stream_in): + """ + Alias the dt field to datetime on each message. 
+ """ + for message in stream_in: + message['datetime'] = message['dt'] + yield message diff --git a/zipline/lines.py b/zipline/lines.py index 0fa990ca..bf68bd9e 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -74,7 +74,7 @@ from zipline.utils import factory from zipline.test_algorithms import TestAlgorithm from zipline.gens.composites import \ - date_sorted_sources, merged_transforms + date_sorted_sources, merged_transforms, sequential_transforms from zipline.gens.transform import Passthrough, StatefulTransform from zipline.gens.tradesimulation import TradeSimulationClient as tsc from logbook import Logger, NestedSetup, Processor @@ -103,9 +103,10 @@ class SimulatedTrading(object): self.date_sorted = date_sorted_sources(*sources) self.transforms = transforms self.transforms.append(StatefulTransform(Passthrough)) - self.merged = merged_transforms(self.date_sorted, *self.transforms) + # Formerly merged_transforms. + self.with_tnfms = sequential_transforms(self.date_sorted, *self.transforms) self.trading_client = tsc(algorithm, environment, style) - self.gen = self.trading_client.simulate(self.merged) + self.gen = self.trading_client.simulate(self.with_tnfms) self.results_uri = results_socket_uri self.results_socket = None self.context = context From 7b340af890c97168cb625b96b64af327d3002efd Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 11:19:26 -0400 Subject: [PATCH 61/73] leaky abstractions...patched --- zipline/gens/returns.py | 1 - zipline/lines.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/zipline/gens/returns.py b/zipline/gens/returns.py index ccfc7b00..49d3e9b5 100644 --- a/zipline/gens/returns.py +++ b/zipline/gens/returns.py @@ -1,5 +1,4 @@ from collections import defaultdict, deque -from zipline.transforms.base import BaseTransform class Returns(object): """ diff --git a/zipline/lines.py b/zipline/lines.py index 0fa990ca..5bb50f68 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -144,7 +144,7 @@ class 
SimulatedTrading(object): data_injector = Processor(inject_event_data) log_pipeline = NestedSetup([self.zmq_out,data_injector]) - with log_pipeline.threadbound(), self.stdout_capture(self.logger, ''): + with log_pipeline.threadbound(), self.stdout_capture(Logger('Print'), ''): self.stream_results() # if no log socket, just run the algo normally else: @@ -234,7 +234,6 @@ class SimulatedTrading(object): socket = sock, ) - # This is a class, which is instantiated later # in run_algorithm. The class provides a generator. self.stdout_capture = stdout_only_pipe From b70e76d51696a0af9b57ec3813876b0512e10404 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 11:21:26 -0400 Subject: [PATCH 62/73] commit pre-merge --- tests/test_components.py | 2 +- zipline/gens/tradesimulation.py | 2 +- zipline/lines.py | 17 +++++++---------- zipline/utils/factory.py | 1 - 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index de6c0248..8f1099b7 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -292,7 +292,7 @@ class ComponentTestCase(TestCase): BT_UPDATE_UNFRAME, "tsc" ) - + import nose.tools; nose.tools.set_trace() mon_proc = launch_monitor(monitor) for message in tsc_comp: log.info(pf(message)) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 522d80da..05f85c5a 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -117,7 +117,7 @@ class AlgorithmSimulator(object): # Monkey patch the user algorithm to place orders in the # TransactionSimulator's order book. self.algo.set_order(self.order) - self.algo.set_logger(logbook.Logger("Algolog")) + self.algo.set_logger(logbook.Logger("AlgoLog")) # Call the user's initialize method. 
self.algo.initialize() diff --git a/zipline/lines.py b/zipline/lines.py index bf68bd9e..0f17725f 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -81,9 +81,9 @@ from logbook import Logger, NestedSetup, Processor import zipline.protocol as zp - log = Logger('Lines') + class CancelSignal(Exception): def __init__(self): pass @@ -102,9 +102,8 @@ class SimulatedTrading(object): self.date_sorted = date_sorted_sources(*sources) self.transforms = transforms - self.transforms.append(StatefulTransform(Passthrough)) # Formerly merged_transforms. - self.with_tnfms = sequential_transforms(self.date_sorted, *self.transforms) + self.with_tnfms = sequential_transforms(self.date_sorted, *self.transforms) self.trading_client = tsc(algorithm, environment, style) self.gen = self.trading_client.simulate(self.with_tnfms) self.results_uri = results_socket_uri @@ -153,9 +152,9 @@ class SimulatedTrading(object): def stream_results(self): assert self.results_socket, \ - "Results socket must exist to stream results" + "Results socket must exist to stream results" try: - for event in self.gen: + for event in self.gen: if event.has_key('daily_perf'): msg = zp.PERF_FRAME(event) else: @@ -218,7 +217,6 @@ class SimulatedTrading(object): except: log.exception("Exception while reporting simulation exception.") - def open(self): if not self.context: self.context = zmq.Context() @@ -228,11 +226,11 @@ class SimulatedTrading(object): self.results_socket = sock self.setup_logging() - def setup_logging(self, socket = None): - sock = socket or self.results_socket + def setup_logging(self): + assert self.results_socket self.zmq_out = ZeroMQLogHandler( - socket = sock, + socket = self.results_socket, ) @@ -275,7 +273,6 @@ class SimulatedTrading(object): of StatefulTransform objects. 
""" assert isinstance(config, dict) - sid = config['sid'] #-------------------- diff --git a/zipline/utils/factory.py b/zipline/utils/factory.py index 1b881329..e3d92443 100644 --- a/zipline/utils/factory.py +++ b/zipline/utils/factory.py @@ -213,7 +213,6 @@ def create_minutely_trade_source(sids, trade_count, trading_environment): def create_trade_source(sids, trade_count, trade_time_increment, trading_environment): - #Set up source a. One minute between events. args = tuple() kwargs = { 'count' : trade_count, From bd2993d23fdf98876ff58d3092833077694c73b6 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 12:27:15 -0400 Subject: [PATCH 63/73] re-enable logging zipline to file and adding algo_dt --- zipline/gens/tradesimulation.py | 86 ++++++++++++++++++++++----------- zipline/lines.py | 20 ++------ 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 05f85c5a..d5c7d115 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -8,6 +8,7 @@ from zipline import ndict from zipline.gens.transform import StatefulTransform from zipline.finance.trading import TransactionSimulator from zipline.finance.performance import PerformanceTracker +from zipline.utils.log_utils import stdout_only_pipe class TradeSimulationClient(object): """ @@ -106,6 +107,10 @@ class AlgorithmSimulator(object): def __init__(self, stream_in, order_book, algo): self.stream_in = stream_in + + # ========== + # Algo Setup + # ========== # We extract the order book from the txn client so that # the algo can place new orders. @@ -122,18 +127,36 @@ class AlgorithmSimulator(object): # Call the user's initialize method. self.algo.initialize() + # ============== + # Snapshot Setup + # ============== + # The algorithm's universe as of our most recent event. 
self.universe = ndict() for sid in self.sids: self.universe[sid] = ndict() self.universe.portfolio = None - + # We don't have a datetime for the current snapshot until we # receive a message. self.simulation_dt = None self.this_snapshot_dt = None + # ============= + # Logging Setup + # ============= + + # Processor function for injecting the algo_dt into + # user prints/logs. + def inject_algo_dt(record): + record.extra['algo_dt'] = self.this_snapshot_dt + self.processor = logbook.Processor(inject_algo_dt) + + # This is a class, which is instantiated later + # in run_algorithm. The class provides a generator. + self.stdout_capture = stdout_only_pipe + self.__generator = None def __iter__(self): @@ -158,7 +181,7 @@ class AlgorithmSimulator(object): 'amount' : int(amount), 'filled' : 0 }) - + # Tell the user if they try to buy 0 shares of something. if order.amount == 0: zero_message = "Requested to trade zero shares of {sid}".format( @@ -179,33 +202,40 @@ class AlgorithmSimulator(object): """ Internal generator work loop. """ - for event in self.stream_in: - # Yield any perf messages received to be relayed back to the browser. - if event.perf_message: - yield event.perf_message - del event['perf_message'] - if event.dt == "DONE": - break - - # This should only happen for the first event we run. - if self.simulation_dt == None: - self.simulation_dt = event.dt - - # ====================== - # Time Compression Logic - # ====================== - - if self.this_snapshot_dt != None: - self.update_current_snapshot(event) + # Capture any output of this generator to stdout and pipe it + # to a logbook interface. Also inject the current algo + # snapshot time to any log record generated. - # The algorithm has been missing events because it took - # too long processing. Update the universe with data from - # this event, then check if enough time has passed that we - # can start a new snapshot. 
- else: - self.update_universe(event) - if event.dt >= self.simulation_dt: - self.this_snapshot_dt = event.dt + with self.processor.threadbound(), self.stdout_capture(Logger('Print'),''): + + for event in self.stream_in: + # Yield any perf messages received to be relayed back to + # the browser. + if event.perf_message: + yield event.perf_message + del event['perf_message'] + if event.dt == "DONE": + break + + # This should only happen for the first event we run. + if self.simulation_dt == None: + self.simulation_dt = event.dt + + # ====================== + # Time Compression Logic + # ====================== + + if self.this_snapshot_dt != None: + self.update_current_snapshot(event) + + # The algorithm has been missing events because it took + # too long processing. Update the universe with data from + # this event, then check if enough time has passed that we + # can start a new snapshot. + else: + self.update_universe(event) + if event.dt >= self.simulation_dt: + self.this_snapshot_dt = event.dt def update_current_snapshot(self, event): """ diff --git a/zipline/lines.py b/zipline/lines.py index 219d90fc..8aeb18bb 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -136,13 +136,6 @@ class SimulatedTrading(object): setproctitle(self.sim_id) self.open() if self.zmq_out: - - def inject_event_data(record): - # Record the simulation time. 
- #record.extra['algo_dt'] = self.current_dt - pass - - data_injector = Processor(inject_event_data) log_pipeline = NestedSetup([self.zmq_out,data_injector]) with log_pipeline.threadbound(), self.stdout_capture(Logger('Print'), ''): self.stream_results() @@ -160,7 +153,7 @@ class SimulatedTrading(object): else: msg = zp.RISK_FRAME(event) self.results_socket.send(msg) - + self.signal_done() except Exception as exc: self.handle_exception(exc) @@ -213,7 +206,7 @@ class SimulatedTrading(object): ) self.results_socket.send(msg) - + except: log.exception("Exception while reporting simulation exception.") @@ -228,15 +221,12 @@ class SimulatedTrading(object): def setup_logging(self): assert self.results_socket - self.zmq_out = ZeroMQLogHandler( socket = self.results_socket, + filter = lambda r, h: r.channel in ['Print', 'AlgoLog'], + bubble = True ) - - # This is a class, which is instantiated later - # in run_algorithm. The class provides a generator. - self.stdout_capture = stdout_only_pipe - + def join(self): if self.proc: self.proc.join() From 981676dfd3b865d9f9573fcaf7be2e8f4e8caa0a Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 12:32:52 -0400 Subject: [PATCH 64/73] pulled logger to be explicit --- zipline/lines.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zipline/lines.py b/zipline/lines.py index 5bb50f68..9ae7d291 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -116,6 +116,7 @@ class SimulatedTrading(object): self.proc = None self.send_sighup = False self.logger = Logger(sim_id) + self.print_logger = Logger('Print') def simulate(self, blocking=True, send_sighup=False): @@ -144,7 +145,7 @@ class SimulatedTrading(object): data_injector = Processor(inject_event_data) log_pipeline = NestedSetup([self.zmq_out,data_injector]) - with log_pipeline.threadbound(), self.stdout_capture(Logger('Print'), ''): + with log_pipeline.threadbound(), self.stdout_capture(self.print_logger, ''): self.stream_results() # if no log socket, just 
run the algo normally else: From 427cfd53ec10267b70f2c59350f63e7ec10b64e1 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 14:00:36 -0400 Subject: [PATCH 65/73] working on logging --- zipline/gens/tradesimulation.py | 94 ++++++++++++++++++--------------- zipline/lines.py | 19 ++++--- zipline/utils/log_utils.py | 14 ++--- zipline/utils/test_utils.py | 4 +- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index d5c7d115..50326368 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -1,4 +1,4 @@ -import logbook +from logbook import Logger, Processor from datetime import datetime, timedelta from numbers import Integral @@ -9,6 +9,9 @@ from zipline.gens.transform import StatefulTransform from zipline.finance.trading import TransactionSimulator from zipline.finance.performance import PerformanceTracker from zipline.utils.log_utils import stdout_only_pipe +from zipline.gens.utils import hash_args + +log = Logger('Trade Simulation') class TradeSimulationClient(object): """ @@ -43,14 +46,15 @@ class TradeSimulationClient(object): overwritten so that only the most recent snapshot of the universe is sent to the algo. """ - + def __init__(self, algo, environment, sim_style): self.algo = algo self.sids = algo.get_sid_filter() self.environment = environment self.style = sim_style - + self.algo_sim = None + def get_hash(self): """ There should only ever be one TSC in the system. @@ -83,36 +87,36 @@ class TradeSimulationClient(object): self.sids ) with_portfolio = perf_tracker.transform(with_filled_orders) - + # Pass the messages from perf along with the trading client's # state into the algorithm for simulation. We provide the # trading client so that the algorithm can place new orders # into the client's order book. 
- algo_results = AlgorithmSimulator( + self.algo_sim = AlgorithmSimulator( with_portfolio, ordering_client.state, - self.algo, + self.algo, ) # The algorithm will yield a daily_results message (as # calculated by the performance tracker) at the end of each # day. It will also yield a risk report at the end of the # simulation. - for message in algo_results: + for message in self.algo_sim: yield message class AlgorithmSimulator(object): - - def __init__(self, stream_in, order_book, algo): - + + def __init__(self, stream_in, order_book, algo): + self.stream_in = stream_in - + # ========== # Algo Setup # ========== - # We extract the order book from the txn client so that + # We extract the order book from the txn client so that # the algo can place new orders. self.order_book = order_book @@ -122,10 +126,8 @@ class AlgorithmSimulator(object): # Monkey patch the user algorithm to place orders in the # TransactionSimulator's order book. self.algo.set_order(self.order) - self.algo.set_logger(logbook.Logger("AlgoLog")) + self.algo.set_logger(Logger("AlgoLog")) - # Call the user's initialize method. - self.algo.initialize() # ============== # Snapshot Setup @@ -133,11 +135,11 @@ class AlgorithmSimulator(object): # The algorithm's universe as of our most recent event. self.universe = ndict() - + for sid in self.sids: self.universe[sid] = ndict() self.universe.portfolio = None - + # We don't have a datetime for the current snapshot until we # receive a message. self.simulation_dt = None @@ -146,22 +148,22 @@ class AlgorithmSimulator(object): # ============= # Logging Setup # ============= - + # Processor function for injecting the algo_dt into # user prints/logs. def inject_algo_dt(record): record.extra['algo_dt'] = self.this_snapshot_dt - self.processor = logbook.Processor(inject_algo_dt) + self.processor = Processor(inject_algo_dt) # This is a class, which is instantiated later # in run_algorithm. The class provides a generator. 
self.stdout_capture = stdout_only_pipe - + self.__generator = None def __iter__(self): return self - + def next(self): if self.__generator: return self.__generator.next() @@ -181,17 +183,17 @@ class AlgorithmSimulator(object): 'amount' : int(amount), 'filled' : 0 }) - + # Tell the user if they try to buy 0 shares of something. if order.amount == 0: zero_message = "Requested to trade zero shares of {sid}".format( - sid=event.sid + sid=order.sid ) log.debug(zero_message) # Don't bother placing orders for 0 shares. - return + return - # Add non-zero orders to the order book. + # Add non-zero orders to the order book. # !!!IMPORTANT SIDE-EFFECT!!! # This modifies the internal state of the transaction # simulator so that it can fill the placed order when it @@ -207,7 +209,9 @@ class AlgorithmSimulator(object): # snapshot time to any log record generated. with self.processor.threadbound(), self.stdout_capture(Logger('Print'),''): - + # Call the user's initialize method. + self.algo.initialize() + for event in self.stream_in: # Yield any perf messages received to be relayed back to # the browser. @@ -215,6 +219,12 @@ class AlgorithmSimulator(object): yield event.perf_message del event['perf_message'] if event.dt == "DONE": + if self.this_snapshot_dt: + # stop iteration happened + # mid-snapshot, so we have a universe + # snapshot that is not yet processed + # by the algorithm. + self.simulate_current_snapshot() break # This should only happen for the first event we run. @@ -232,67 +242,65 @@ class AlgorithmSimulator(object): # too long processing. Update the universe with data from # this event, then check if enough time has passed that we # can start a new snapshot. - else: + else: self.update_universe(event) if event.dt >= self.simulation_dt: self.this_snapshot_dt = event.dt + + def update_current_snapshot(self, event): """ - Update our current snapshot of the universe. Call handle_data if + Update our current snapshot of the universe. 
Call handle_data if """ # The new event matches our snapshot dt. Just update the # universe and move on. if event.dt == self.this_snapshot_dt: self.update_universe(event) - - # The new event does not match our snapshot. + + # The new event does not match our snapshot. else: self.simulate_current_snapshot() - + # Once we've finished simulating the old snapshot, # we can update the universe with the new event. self.update_universe(event) - + # The current event is later than the simulation time, # which means the algorithm finished quickly enough to # receive the new event. Start a new snapshot with this # event's dt. if event.dt >= self.simulation_dt: self.this_snapshot_dt = event.dt - + # The algorithm spent enough time processing that it # missed the new event. Wait to start a new snapshot until # the events catch up to the algo's simulated dt. else: self.this_snapshot_dt = None - + def simulate_current_snapshot(self): """ Run the user's algo against our current snapshot and update the algo's simulated time. - """ + """ start_tic = datetime.now() self.algo.handle_data(self.universe) stop_tic = datetime.now() - + # How long did you take? delta = stop_tic - start_tic - + # Update the simulation time. self.simulation_dt = self.this_snapshot_dt + delta - + def update_universe(self, event): """ Update the universe with new event information. """ # Update our portfolio. self.universe.portfolio = event.portfolio - + # Update our knowledge of this event's sid for field in event.keys(): self.universe[event.sid][field] = event[field] - - - - diff --git a/zipline/lines.py b/zipline/lines.py index 66e3d70f..9a0b49ef 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -103,7 +103,7 @@ class SimulatedTrading(object): self.date_sorted = date_sorted_sources(*sources) self.transforms = transforms # Formerly merged_transforms. 
- self.with_tnfms = sequential_transforms(self.date_sorted, *self.transforms) + self.with_tnfms = sequential_transforms(self.date_sorted, *self.transforms) self.trading_client = tsc(algorithm, environment, style) self.gen = self.trading_client.simulate(self.with_tnfms) self.results_uri = results_socket_uri @@ -137,8 +137,7 @@ class SimulatedTrading(object): setproctitle(self.sim_id) self.open() if self.zmq_out: - log_pipeline = NestedSetup([self.zmq_out,data_injector]) - with log_pipeline.threadbound(), self.stdout_capture(self.print_logger, ''): + with self.zmq_out.threadbound(): self.stream_results() # if no log socket, just run the algo normally else: @@ -148,13 +147,13 @@ class SimulatedTrading(object): assert self.results_socket, \ "Results socket must exist to stream results" try: - for event in self.gen: + for event in self.gen: if event.has_key('daily_perf'): msg = zp.PERF_FRAME(event) else: msg = zp.RISK_FRAME(event) self.results_socket.send(msg) - + self.signal_done() except Exception as exc: self.handle_exception(exc) @@ -207,7 +206,7 @@ class SimulatedTrading(object): ) self.results_socket.send(msg) - + except: log.exception("Exception while reporting simulation exception.") @@ -222,12 +221,16 @@ class SimulatedTrading(object): def setup_logging(self): assert self.results_socket + # The filter behavior is: matches are logged, mismatches + # are bubbled. If bubble is True, matches are also + # bubbled. Since we do not want user logs in our system + # logs, we set bubble to False. 
self.zmq_out = ZeroMQLogHandler( socket = self.results_socket, filter = lambda r, h: r.channel in ['Print', 'AlgoLog'], - bubble = True + bubble=False ) - + def join(self): if self.proc: self.proc.join() diff --git a/zipline/utils/log_utils.py b/zipline/utils/log_utils.py index 6bcf80f8..f9fbc57c 100644 --- a/zipline/utils/log_utils.py +++ b/zipline/utils/log_utils.py @@ -120,12 +120,6 @@ class ZeroMQLogHandler(Handler): #can't be serialized by JSON, so we need to convert to #unix epoch representation. - if record.time: - assert isinstance(record.time, datetime.datetime) - - time = record.time.replace(tzinfo = pytz.utc) - #logbook measures time in utc already, no need to convert. - record.time = EPOCH(time) #Do the same if algo_dt is a datetime object. if record.extra.has_key('algo_dt'): @@ -151,6 +145,14 @@ class ZeroMQLogHandler(Handler): data[field] = record.extra[field] else: data[field] = None + + if data['time']: + assert isinstance(data['time'], datetime.datetime) + + time = data['time'].replace(tzinfo = pytz.utc) + #logbook measures time in utc already, no need to convert. + data['time'] = EPOCH(time) + return data def emit(self, record): diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 036ebe02..02ac4c69 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -61,7 +61,7 @@ def check(test, a, b, label=None): test.assertEqual(a, b, "mismatch on path: " + label) -def drain_zipline(test, zipline): +def drain_zipline(test, zipline, p_blocking=False): assert test.ctx, "method expects a valid zmq context" assert test.zipline_test_config, "method expects a valid test config" assert isinstance(test.zipline_test_config, dict) @@ -76,7 +76,7 @@ def drain_zipline(test, zipline): time.sleep(1) # start the simulation - zipline.simulate(blocking=False) + zipline.simulate(blocking=p_blocking) output, transaction_count = drain_receiver(test.receiver) # some processes will exit after the message stream is # finished. 
We block here to avoid collisions with subsequent From 961119232645998877866436441c1111a44fa373 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 14:59:31 -0400 Subject: [PATCH 66/73] done message for seq transforms --- zipline/gens/composites.py | 13 +++++++------ zipline/gens/merge.py | 3 +-- zipline/gens/tradesimulation.py | 19 ++++++++++--------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 10e3cd2e..3af77fc1 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -1,5 +1,5 @@ import datetime -from itertools import tee, starmap +from itertools import tee, starmap, chain from collections import namedtuple from zipline.gens.tradegens import SpecificEquityTrades @@ -66,7 +66,7 @@ def merged_transforms(sorted_stream, *transforms): # Pipe the stream into merge. merged = merge(to_merge, namestrings) # Return the merged events. - return merged + return add_done(dt_aliased) def sequential_transforms(stream_in, *transforms): """ @@ -87,7 +87,7 @@ def sequential_transforms(stream_in, *transforms): stream_in) dt_aliased = alias_dt(stream_out) - return dt_aliased + return add_done(dt_aliased) def alias_dt(stream_in): """ @@ -95,10 +95,11 @@ def alias_dt(stream_in): """ for message in stream_in: message['datetime'] = message['dt'] - yield message + yield message - - +# Add a done message to a stream. 
+def add_done(stream_in): + return chain(stream_in, [done_message('Composite')]) diff --git a/zipline/gens/merge.py b/zipline/gens/merge.py index 0689d507..09e1f943 100644 --- a/zipline/gens/merge.py +++ b/zipline/gens/merge.py @@ -51,8 +51,7 @@ def merge(stream_in, tnfm_ids): assert len(queue) == 1, "Bad queue in merge on exit: %s" % queue assert queue[0].dt == "DONE", \ "Bad last message in merge on exit: %s" % queue - yield done_message('Merge') - + def merge_one(sources): event_fields = ndict() diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 50326368..6f3323bd 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -97,7 +97,7 @@ class TradeSimulationClient(object): ordering_client.state, self.algo, ) - + # The algorithm will yield a daily_results message (as # calculated by the performance tracker) at the end of each # day. It will also yield a risk report at the end of the @@ -105,7 +105,6 @@ class TradeSimulationClient(object): for message in self.algo_sim: yield message - class AlgorithmSimulator(object): def __init__(self, stream_in, order_book, algo): @@ -215,16 +214,18 @@ class AlgorithmSimulator(object): for event in self.stream_in: # Yield any perf messages received to be relayed back to # the browser. + if event.perf_message: yield event.perf_message del event['perf_message'] - if event.dt == "DONE": - if self.this_snapshot_dt: - # stop iteration happened - # mid-snapshot, so we have a universe - # snapshot that is not yet processed - # by the algorithm. - self.simulate_current_snapshot() + + if event.dt == "DONE": + if self.this_snapshot_dt: + # stop iteration happened + # mid-snapshot, so we have a universe + # snapshot that is not yet processed + # by the algorithm. + self.simulate_current_snapshot() break # This should only happen for the first event we run. 
From a0db79428d5b619a4f1d874eea53c04d90f820da Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 15:06:24 -0400 Subject: [PATCH 67/73] fixed dt to datetime alias logic-O --- tests/test_components.py | 1 - zipline/gens/composites.py | 17 +++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/test_components.py b/tests/test_components.py index 8f1099b7..785a0ae2 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -292,7 +292,6 @@ class ComponentTestCase(TestCase): BT_UPDATE_UNFRAME, "tsc" ) - import nose.tools; nose.tools.set_trace() mon_proc = launch_monitor(monitor) for message in tsc_comp: log.info(pf(message)) diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 3af77fc1..7fc5d71b 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -28,7 +28,7 @@ def date_sorted_sources(*sources): # Convert the list of generators into a flat stream by pulling # one element at a time from each. stream_in = roundrobin(sources, names) - + # Guarantee the flat stream will be sorted by date, using # source_id as tie-breaker, which is fully deterministic (given # deterministic string representation for all args/kwargs) @@ -65,6 +65,8 @@ def merged_transforms(sorted_stream, *transforms): to_merge = roundrobin(tnfm_gens, namestrings) # Pipe the stream into merge. merged = merge(to_merge, namestrings) + + dt_aliased = alias_dt(merged) # Return the merged events. return add_done(dt_aliased) @@ -74,7 +76,7 @@ def sequential_transforms(stream_in, *transforms): Each transform application will add a new entry indexed to the transform's hash string. """ - + assert isinstance(transforms, (list, tuple)) for tnfm in transforms: tnfm.forward_all = False @@ -82,8 +84,8 @@ def sequential_transforms(stream_in, *transforms): tnfm.append_value = True # Recursively apply all transforms to the stream. 
- stream_out = reduce(lambda stream, tnfm: tnfm.transform(stream), - transforms, + stream_out = reduce(lambda stream, tnfm: tnfm.transform(stream), + transforms, stream_in) dt_aliased = alias_dt(stream_out) @@ -95,13 +97,8 @@ def alias_dt(stream_in): """ for message in stream_in: message['datetime'] = message['dt'] - yield message + yield message # Add a done message to a stream. def add_done(stream_in): return chain(stream_in, [done_message('Composite')]) - - - - - From 0d34de3b464f716d552753c8d3e30c9604cd8c11 Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 15:23:20 -0400 Subject: [PATCH 68/73] fix tracebacks --- tests/test_exception_handling.py | 39 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index 1b7eb521..283789d9 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -49,7 +49,8 @@ class ExceptionTestCase(TestCase): **self.zipline_test_config ) output, _ = drain_zipline(self, zipline) - self.assertEqual(len(output), 2) + + import nose.tools; nose.tools.set_trace() self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) @@ -70,7 +71,7 @@ class ExceptionTestCase(TestCase): ) output, _ = drain_zipline(self, zipline) - self.assertEqual(len(output), 3) + import nose.tools; nose.tools.set_trace() self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) @@ -91,7 +92,8 @@ class ExceptionTestCase(TestCase): ) output, _ = drain_zipline(self, zipline) - self.assertEqual(len(output), 6) + + import nose.tools; nose.tools.set_trace() self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) @@ -106,29 +108,30 @@ class ExceptionTestCase(TestCase): INITIALIZE_TB =\ {'message': 'Algo exception in initialize', 'name': 'Exception', 
- 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo,', 'lineno': 93, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.initialize()', 'lineno': 123, 'method': '__init__'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 105, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 171, 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.initialize()', 'lineno': 212, 'method': '_gen'}, {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in initialize")', 'lineno': 166, 'method': 'initialize'}]} HANDLE_DATA_TB =\ {'message': 'Algo exception in handle_data', 'name': 'Exception', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in algo_results:', 'lineno': 100, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 144, 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 199, 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 221, 'method': 'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 246, 'method': 'simulate_current_snapshot'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 
'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 105, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 168, 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 240, 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 264, 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 289, 'method': 'simulate_current_snapshot'}, {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in handle_data")', 'lineno': 187, 'method': 'handle_data'}]} ZERO_DIV_TB= \ {'message': 'integer division or modulo by zero', 'name': 'ZeroDivisionError', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 157, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in algo_results:', 'lineno': 100, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 144, 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 199, 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 221, 'method': 'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 246, 'method': 'simulate_current_snapshot'}, + 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 'method': 'stream_results'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 
105, 'method': 'simulate'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 168, 'method': 'next'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 240, 'method': '_gen'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 264, 'method': 'update_current_snapshot'}, + {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 289, 'method': 'simulate_current_snapshot'}, {'filename': '/zipline/test_algorithms.py', 'line': '5/0', 'lineno': 218, 'method': 'handle_data'}]} From 0267bc50739563c457295ddf8ba2d9ea452dff08 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 15:32:54 -0400 Subject: [PATCH 69/73] removed traces --- tests/test_exception_handling.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index 283789d9..f547ecc9 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -49,8 +49,7 @@ class ExceptionTestCase(TestCase): **self.zipline_test_config ) output, _ = drain_zipline(self, zipline) - - import nose.tools; nose.tools.set_trace() + self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) @@ -71,7 +70,6 @@ class ExceptionTestCase(TestCase): ) output, _ = drain_zipline(self, zipline) - import nose.tools; nose.tools.set_trace() self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) @@ -93,7 +91,6 @@ class ExceptionTestCase(TestCase): output, _ = drain_zipline(self, zipline) - import nose.tools; nose.tools.set_trace() self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) From c64175e2e59cf2cc440754f71fefef9f987183cd Mon Sep 17 00:00:00 
2001 From: scottsanderson Date: Wed, 8 Aug 2012 18:44:45 -0400 Subject: [PATCH 70/73] added tests for exception messages in datasources and transforms --- tests/test_exception_handling.py | 38 +++++++++++++++++++++++++++++++- zipline/gens/composites.py | 9 +++----- zipline/utils/test_utils.py | 25 +++++++++++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index f547ecc9..f604466a 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -7,12 +7,15 @@ from zipline.test_algorithms import ExceptionAlgorithm, DivByZeroAlgorithm from zipline.finance.trading import SIMULATION_STYLE from zipline.core.devsimulator import AddressAllocator from zipline.lines import SimulatedTrading +from zipline.gens.transform import StatefulTransform from zipline.utils.test_utils import \ drain_zipline, \ check, \ setup_logger, \ - teardown_logger + teardown_logger, \ + ExceptionSource, \ + ExceptionTransform DEFAULT_TIMEOUT = 15 # seconds EXTENDED_TIMEOUT = 90 @@ -36,6 +39,39 @@ class ExceptionTestCase(TestCase): self.ctx.term() teardown_logger(self) + def test_datasource_exception(self): + self.zipline_test_config['trade_source'] = ExceptionSource() + zipline = SimulatedTrading.create_test_zipline( + **self.zipline_test_config + ) + output, _ = drain_zipline(self, zipline) + assert len(output) == 1 + assert output[0]['prefix'] == 'EXCEPTION' + message = output[0]['payload'] + for field in ['date', 'message', 'name', 'stack']: + assert field in message.keys() + + assert message['message'] == 'integer division or modulo by zero' + assert message['name'] == 'ZeroDivisionError' + + def test_tranform_exception(self): + exc_tnfm = StatefulTransform(ExceptionTransform) + self.zipline_test_config['transforms'] = [exc_tnfm] + + zipline = SimulatedTrading.create_test_zipline( + **self.zipline_test_config + ) + output, _ = drain_zipline(self, zipline) + assert len(output) == 1 
+ assert output[0]['prefix'] == 'EXCEPTION' + message = output[0]['payload'] + for field in ['date', 'message', 'name', 'stack']: + assert field in message.keys() + + assert message['message'] == 'An assertion message' + assert message['name'] == 'AssertionError' + + def test_exception_in_init(self): # Simulation # ---------- diff --git a/zipline/gens/composites.py b/zipline/gens/composites.py index 7fc5d71b..b3fa7576 100644 --- a/zipline/gens/composites.py +++ b/zipline/gens/composites.py @@ -8,14 +8,10 @@ from zipline.gens.sort import date_sort from zipline.gens.merge import merge from zipline.gens.transform import StatefulTransform -SourceBundle = namedtuple("SourceBundle", ['source', 'args', 'kwargs']) -TransformBundle = namedtuple("TransformBundle", ['tnfm', 'args', 'kwargs']) - def date_sorted_sources(*sources): """ - Takes an iterable of SortBundles, generating namestrings and - initialized datasources for each before piping them into a - date_sort. + Takes an iterable of sources, generating namestrings and + piping their output into date_sort. """ for source in sources: @@ -63,6 +59,7 @@ def merged_transforms(sorted_stream, *transforms): # Roundrobin the outputs of our transforms to create a single flat # stream. to_merge = roundrobin(tnfm_gens, namestrings) + # Pipe the stream into merge. 
merged = merge(to_merge, namestrings) diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 02ac4c69..c31b2f25 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -168,3 +168,28 @@ def create_monitor(allocator): ) return mon + +class ExceptionSource(object): + + def __init__(self): + pass + + def get_hash(self): + return "ExceptionSource" + + def __iter__(self): + return self + + def next(self): + 5 / 0 + +class ExceptionTransform(object): + + def __init__(self): + pass + + def get_hash(self): + return "ExceptionTransform" + + def update(self, event): + assert False, "An assertion message" From aec76868f9b3bb2e789411c053cb74f063aaceb9 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 21:40:33 -0400 Subject: [PATCH 71/73] exception handling --- zipline/lines.py | 23 +++++++++++++---------- zipline/protocol.py | 6 +++++- zipline/utils/test_utils.py | 9 +++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/zipline/lines.py b/zipline/lines.py index 9a0b49ef..9bc8b3ac 100644 --- a/zipline/lines.py +++ b/zipline/lines.py @@ -62,7 +62,7 @@ before invoking simulate. import sys import zmq import os -from signal import SIGHUP +from signal import SIGHUP, SIGINT import multiprocessing from setproctitle import setproctitle @@ -118,6 +118,9 @@ class SimulatedTrading(object): self.logger = Logger(sim_id) self.print_logger = Logger('Print') + # exit status flag + self.success = False + def simulate(self, blocking=True, send_sighup=False): @@ -155,28 +158,28 @@ class SimulatedTrading(object): self.results_socket.send(msg) self.signal_done() + self.success = True except Exception as exc: self.handle_exception(exc) finally: + # not much to do besides log our exit. 
self.close() def signal_done(self): # notify monitor we're done - done_frame = zp.DONE_FRAME('succes') + done_frame = zp.DONE_FRAME('success') self.results_socket.send(done_frame) def close(self): log.info("Closing Simulation: {id}".format(id=self.sim_id)) if self.proc and self.send_sighup: ppid = os.getppid() - log.warning("Sending SIGHUP") - os.kill(ppid, SIGHUP) - - def cancel(self): - if self.proc and self.proc.is_alive(): - self.proc.terminate() - else: - self.gen.throw(CancelSignal()) + if self.success: + log.warning("Sending SIGHUP") + os.kill(ppid, SIGHUP) + else: + log.warning("Sending SIGINT") + os.kill(ppid, SIGINT) def handle_exception(self, exc): if isinstance(exc, CancelSignal): diff --git a/zipline/protocol.py b/zipline/protocol.py index b5bf67ae..dd46bd60 100644 --- a/zipline/protocol.py +++ b/zipline/protocol.py @@ -527,11 +527,15 @@ def EXCEPTION_FRAME(exception_tb, name, message): rlist = [] for stack in stack_list: filename = shorten_filename(stack[0]) + # default the line to empty string rather than None + line = '' + if stack[3]: + line = stack[3] rstack = { 'filename' : filename, 'lineno' : stack[1], 'method' : stack[2], - 'line' : stack[3] + 'line' : line } rlist.append(rstack) result = { diff --git a/zipline/utils/test_utils.py b/zipline/utils/test_utils.py index 02ac4c69..13924f60 100644 --- a/zipline/utils/test_utils.py +++ b/zipline/utils/test_utils.py @@ -4,6 +4,7 @@ import time import zipline.protocol as zp from datetime import datetime import blist +from bson import ObjectId from zipline.utils.date_utils import EPOCH from itertools import izip from logbook import FileHandler @@ -31,6 +32,7 @@ def check_dict(test, a, b, label): # ignore the extra fields used by dictshield if key in ['progress']: continue + test.assertTrue(a.has_key(key), "missing key at: " + label + "." + key) test.assertTrue(b.has_key(key), "missing key at: " + label + "." 
+ key) a_val = a[key] @@ -60,6 +62,13 @@ def check(test, a, b, label=None): else: test.assertEqual(a, b, "mismatch on path: " + label) +def check_excluded(test, a, excluded_keys=[]): + for key, value in a.iteritems(): + test.assertTrue(key not in excluded_keys) + test.assertFalse(key.endswith('_id'), 'Avoid _id fields!') + test.assertFalse(isinstance(value, ObjectId)) + if isinstance(value, dict): + check_excluded(test, value, excluded_keys) def drain_zipline(test, zipline, p_blocking=False): assert test.ctx, "method expects a valid zmq context" From e5b7c69a68dc3324605fc30c5331858c9760c958 Mon Sep 17 00:00:00 2001 From: fawce Date: Wed, 8 Aug 2012 21:48:16 -0400 Subject: [PATCH 72/73] simplified to be more flexible --- tests/test_exception_handling.py | 61 ++++++++++---------------------- 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/tests/test_exception_handling.py b/tests/test_exception_handling.py index f604466a..d1561837 100644 --- a/tests/test_exception_handling.py +++ b/tests/test_exception_handling.py @@ -50,14 +50,14 @@ class ExceptionTestCase(TestCase): message = output[0]['payload'] for field in ['date', 'message', 'name', 'stack']: assert field in message.keys() - + assert message['message'] == 'integer division or modulo by zero' assert message['name'] == 'ZeroDivisionError' def test_tranform_exception(self): exc_tnfm = StatefulTransform(ExceptionTransform) self.zipline_test_config['transforms'] = [exc_tnfm] - + zipline = SimulatedTrading.create_test_zipline( **self.zipline_test_config ) @@ -67,7 +67,7 @@ class ExceptionTestCase(TestCase): message = output[0]['payload'] for field in ['date', 'message', 'name', 'stack']: assert field in message.keys() - + assert message['message'] == 'An assertion message' assert message['name'] == 'AssertionError' @@ -89,8 +89,11 @@ class ExceptionTestCase(TestCase): self.assertEqual(output[-1]['prefix'], 'EXCEPTION') payload = output[-1]['payload'] self.assertTrue(payload['date']) - del 
payload['date'] - check(self, payload, INITIALIZE_TB) + self.assertEqual(payload['message'],'Algo exception in initialize') + self.assertEqual(payload['name'],'Exception') + # make sure our path shortening is working + self.assertEqual(payload['stack'][0]['filename'], '/zipline/lines.py') + self.assertEqual(payload['stack'][-1]['filename'], '/zipline/test_algorithms.py') def test_exception_in_handle_data(self): # Simulation @@ -110,7 +113,11 @@ class ExceptionTestCase(TestCase): payload = output[-1]['payload'] self.assertTrue(payload['date']) del payload['date'] - check(self, payload, HANDLE_DATA_TB) + self.assertEqual(payload['message'],'Algo exception in handle_data') + self.assertEqual(payload['name'],'Exception') + # make sure our path shortening is working + self.assertEqual(payload['stack'][0]['filename'], '/zipline/lines.py') + self.assertEqual(payload['stack'][-1]['filename'], '/zipline/test_algorithms.py') def test_zerodivision_exception_in_handle_data(self): @@ -131,40 +138,8 @@ class ExceptionTestCase(TestCase): payload = output[-1]['payload'] self.assertTrue(payload['date']) del payload['date'] - check(self, payload, ZERO_DIV_TB) - - # TODO: - # - define more zipline failure modes: exception in other - # components, exception in Monitor, etc. write tests - # for those scenarios. 
- -INITIALIZE_TB =\ -{'message': 'Algo exception in initialize', - 'name': 'Exception', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 105, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 171, 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.initialize()', 'lineno': 212, 'method': '_gen'}, - {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in initialize")', 'lineno': 166, 'method': 'initialize'}]} - -HANDLE_DATA_TB =\ -{'message': 'Algo exception in handle_data', - 'name': 'Exception', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 'method': 'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 105, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 168, 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 240, 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 264, 'method': 'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 289, 'method': 'simulate_current_snapshot'}, - {'filename': '/zipline/test_algorithms.py', 'line': 'raise Exception("Algo exception in handle_data")', 'lineno': 187, 'method': 'handle_data'}]} - -ZERO_DIV_TB= \ -{'message': 'integer division or modulo by zero', - 'name': 'ZeroDivisionError', - 'stack': [{'filename': '/zipline/lines.py', 'line': 'for event in self.gen:', 'lineno': 150, 'method': 
'stream_results'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'for message in self.algo_sim:', 'lineno': 105, 'method': 'simulate'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'return self.__generator.next()', 'lineno': 168, 'method': 'next'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.update_current_snapshot(event)', 'lineno': 240, 'method': '_gen'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.simulate_current_snapshot()', 'lineno': 264, 'method': 'update_current_snapshot'}, - {'filename': '/zipline/gens/tradesimulation.py', 'line': 'self.algo.handle_data(self.universe)', 'lineno': 289, 'method': 'simulate_current_snapshot'}, - {'filename': '/zipline/test_algorithms.py', 'line': '5/0', 'lineno': 218, 'method': 'handle_data'}]} + self.assertEqual(payload['message'],'integer division or modulo by zero') + self.assertEqual(payload['name'],'ZeroDivisionError') + # make sure our path shortening is working + self.assertEqual(payload['stack'][0]['filename'], '/zipline/lines.py') + self.assertEqual(payload['stack'][-1]['filename'], '/zipline/test_algorithms.py') From 36a7c5da30ab32385c2cc53a1911047bd122b87c Mon Sep 17 00:00:00 2001 From: scottsanderson Date: Wed, 8 Aug 2012 22:24:43 -0400 Subject: [PATCH 73/73] whitespace --- zipline/gens/tradesimulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipline/gens/tradesimulation.py b/zipline/gens/tradesimulation.py index 6f3323bd..5e56b4f0 100644 --- a/zipline/gens/tradesimulation.py +++ b/zipline/gens/tradesimulation.py @@ -87,7 +87,7 @@ class TradeSimulationClient(object): self.sids ) with_portfolio = perf_tracker.transform(with_filled_orders) - + # Pass the messages from perf along with the trading client's # state into the algorithm for simulation. We provide the # trading client so that the algorithm can place new orders