Files
catalyst/zipline/gens/sort.py
T
2012-08-06 13:11:16 -04:00

118 lines
3.8 KiB
Python

"""
Generator version of Feed.
"""
from collections import deque
from zipline import ndict
from zipline.gens.utils import \
assert_datasource_unframe_protocol, \
assert_sort_protocol
def date_sort(stream_in, source_ids):
    """
    Merge messages from several sources into one date-ordered stream.

    A generator that takes a generator and a list of source_ids. We
    maintain an internal queue for each id in source_ids. While we
    have messages pending from all sources, we pull the earliest
    message and yield it.

    :param stream_in: iterable of messages. Each message must expose a
        ``source_id`` attribute naming one of ``source_ids`` and a ``dt``
        attribute; the last message of every source must have
        ``dt == "DONE"``.
    :param source_ids: list or tuple of string ids, one per expected source.
    :returns: generator yielding the non-DONE messages, oldest first.
    :raises AssertionError: if ``source_ids`` is not a list/tuple of strings,
        if a message comes from an unexpected source, or if on exhaustion
        of ``stream_in`` any queue holds anything other than a single DONE
        message.
    """
    assert isinstance(source_ids, (list, tuple))

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # same check works on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Set up an internal queue for each expected source.
    sources = {}
    for source_id in source_ids:
        # Wrap in a 1-tuple so a tuple-valued id cannot break formatting.
        assert isinstance(source_id, string_types), \
            "Bad source_id %s" % (source_id,)
        sources[source_id] = deque()

    # Process incoming streams.
    for message in stream_in:
        # Incoming messages should be the output of DATASOURCE_UNFRAME.
        # NOTE(review): the helper presumably asserts internally (it is
        # called bare, like assert_sort_protocol below). The original
        # appended ", <message string>", which only built and discarded a
        # tuple, formatting every message for nothing.
        assert_datasource_unframe_protocol(message)

        # Only allow messages from sources we expect.
        assert message.source_id in sources, "Unexpected source: %s" % message
        sources[message.source_id].append(message)

        # Only pop messages when we have a pending message from
        # all datasources. Stop if all sources have signalled done.
        while ready(sources) and not done(sources):
            event = pop_oldest(sources)
            assert_sort_protocol(event)
            yield event

    # We should have only a done message left in each queue.
    for source_id in sources:
        queue = sources[source_id]
        assert len(queue) == 1, "Bad queue in date_sort on exit: %s" % queue
        assert queue[0].dt == "DONE", \
            "Bad last message in date_sort on exit: %s" % queue
def ready(sources):
    """
    Feed is ready when every internal queue has at least one
    message. Note that this includes DONE messages, so done(sources)
    is True only if ready(sources).

    :param sources: dict mapping source_id to its deque of pending messages.
    :returns: True if no queue is empty (vacuously True for an empty dict).
    :raises AssertionError: if ``sources`` is not a dict.
    """
    assert isinstance(sources, dict)
    # Iterate keys instead of itervalues() so this runs on Python 2 and 3
    # without building an intermediate list.
    return all(queue_is_ready(sources[key]) for key in sources)
def queue_is_ready(queue):
    """
    Return True if ``queue`` holds at least one pending message.

    :param queue: a ``collections.deque`` of messages.
    :raises AssertionError: if ``queue`` is not a deque.
    """
    assert isinstance(queue, deque)
    # An empty deque is falsy; bool() keeps the return type a real bool.
    return bool(queue)
def done(sources):
    """
    Feed is done when all internal queues have only a "DONE" message.

    :param sources: dict mapping source_id to its deque of pending messages.
    :returns: True if every queue is done (vacuously True for an empty dict).
    :raises AssertionError: if ``sources`` is not a dict.
    """
    assert isinstance(sources, dict)
    # Iterate keys instead of itervalues() so this runs on Python 2 and 3
    # without building an intermediate list.
    return all(queue_is_done(sources[key]) for key in sources)
def queue_is_done(queue):
    """
    Return True if the message at the head of ``queue`` is a DONE marker.

    :param queue: a ``collections.deque`` of messages exposing ``dt``.
    :returns: False for an empty queue or one whose head is a regular
        message; True when the head has ``dt == "DONE"``.
    :raises AssertionError: if ``queue`` is not a deque, or if anything
        is queued behind a DONE message.
    """
    assert isinstance(queue, deque)

    # Nothing pending yet, so the source has not signalled completion.
    if not queue:
        return False

    if queue[0].dt == "DONE":
        # DONE must be the final message a source ever sends.
        assert len(queue) == 1, "Message after DONE in date_sort: %s" % queue
        return True

    return False
def pop_oldest(sources):
    """
    Remove and return the oldest pending non-DONE event across all queues.

    Callers must ensure every queue is non-empty and at least one queue
    has a non-DONE head (date_sort guarantees this by checking
    ``ready(sources) and not done(sources)`` first); otherwise indexing an
    empty queue, or dereferencing the missing result, raises.

    :param sources: dict mapping source_id to its deque of pending messages.
    :returns: the event that was popped from the left of its source's queue.
    """
    oldest_event = None

    # Iterate over the dict, checking internal queues for the oldest
    # pending event. Keys are iterated (rather than itervalues()) so the
    # code runs on both Python 2 and 3.
    for source_id in sources:
        current_event = sources[source_id][0]

        # Skip queues that are done.
        if current_event.dt == "DONE":
            continue

        # Any event is older than nothing. Identity test: ``== None``
        # could dispatch to a custom __eq__ on the event type.
        if oldest_event is None:
            oldest_event = current_event
        # Keep the older event. Break ties by source_id. This will
        # trip an assert if we have duplicate sources.
        else:
            oldest_event = older(oldest_event, current_event)

    # Pop the oldest event we found from its queue and return it.
    return sources[oldest_event.source_id].popleft()
def older(oldest, current):
    """
    Return the event with the older timestamp. Break ties by source_id.

    :param oldest: ndict event exposing ``dt`` and ``source_id``.
    :param current: ndict event exposing ``dt`` and ``source_id``.
    :returns: whichever argument has the smaller ``dt``; on equal ``dt``,
        the one with the smaller ``source_id``.
    :raises AssertionError: if either argument is not an ndict, or if both
        events compare equal on ``dt`` and ``source_id``.
    """
    assert isinstance(oldest, ndict)
    # The original checked ``oldest`` twice and never validated ``current``.
    assert isinstance(current, ndict)

    # Try to compare by dt.
    if oldest.dt < current.dt:
        return oldest
    if oldest.dt > current.dt:
        return current

    # Break ties by source_id.
    if oldest.source_id < current.source_id:
        return oldest
    if oldest.source_id > current.source_id:
        return current

    # Raise explicitly: ``assert False`` is stripped under -O, which would
    # make this function silently return None.
    raise AssertionError("Duplicate event")