Files
catalyst/zipline/protocol.py
T
2012-03-03 21:28:41 -05:00

664 lines
18 KiB
Python

"""
The messaging protocol for Zipline.
Asserts are in place because any protocol error corresponds to a
programmer error so we want it to fail fast and in an obvious way
so it doesn't happen again. ZeroMQ follows the same philosophy.
Notes
=====
Msgpack
-------
Msgpack is the fastest serialization protocol in Python at the
moment. Its 100% C is typically orders of magnitude faster than
json and pickle making it awesome for ZeroMQ.
You can only serialize Python structural primitives: strings,
numeric types, dicts, tuples and lists, and any recursive
combinations of these.
Basically every basestring in Python corresponds to valid
msgpack message since the protocol is highly error tolerant.
Just keep in mind that if you ever unpack a raw msgpack string
make sure it looks like what you intend and/or catch ValueError
and TypeError exceptions.
It also has the nice benefit of never invoking ``eval`` ( unlike
json and pickle) which is a major security boon since it is
impossible to inject arbitrary code for evaluation through messages.
UltraJSON
---------
For anything going to the browser UltraJSON is the fastest
serializer, it's mostly C as well.
The same domain of serialization as msgpack applies: Python
structural primitives. It also has the additional constraint
that anything outside of UTF8 can cause serious problems, so if
you have a strong desire to JSON encode ancient Sanskrit
( admit it, we all do ), just say no.
Data Structures
===============
Enum
----
Classic C style enumeration::
opts = Enum('FOO', 'BAR')
opts.FOO # 0
opts.BAR # 1
opts.FOO == opts.BAR # False
Oh, and if you do this::
protocol.Enum([1,2,3])
Your interpreter will segfault, think of this like an extreme assert.
Namedict
--------
Namedicts are dict like objects that have fields accessible by attribute lookup
as well as being indexable and iterable::
HEARTBEAT_PROTOCOL = namedict({
'REQ' : b'\x01',
'REP' : b'\x02',
})
HEARTBEAT_PROTOCOL.REQ # syntactic sugar
HEARTBEAT_PROTOCOL.REP # oh suga suga
HEARTBEAT_PROTOCOL['REQ'] # classic dictionary index
Namedtuple
----------
From the standard library, namedtuples are great for spec'ing
lightweight, fixed-field data container objects::
from collections import namedtuple
Person = namedtuple('Person', 'name age gender')
bob = Person(name='Bob', age=30, gender='male')
bob.name # 'Bob'
bob.age # 30
bob.gender # male
# The slots on the tuple are also finite and read-only. This
# is a good thing, keeps us honest!
bob.hobby = 'underwater archery'
# Will raise:
# AttributeError: 'Person' object has no attribute 'hobby'
bob.name = 'joe'
# Will raise:
# AttributeError: can't set attribute
# Namedtuples are read-only, but _replace hands back a modified
# copy (the original tuple is left untouched).
bob._replace(gender='female')
# You can also dump out to dictionary form:
bob._asdict()
# OrderedDict([('name', 'Bob'), ('age', 30), ('gender', 'male')])
# Or JSON.
json.dumps(bob._asdict())
'{"gender":"male","age":30,"name":"Bob"}'
"""
import calendar
import copy
import datetime
import numbers
from collections import namedtuple
from ctypes import Structure, c_ubyte

import msgpack
import pytz

import zipline.util as qutil
#import ujson
#import ultrajson_numpy
def Enum(*options):
    """
    Build a C-struct backed enumeration.

    Fast enums are very important when we want really tight zmq
    loops. Each option name becomes a ``c_ubyte`` field whose value is
    the option's position in the argument list::

        opts = Enum('FOO', 'BAR')
        opts.FOO # 0
        opts.BAR # 1

    :param options: field names, in value order.
    :returns: an instance of a freshly built ``ctypes.Structure``
        subclass with one field per option.
    :raises ValueError: if more than 256 options are given.
    """
    # c_ubyte does no overflow checking: a 257th option would wrap to 0
    # and silently alias the first one, so refuse it up front.
    if len(options) > 256:
        raise ValueError(
            "Enum supports at most 256 options, got %d" % len(options)
        )

    class cstruct(Structure):
        _fields_ = [(o, c_ubyte) for o in options]

    return cstruct(*range(len(options)))
def FrameExceptionFactory(name):
    """
    Build an exception class bound to one frame type.

    ``name`` is captured by closure and appears in the error text, so
    every call returns a new, distinct ``InvalidFrame`` class.
    """
    template = "Invalid {framecls} Frame: {got}"

    class InvalidFrame(Exception):

        def __init__(self, got):
            # Keep the offending message around for the error text.
            self.got = got

        def __str__(self):
            return template.format(got=self.got, framecls=name)

    return InvalidFrame
class namedict(object):
    """
    Dict-like object whose entries are also reachable as attributes.

    So that you can use::

        foo.BAR
        -- or --
        foo['BAR']

    All entries live in the instance ``__dict__``. For more complex
    structs use collections.namedtuple.
    """

    def __init__(self, dct=None):
        # A missing or empty mapping simply leaves the instance empty.
        if dct:
            self.__dict__.update(dct)

    def __setitem__(self, key, value):
        """
        Required for use by pymongo as_class parameter to find.

        The ``'_id'`` key is stored under ``'id'``; every other key is
        stored unchanged.
        """
        if key == '_id':
            self.__dict__['id'] = value
        else:
            self.__dict__[key] = value

    def __getitem__(self, key):
        """Return the entry for ``key``; raises KeyError when absent."""
        return self.__dict__[key]

    def keys(self):
        """Return the names of all stored entries."""
        return self.__dict__.keys()

    def as_dict(self):
        """Return a shallow copy of the entries as a plain dict."""
        # shallow copy is O(n)
        return copy.copy(self.__dict__)

    def delete(self, key):
        """Remove the entry for ``key``; raises KeyError when absent."""
        del self.__dict__[key]

    def merge(self, other_nd):
        """Copy every entry of ``other_nd`` into this namedict."""
        assert isinstance(other_nd, namedict)
        self.__dict__.update(other_nd.__dict__)

    def __repr__(self):
        return "namedict: " + str(self.__dict__)

    def __eq__(self, other):
        # !!!! DANGEROUS !!!!
        # Equality is purely by contents: any object whose __dict__
        # matches compares equal, whatever its type.
        if other is None:
            return False
        # Objects without a __dict__ (ints, strings, ...) can never match;
        # answer False instead of raising AttributeError.
        other_entries = getattr(other, '__dict__', None)
        if other_entries is None:
            return False
        return self.__dict__ == other_entries

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep the two consistent.
        return not self.__eq__(other)

    def has_attr(self, name):
        """Return True when an entry called ``name`` is stored."""
        # dict.has_key is deprecated (and gone in Python 3); `in` is the
        # portable spelling.
        return name in self.__dict__
# ================
# Control Protocol
# ================
INVALID_CONTROL_FRAME = FrameExceptionFactory('CONTROL')

# The position of a name in this list is its numeric value:
# requests take 0-4, replies take 5-7.
CONTROL_PROTOCOL = Enum(
    # requests
    'INIT',       # 0
    'INFO',       # 1
    'STATUS',     # 2
    'SHUTDOWN',   # 3
    'KILL',       # 4
    # replies
    'OK',         # 5
    'DONE',       # 6
    'EXCEPTION',  # 7
)
def CONTROL_FRAME(id, status):
    """
    Pack a control message as a msgpack ``(id, status)`` pair.

    :param id: string identifier carried in the frame.
    :param status: int status code.
    :returns: the msgpack-encoded frame.
    """
    assert isinstance(id, basestring)
    assert isinstance(status, int)
    frame = (id, status)
    return msgpack.dumps(frame)
def CONTORL_UNFRAME(msg):
    """
    Decode a frame produced by ``CONTROL_FRAME``.

    :param msg: msgpack-encoded ``(id, status)`` pair.
    :returns: the ``(id, status)`` tuple.
    :raises INVALID_CONTROL_FRAME: when unpacking raises TypeError or
        ValueError (for example, the payload is not a two-item sequence).

    A payload of the right shape but the wrong types is a programmer
    error and surfaces as AssertionError rather than being wrapped.
    """
    assert isinstance(msg, basestring)
    try:
        id, status = msgpack.loads(msg)
        assert isinstance(id, basestring)
        assert isinstance(status, int)
        return id, status
    except (TypeError, ValueError):
        raise INVALID_CONTROL_FRAME(msg)


# The historical name above is misspelled. It stays bound so existing
# callers keep working; new code should use the correct spelling.
CONTROL_UNFRAME = CONTORL_UNFRAME
# ==================
# Heartbeat Protocol
# ==================
# These encode the msgpack equivalent of 1 and 2. The heartbeat
# frame should only be 1 byte on the wire.
HEARTBEAT_PROTOCOL = namedict(dict(
    REQ=b'\x01',
    REP=b'\x02',
))
# ==================
# Component State
# ==================
# Values follow argument order: SOURCE=0, CONDUIT=1, SINK=2.
COMPONENT_TYPE = Enum('SOURCE', 'CONDUIT', 'SINK')

# Values follow argument order: OK=0, DONE=1, EXCEPTION=2.
COMPONENT_STATE = Enum('OK', 'DONE', 'EXCEPTION')
# ==================
# Datasource Protocol
# ==================
INVALID_DATASOURCE_FRAME = FrameExceptionFactory('DATASOURCE')


def DATASOURCE_FRAME(event):
    """
    Wrap a datasource event as a msgpack ``(type, payload)`` pair so
    that unpacking can choose the right UNFRAME for the payload.

    :param event: object with a string ``source_id`` and an int ``type``;
        ``type`` must be ``DATASOURCE_TYPE.TRADE`` or
        ``DATASOURCE_TYPE.ORDER``.
    :returns: the msgpack-encoded frame; the payload is itself the
        msgpack string built by the type-specific framer.
    :raises INVALID_DATASOURCE_FRAME: for any other ``type``.
    """
    assert isinstance(event.source_id, basestring)
    assert isinstance(event.type, int), 'Unexpected type %s' % (event.type)

    # Choose the type-specific framer first, then emit from one place.
    if event.type == DATASOURCE_TYPE.TRADE:
        frame_payload = TRADE_FRAME
    elif event.type == DATASOURCE_TYPE.ORDER:
        frame_payload = ORDER_SOURCE_FRAME
    else:
        raise INVALID_DATASOURCE_FRAME(str(event))

    return msgpack.dumps((event.type, frame_payload(event)))
def DATASOURCE_UNFRAME(msg):
    """
    Decode a frame produced by ``DATASOURCE_FRAME``.

    The outer frame is a ``(type, payload)`` pair; the payload is handed
    to the UNFRAME function that matches the type.

    :returns: whatever the type-specific UNFRAME returns. For TRADE that
        is a namedict with::

            - sid
            - price
            - volume
            - dt
            - type
            - source_id

    :raises INVALID_DATASOURCE_FRAME: for an unknown type, or when
        unpacking the outer frame raises TypeError or ValueError.
    """
    try:
        ds_type, payload = msgpack.loads(msg)
        assert isinstance(ds_type, int)

        if ds_type == DATASOURCE_TYPE.TRADE:
            unframe_payload = TRADE_UNFRAME
        elif ds_type == DATASOURCE_TYPE.ORDER:
            unframe_payload = ORDER_SOURCE_UNFRAME
        else:
            raise INVALID_DATASOURCE_FRAME(msg)

        return unframe_payload(payload)
    except (TypeError, ValueError):
        raise INVALID_DATASOURCE_FRAME(msg)
# ==================
# Feed Protocol
# ==================
INVALID_FEED_FRAME = FrameExceptionFactory('FEED')


def FEED_FRAME(event):
    """
    Serialize a feed event as a msgpack dict.

    :param event: a namedict with at least::

        - source_id
        - type
        - dt

    Note that ``PACK_DATE`` is applied to the event that was passed in,
    so the caller's object is modified, not a copy.
    """
    assert isinstance(event, namedict)
    # Reading the required fields makes an incomplete event fail right
    # here with AttributeError.
    event.source_id
    event.type
    PACK_DATE(event)
    return msgpack.dumps(event.as_dict())
def FEED_UNFRAME(msg):
    """
    Decode a frame produced by ``FEED_FRAME``.

    :returns: a namedict of the payload with the date fields run
        through ``UNPACK_DATE``.
    :raises INVALID_FEED_FRAME: when decoding raises TypeError or
        ValueError.
    """
    try:
        fields = msgpack.loads(msg)
        #TODO: anything we can do to assert more about the content of the dict?
        assert isinstance(fields, dict)
        event = namedict(fields)
        UNPACK_DATE(event)
        return event
    except (TypeError, ValueError):
        raise INVALID_FEED_FRAME(msg)
# ==================
# Transform Protocol
# ==================
INVALID_TRANSFORM_FRAME = FrameExceptionFactory('TRANSFORM')


def TRANSFORM_FRAME(name, value):
    """
    Pack a transform result as a msgpack ``(name, value)`` pair.

    :param name: string name of the transform.
    :param value: the transform's output. ``None`` is sent as
        ``TRANSFORM_TYPE.EMPTY``; a TRANSACTION value is first packed
        with ``TRANSACTION_FRAME``.
    :returns: the msgpack-encoded frame.
    """
    assert isinstance(name, basestring)
    # Identity test: `== None` would be routed through the value's own
    # __eq__, which is not guaranteed to behave for arbitrary payloads.
    if value is None:
        return msgpack.dumps((name, TRANSFORM_TYPE.EMPTY))
    if name == TRANSFORM_TYPE.TRANSACTION:
        value = TRANSACTION_FRAME(value)
    return msgpack.dumps((name, value))
def TRANSFORM_UNFRAME(msg):
    """
    Decode a frame produced by ``TRANSFORM_FRAME``.

    :rtype: namedict with <transform_name>:<transform_value>; the value
        is ``None`` when the frame carried ``TRANSFORM_TYPE.EMPTY``.
    :raises INVALID_TRANSFORM_FRAME: when decoding raises TypeError or
        ValueError.
    """
    try:
        name, value = msgpack.loads(msg)
        # Check the name before the EMPTY shortcut so that empty frames
        # are held to the same contract as populated ones.
        assert isinstance(name, basestring)
        if value == TRANSFORM_TYPE.EMPTY:
            return namedict({name: None})
        #TODO: anything we can do to assert more about the content of the dict?
        if name == TRANSFORM_TYPE.PASSTHROUGH:
            value = FEED_UNFRAME(value)
        elif name == TRANSFORM_TYPE.TRANSACTION:
            value = TRANSACTION_UNFRAME(value)
        return namedict({name: value})
    except (TypeError, ValueError):
        raise INVALID_TRANSFORM_FRAME(msg)
# ==================
# Merge Protocol
# ==================
INVALID_MERGE_FRAME = FrameExceptionFactory('MERGE')


def MERGE_FRAME(event):
    """
    Serialize a merged event as a msgpack dict.

    :param event: a namedict with at least a ``dt`` field. If it has a
        TRANSACTION entry, that entry is replaced by
        ``TRANSFORM_TYPE.EMPTY`` when it is ``None`` and by its
        ``TRANSACTION_FRAME`` encoding otherwise.

    The event that was passed in is modified (date packing and the
    TRANSACTION replacement both happen on the caller's object).
    """
    assert isinstance(event, namedict)
    PACK_DATE(event)
    if event.has_attr(TRANSFORM_TYPE.TRANSACTION):
        # Identity test: a transaction is a namedict with its own
        # __eq__, so `== None` is the wrong tool here.
        if event.TRANSACTION is None:
            event.TRANSACTION = TRANSFORM_TYPE.EMPTY
        else:
            event.TRANSACTION = TRANSACTION_FRAME(event.TRANSACTION)
    return msgpack.dumps(event.as_dict())
def MERGE_UNFRAME(msg):
    """
    Decode a frame produced by ``MERGE_FRAME``.

    :returns: a namedict of the payload; a TRANSACTION entry, when
        present, is turned back into ``None`` or an unframed
        transaction, and the date fields are run through
        ``UNPACK_DATE``.
    :raises INVALID_MERGE_FRAME: when decoding raises TypeError or
        ValueError.
    """
    try:
        fields = msgpack.loads(msg)
        #TODO: anything we can do to assert more about the content of the dict?
        assert isinstance(fields, dict)
        event = namedict(fields)
        if event.has_attr(TRANSFORM_TYPE.TRANSACTION):
            packed_txn = event.TRANSACTION
            if packed_txn == TRANSFORM_TYPE.EMPTY:
                event.TRANSACTION = None
            else:
                event.TRANSACTION = TRANSACTION_UNFRAME(packed_txn)
        UNPACK_DATE(event)
        return event
    except (TypeError, ValueError):
        raise INVALID_MERGE_FRAME(msg)
# ==================
# Finance Protocol
# ==================
INVALID_ORDER_FRAME = FrameExceptionFactory('ORDER')
INVALID_TRADE_FRAME = FrameExceptionFactory('TRADE')

# ==================
# Trades - Should only be called from inside DATASOURCE_ (UN)FRAME.
# ==================


def TRADE_FRAME(event):
    """
    Pack a trade event as a msgpack tuple.

    :event: should be a namedict with::

        - source_id -- string id of the datasource sending the trade
        - type      -- must equal DATASOURCE_TYPE.TRADE
        - sid       -- int security id
        - price     -- float price of the trade
        - volume    -- int volume of the trade
        - dt        -- datetime of the trade

    Wire order: sid, price, volume, epoch, micros, type, source_id.
    ``PACK_DATE`` is applied to the event that was passed in.
    """
    assert isinstance(event, namedict)
    assert isinstance(event.source_id, basestring)
    assert event.type == DATASOURCE_TYPE.TRADE
    assert isinstance(event.sid, int)
    assert isinstance(event.price, float)
    assert isinstance(event.volume, int)
    PACK_DATE(event)
    wire_fields = (
        event.sid,
        event.price,
        event.volume,
        event.epoch,
        event.micros,
        event.type,
        event.source_id,
    )
    return msgpack.dumps(wire_fields)
def TRADE_UNFRAME(msg):
    """
    Decode a frame produced by ``TRADE_FRAME``.

    :returns: a namedict with sid, price, volume, type, source_id and
        the ``dt`` rebuilt by ``UNPACK_DATE``.
    :raises INVALID_TRADE_FRAME: when decoding raises TypeError or
        ValueError (for example, the wrong number of fields).
    """
    try:
        wire_fields = msgpack.loads(msg)
        sid, price, volume, epoch, micros, source_type, source_id = wire_fields
        assert isinstance(sid, int)
        assert isinstance(price, float)
        assert isinstance(volume, int)
        trade = namedict({
            'sid': sid,
            'price': price,
            'volume': volume,
            'epoch': epoch,
            'micros': micros,
            'type': source_type,
            'source_id': source_id,
        })
        # UNPACK_DATE hands back the same object it was given.
        return UNPACK_DATE(trade)
    except (TypeError, ValueError):
        raise INVALID_TRADE_FRAME(msg)
# =========
# Orders - from client to order source
# =========
def ORDER_FRAME(sid, amount):
    """
    Pack an order as a msgpack ``(sid, amount)`` pair.

    :param sid: int security id.
    :param amount: int number of shares (no partial shares).
    """
    assert isinstance(sid, int)
    assert isinstance(amount, int) #no partial shares...
    order = (sid, amount)
    return msgpack.dumps(order)
def ORDER_UNFRAME(msg):
    """
    Decode a frame produced by ``ORDER_FRAME``.

    :returns: the ``(sid, amount)`` tuple.
    :raises INVALID_ORDER_FRAME: when decoding raises TypeError or
        ValueError.
    """
    try:
        sid, amount = msgpack.loads(msg)
        assert isinstance(sid, int)
        assert isinstance(amount, int)
    except (TypeError, ValueError):
        raise INVALID_ORDER_FRAME(msg)
    return sid, amount
#
# ==================
# TRANSACTIONS - Should only be called from inside TRANSFORM_(UN)FRAME.
# ==================
def TRANSACTION_FRAME(event):
    """
    Pack a transaction as a msgpack tuple.

    :param event: a namedict with an int ``sid``, float ``price``,
        float ``commission``, int ``amount`` and a ``dt`` datetime.

    Wire order: sid, price, amount, commission, epoch, micros.
    ``PACK_DATE`` is applied to the event that was passed in.
    """
    assert isinstance(event, namedict)
    assert isinstance(event.sid, int)
    assert isinstance(event.price, float)
    assert isinstance(event.commission, float)
    assert isinstance(event.amount, int)
    PACK_DATE(event)
    wire_fields = (
        event.sid,
        event.price,
        event.amount,
        event.commission,
        event.epoch,
        event.micros,
    )
    return msgpack.dumps(wire_fields)
def TRANSACTION_UNFRAME(msg):
    """
    Decode a frame produced by ``TRANSACTION_FRAME``.

    :returns: a namedict with sid, price, amount, commission and the
        ``dt`` rebuilt by ``UNPACK_DATE``.
    :raises INVALID_TRADE_FRAME: when decoding raises TypeError or
        ValueError.
    """
    # NOTE(review): failures are reported as INVALID_TRADE_FRAME; no
    # transaction-specific exception is visible here -- confirm that
    # this is intended.
    try:
        sid, price, amount, commission, epoch, micros = msgpack.loads(msg)
        assert isinstance(sid, int)
        assert isinstance(price, float)
        assert isinstance(commission, float)
        assert isinstance(amount, int)
        txn = namedict({
            'sid': sid,
            'price': price,
            'amount': amount,
            'commission': commission,
            'epoch': epoch,
            'micros': micros,
        })
        # UNPACK_DATE hands back the same object it was given.
        return UNPACK_DATE(txn)
    except (TypeError, ValueError):
        raise INVALID_TRADE_FRAME(msg)
# =========
# Orders - from order source to feed
# - should only be called from inside DATASOURCE_(UN)FRAME
# =========
def ORDER_SOURCE_FRAME(event):
    """
    Pack an order-source event as a msgpack tuple.

    :param event: object with an int ``sid``, int ``amount``, string
        ``source_id``, a ``type`` equal to ``DATASOURCE_TYPE.ORDER`` and
        a ``dt`` datetime.

    Wire order: sid, amount, epoch, micros, source_id, type.
    ``PACK_DATE`` is applied to the event that was passed in.
    """
    assert isinstance(event.sid, int)
    assert isinstance(event.amount, int) #no partial shares...
    assert isinstance(event.source_id, basestring)
    assert event.type == DATASOURCE_TYPE.ORDER
    PACK_DATE(event)
    wire_fields = (
        event.sid,
        event.amount,
        event.epoch,
        event.micros,
        event.source_id,
        event.type,
    )
    return msgpack.dumps(wire_fields)
def ORDER_SOURCE_UNFRAME(msg):
    """
    Decode a frame produced by ``ORDER_SOURCE_FRAME``.

    :returns: a namedict with sid, amount, source_id, type and the
        ``dt`` rebuilt by ``UNPACK_DATE``.
    :raises INVALID_ORDER_FRAME: when decoding raises TypeError or
        ValueError.
    """
    try:
        sid, amount, epoch, micros, source_id, source_type = msgpack.loads(msg)
        # Validate the scalar fields before wrapping them up.
        assert isinstance(sid, int)
        assert isinstance(amount, int)
        assert isinstance(source_id, basestring)
        assert isinstance(source_type, int)
        order = namedict({
            "sid": sid,
            "amount": amount,
            "epoch": epoch,
            "micros": micros,
            "source_id": source_id,
            "type": source_type,
        })
        UNPACK_DATE(order)
        return order
    except (TypeError, ValueError):
        raise INVALID_ORDER_FRAME(msg)
# =================
# Date Helpers
# =================
def PACK_DATE(event):
    """
    Replace ``event.dt`` with wire-friendly ``epoch`` / ``micros`` entries.

    The event is modified in place: ``epoch`` (whole seconds since the
    Unix epoch, in UTC) and ``micros`` (the microsecond part of ``dt``)
    are added and ``dt`` is removed.

    :param event: namedict-like object whose ``dt`` is a datetime with
        ``pytz.utc`` as its tzinfo.
    :returns: the same event object.
    """
    assert isinstance(event.dt, datetime.datetime)
    assert event.dt.tzinfo == pytz.utc #utc only please
    # strftime('%s') is a platform extension that reads the fields as
    # *local* time: it is missing on some platforms and produces a
    # shifted epoch on any host whose zone is not UTC. timegm() reads
    # the time tuple as UTC, which is what dt is asserted to be.
    event['epoch'] = calendar.timegm(event.dt.utctimetuple())
    event['micros'] = event.dt.microsecond
    event.delete('dt')
    return event


def UNPACK_DATE(payload):
    """
    Inverse of ``PACK_DATE``: rebuild ``dt`` from ``epoch`` / ``micros``.

    The payload is modified in place: ``epoch`` and ``micros`` are
    removed and ``dt`` is set to a datetime carrying ``pytz.utc``.

    :param payload: namedict-like object with integral ``epoch`` and
        ``micros`` entries.
    :returns: the same payload object.
    """
    assert isinstance(payload.epoch, numbers.Integral)
    assert isinstance(payload.micros, numbers.Integral)
    # Convert straight into UTC. A naive fromtimestamp() would first
    # apply the host's local zone, and the UTC label attached afterwards
    # would then be wrong by the local offset.
    dt = datetime.datetime.fromtimestamp(payload.epoch, pytz.utc)
    dt = dt.replace(microsecond=payload.micros)
    payload.delete('epoch')
    payload.delete('micros')
    payload.dt = dt
    return payload
# Values follow argument order: ORDER=0, TRADE=1.
DATASOURCE_TYPE = Enum('ORDER', 'TRADE')

# Values follow argument order: DONE=0, BREAK=1.
ORDER_PROTOCOL = Enum('DONE', 'BREAK')

#Transform type needs to be a namedict to facilitate merging.
TRANSFORM_TYPE = namedict(dict(
    TRANSACTION='TRANSACTION', #needed?
    PASSTHROUGH='PASSTHROUGH',
    EMPTY='',
))

# Every finance component is identified by its own name.
FINANCE_COMPONENT = namedict(dict(
    (component, component)
    for component in (
        'TRADING_CLIENT',
        'PORTFOLIO_CLIENT',
        'ORDER_SOURCE',
        'TRANSACTION_SIM',
    )
))