Files
catalyst/zipline/sources/data_frame_source.py
T
Thomas Wiecki 439f404bed MAINT Increase default volume from 1000 to 1e9
For more discussion see https://github.com/quantopian/zipline/pull/485.

Basically, 1000 is just a number that was supposed to be high enough if no volume was available. It turns out that number is actually very low so now we are increasing it so that volume restrictions should no matter. 1e9 of shares ought to be enough for anybody ;).

Thanks to @jlowin for pointing that out.
2015-02-12 16:42:52 +01:00

157 lines
4.6 KiB
Python

#
# Copyright 2013 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tools to generate data sources.
"""
import pandas as pd
from zipline.gens.utils import hash_args
from zipline.sources.data_source import DataSource
class DataFrameSource(DataSource):
"""
Yields all events in event_list that match the given sid_filter.
If no event_list is specified, generates an internal stream of events
to filter. Returns all events if filter is None.
Configuration options:
sids : list of values representing simulated internal sids
start : start date
delta : timedelta between internal events
filter : filter to remove the sids
"""
def __init__(self, data, **kwargs):
assert isinstance(data.index, pd.tseries.index.DatetimeIndex)
self.data = data
# Unpack config dictionary with default values.
self.sids = kwargs.get('sids', data.columns)
self.start = kwargs.get('start', data.index[0])
self.end = kwargs.get('end', data.index[-1])
# Hash_value for downstream sorting.
self.arg_string = hash_args(data, **kwargs)
self._raw_data = None
@property
def mapping(self):
return {
'dt': (lambda x: x, 'dt'),
'sid': (lambda x: x, 'sid'),
'price': (float, 'price'),
'volume': (int, 'volume'),
}
@property
def instance_hash(self):
return self.arg_string
def raw_data_gen(self):
for dt, series in self.data.iterrows():
for sid, price in series.iteritems():
if sid in self.sids:
event = {
'dt': dt,
'sid': sid,
'price': price,
# Just chose something large
# if no volume available.
'volume': 1e9,
}
yield event
@property
def raw_data(self):
if not self._raw_data:
self._raw_data = self.raw_data_gen()
return self._raw_data
class DataPanelSource(DataSource):
"""
Yields all events in event_list that match the given sid_filter.
If no event_list is specified, generates an internal stream of events
to filter. Returns all events if filter is None.
Configuration options:
sids : list of values representing simulated internal sids
start : start date
delta : timedelta between internal events
filter : filter to remove the sids
"""
def __init__(self, data, **kwargs):
assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex)
self.data = data
# Unpack config dictionary with default values.
self.sids = kwargs.get('sids', data.items)
self.start = kwargs.get('start', data.major_axis[0])
self.end = kwargs.get('end', data.major_axis[-1])
# Hash_value for downstream sorting.
self.arg_string = hash_args(data, **kwargs)
self._raw_data = None
@property
def mapping(self):
mapping = {
'dt': (lambda x: x, 'dt'),
'sid': (lambda x: x, 'sid'),
'price': (float, 'price'),
'volume': (int, 'volume'),
}
# Add additional fields.
for field_name in self.data.minor_axis:
if field_name in ['price', 'volume', 'dt', 'sid']:
continue
mapping[field_name] = (lambda x: x, field_name)
return mapping
@property
def instance_hash(self):
return self.arg_string
def raw_data_gen(self):
for dt in self.data.major_axis:
df = self.data.major_xs(dt)
for sid, series in df.iteritems():
if sid in self.sids:
event = {
'dt': dt,
'sid': sid,
}
for field_name, value in series.iteritems():
event[field_name] = value
yield event
@property
def raw_data(self):
if not self._raw_data:
self._raw_data = self.raw_data_gen()
return self._raw_data