Files
catalyst/tests/test_rolling_panel.py
T
Scott Sanderson 235954d480 DEV: Overhaul core history logic.
Overhaul the core HistoryContainer logic to be more robust to changing
universes.

Major Changes
-------------
* Remove `return_frame` cache.  The original purpose of using
  return_frames was to avoid having to create new DataFrames on each
  iteration of handle_data, but we ended up having to copy the return
  frames anyway because user code could mutate the frames in place.
  Removing the return_frames reduces unnecessary copying, and reduces
  the logic of `get_history` to just forward-filling and concatenating
  two DataFrames.

* Use a `MultiIndex`ed DataFrame to represent
  `last_known_prior_values`.  This makes lookups faster and greatly
  simplifies the logic of adding and dropping sids.

* HistoryContainer no longer attempts to determine its universe based on
  the contents of its internal buffers.  The TradingAlgorithm
  controlling the container is now responsible for explicitly calling
  `add_sids` or `drop_sids` when securities enter or leave the
  algorithm's universe.  These methods, along with the internal
  `_realign` method, provide a clean interface for changing the universe
  of securities managed by the container.

* Refactor index mutation logic in `RollingPanel` into a
  `MutableIndexRollingPanel` subclass.  Maintenance of the old behavior
  is regrettably necessary to support `BatchTransform`.

* Refactor shared logic from `roll` and `get_history` into a single
  `aggregate_ohlcv_panel` method that's responsible for collapsing an
  OHLCV buffer into a frame.
2014-09-29 14:42:57 -04:00

118 lines
3.8 KiB
Python

#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from collections import deque
import numpy as np
import pandas as pd
import pandas.util.testing as tm
from zipline.utils.data import MutableIndexRollingPanel
class TestMutableIndexRollingPanel(unittest.TestCase):
    """Tests for ``MutableIndexRollingPanel``.

    Each test feeds frames into the panel one date at a time and, after
    every insertion, compares ``get_current()`` with a ``pd.Panel`` that
    the test assembles independently from the frames it has kept.
    """

    def test_basics(self, window=10):
        """Check the rolling window with fixed item and minor labels.

        Thirty daily frames are added.  After each one, the panel returned
        by ``get_current()`` must equal a panel built from the most recent
        ``window`` frames (tracked with a bounded deque of dates).
        """
        items = ['bar', 'baz', 'foo']
        minor = ['A', 'B', 'C', 'D']

        rp = MutableIndexRollingPanel(window, items, minor, cap_multiple=2)

        dates = pd.date_range('2000-01-01', periods=30, tz='utc')

        # Bounded deque: once full, appending silently discards the oldest
        # date, giving the labels of the expected window.
        major_deque = deque(maxlen=window)

        # Every frame ever added, keyed by date; the expected panel selects
        # only the dates still in ``major_deque``.
        frames = {}

        for date in dates:
            frame = pd.DataFrame(np.random.randn(3, 4), index=items,
                                 columns=minor)

            rp.add_frame(date, frame)

            frames[date] = frame
            major_deque.append(date)

            result = rp.get_current()
            expected = pd.Panel(frames, items=list(major_deque),
                                major_axis=items, minor_axis=minor)

            # ``expected`` is keyed date-first; swapping axes 0 and 1 puts
            # it in the layout the result is compared in.
            tm.assert_panel_equal(result, expected.swapaxes(0, 1))

    def test_adding_and_dropping_items(self, n_items=5, n_minor=10, window=10,
                                       periods=30):
        """Check the panel while item and minor labels change every step.

        On each iteration the oldest item label and oldest minor label are
        replaced by a fresh, previously unused label.  The test expects
        that, once ``window`` frames have been added, every further
        ``add_frame`` call drops the oldest date together with the oldest
        item and minor label from the panel's current view.
        """
        np.random.seed(123)

        items = deque(range(n_items))
        minor = deque(range(n_minor))

        expected_items = deque(range(n_items))
        expected_minor = deque(range(n_minor))

        # Start past both initial label ranges so new labels never collide
        # with an existing item or minor label.
        first_nonexistent = max(n_items, n_minor) + 1

        # We want to add new columns in random order.
        add_items = np.arange(first_nonexistent, first_nonexistent + periods)
        np.random.shuffle(add_items)

        # NOTE(review): the same ``items``/``minor`` deques are mutated
        # below; whether the panel observes those mutations depends on its
        # constructor, which is not visible here.
        rp = MutableIndexRollingPanel(window, items, minor, cap_multiple=2)

        dates = pd.date_range('2000-01-01', periods=periods, tz='utc')

        frames = {}
        expected_dates = deque()

        for i, (date, add_item) in enumerate(zip(dates, add_items)):
            frame = pd.DataFrame(np.random.randn(n_items, n_minor),
                                 index=items, columns=minor)

            if i >= window:
                # Old labels and dates should start to get dropped at every
                # call.
                del frames[expected_dates.popleft()]
                expected_minor.popleft()
                expected_items.popleft()

            expected_dates.append(date)

            rp.add_frame(date, frame)

            frames[date] = frame

            result = rp.get_current()

            # Label order is not asserted, only membership.
            np.testing.assert_array_equal(sorted(result.minor_axis.values),
                                          sorted(expected_minor))
            np.testing.assert_array_equal(sorted(result.items.values),
                                          sorted(expected_items))

            # The newest major-axis slice must hold the frame just added.
            tm.assert_frame_equal(frame.T,
                                  result.ix[frame.index, -1, frame.columns])

            expected_result = pd.Panel(frames).swapaxes(0, 1)
            tm.assert_panel_equal(expected_result,
                                  result)

            # Insert new items: rotate the oldest label out of the labels
            # used for the next frame and bring the new one in.
            minor.popleft()
            minor.append(add_item)
            items.popleft()
            items.append(add_item)

            expected_minor.append(add_item)
            expected_items.append(add_item)