catalyst/tests/history_cases.py

"""
Test case definitions for history tests.
"""

import pandas as pd
import numpy as np

from zipline.finance.trading import TradingEnvironment, noop_load
from zipline.history.history import HistorySpec
from zipline.protocol import BarData
from zipline.utils.test_utils import to_utc

_cases_env = TradingEnvironment(load=noop_load)


def mixed_frequency_expected_index(count, frequency):
    """
    Helper for enumerating expected indices for test_mixed_frequency.
    """
    minute = MIXED_FREQUENCY_MINUTES[count]

    if frequency == '1d':
        return [_cases_env.previous_open_and_close(minute)[1], minute]
    elif frequency == '1m':
        return [_cases_env.previous_market_minute(minute), minute]


def mixed_frequency_expected_data(count, frequency):
    """
    Helper for enumerating expected data test_mixed_frequency.
    """
    if frequency == '1d':
        # First day of this test is July 3rd, which is a half day.
        if count < 210:
            return [np.nan, count]
        else:
            return [209, count]
    elif frequency == '1m':
        if count == 0:
            return [np.nan, count]
        else:
            return [count - 1, count]


MIXED_FREQUENCY_MINUTES = _cases_env.market_minute_window(
    to_utc('2013-07-03 9:31AM'), 600,
)
ONE_MINUTE_PRICE_ONLY_SPECS = [
    HistorySpec(1, '1m', 'price', True, _cases_env, data_frequency='minute'),
]
DAILY_OPEN_CLOSE_SPECS = [
    HistorySpec(3, '1d', 'open_price', False, _cases_env,
                data_frequency='minute'),
    HistorySpec(3, '1d', 'close_price', False, _cases_env,
                data_frequency='minute'),
]
ILLIQUID_PRICES_SPECS = [
    HistorySpec(3, '1m', 'price', False, _cases_env, data_frequency='minute'),
    HistorySpec(5, '1m', 'price', True, _cases_env, data_frequency='minute'),
]
MIXED_FREQUENCY_SPECS = [
    HistorySpec(1, '1m', 'price', False, _cases_env, data_frequency='minute'),
    HistorySpec(2, '1m', 'price', False, _cases_env, data_frequency='minute'),
    HistorySpec(2, '1d', 'price', False, _cases_env, data_frequency='minute'),
]
MIXED_FIELDS_SPECS = [
    HistorySpec(3, '1m', 'price', True, _cases_env, data_frequency='minute'),
    HistorySpec(3, '1m', 'open_price', True, _cases_env,
                data_frequency='minute'),
    HistorySpec(3, '1m', 'close_price', True, _cases_env,
                data_frequency='minute'),
    HistorySpec(3, '1m', 'high', True, _cases_env, data_frequency='minute'),
    HistorySpec(3, '1m', 'low', True, _cases_env, data_frequency='minute'),
    HistorySpec(3, '1m', 'volume', True, _cases_env, data_frequency='minute'),
]


HISTORY_CONTAINER_TEST_CASES = {
    #      June 2013
    # Su Mo Tu We Th Fr Sa
    #                    1
    #  2  3  4  5  6  7  8
    #  9 10 11 12 13 14 15
    # 16 17 18 19 20 21 22
    # 23 24 25 26 27 28 29
    # 30

    'test one minute price only': {
        # A list of HistorySpec objects.
        'specs': ONE_MINUTE_PRICE_ONLY_SPECS,
        # Sids for the test.
        'sids': [1],
        # Start date for test.
        'dt': to_utc('2013-06-21 9:31AM'),
        # Sequency of updates to the container
        'updates': [
            BarData(
                {
                    1: {
                        'price': 5,
                        'dt': to_utc('2013-06-21 9:31AM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'price': 6,
                        'dt': to_utc('2013-06-21 9:32AM'),
                    },
                },
            ),
        ],
        # Expected results
        'expected': {
            ONE_MINUTE_PRICE_ONLY_SPECS[0].key_str: [
                pd.DataFrame(
                    data={
                        1: [5],
                    },
                    index=[
                        to_utc('2013-06-21 9:31AM'),
                    ],
                ),
                pd.DataFrame(
                    data={
                        1: [6],
                    },
                    index=[
                        to_utc('2013-06-21 9:32AM'),
                    ],
                ),
            ],
        },
    },

    'test daily open close': {
        # A list of HistorySpec objects.
        'specs': DAILY_OPEN_CLOSE_SPECS,

        # Sids for the test.
        'sids': [1],

        # Start date for test.
        'dt': to_utc('2013-06-21 9:31AM'),

        # Sequence of updates to the container
        'updates': [
            BarData(
                {
                    1: {
                        'open_price': 10,
                        'close_price': 11,
                        'dt': to_utc('2013-06-21 10:00AM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'open_price': 12,
                        'close_price': 13,
                        'dt': to_utc('2013-06-21 3:30PM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'open_price': 14,
                        'close_price': 15,
                        # Wait a full market day before the next bar.
                        # We should end up with nans for Monday the 24th.
                        'dt': to_utc('2013-06-25 9:31AM'),
                    },
                },
            ),
        ],

        # Dictionary mapping spec_key -> list of expected outputs
        'expected': {
            # open
            DAILY_OPEN_CLOSE_SPECS[0].key_str: [
                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 10]
                    },
                    index=[
                        to_utc('2013-06-19 4:00PM'),
                        to_utc('2013-06-20 4:00PM'),
                        to_utc('2013-06-21 10:00AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 10]
                    },
                    index=[
                        to_utc('2013-06-19 4:00PM'),
                        to_utc('2013-06-20 4:00PM'),
                        to_utc('2013-06-21 3:30PM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [10, np.nan, 14]
                    },
                    index=[
                        to_utc('2013-06-21 4:00PM'),
                        to_utc('2013-06-24 4:00PM'),
                        to_utc('2013-06-25 9:31AM'),
                    ],
                ),
            ],
            # close
            DAILY_OPEN_CLOSE_SPECS[1].key_str: [
                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 11]
                    },
                    index=[
                        to_utc('2013-06-19 4:00PM'),
                        to_utc('2013-06-20 4:00PM'),
                        to_utc('2013-06-21 10:00AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 13]
                    },
                    index=[
                        to_utc('2013-06-19 4:00PM'),
                        to_utc('2013-06-20 4:00PM'),
                        to_utc('2013-06-21 3:30PM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [13, np.nan, 15]
                    },
                    index=[
                        to_utc('2013-06-21 4:00PM'),
                        to_utc('2013-06-24 4:00PM'),
                        to_utc('2013-06-25 9:31AM'),
                    ],
                ),
            ],
        },
    },
    'test illiquid prices': {

        # A list of HistorySpec objects.
        'specs': ILLIQUID_PRICES_SPECS,

        # Sids for the test.
        'sids': [1],

        # Start date for test.
        'dt': to_utc('2013-06-28 9:31AM'),

        # Sequence of updates to the container
        'updates': [
            BarData(
                {
                    1: {
                        'price': 10,
                        'dt': to_utc('2013-06-28 9:31AM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'price': 11,
                        'dt': to_utc('2013-06-28 9:32AM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'price': 12,
                        'dt': to_utc('2013-06-28 9:33AM'),
                    },
                },
            ),
            BarData(
                {
                    1: {
                        'price': 13,
                        # Note: Skipping 9:34 to simulate illiquid bar/missing
                        # data.
                        'dt': to_utc('2013-06-28 9:35AM'),
                    },
                },
            ),
        ],

        # Dictionary mapping spec_key -> list of expected outputs
        'expected': {
            ILLIQUID_PRICES_SPECS[0].key_str: [
                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 10],
                    },
                    index=[
                        to_utc('2013-06-27 3:59PM'),
                        to_utc('2013-06-27 4:00PM'),
                        to_utc('2013-06-28 9:31AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [np.nan, 10, 11],
                    },
                    index=[
                        to_utc('2013-06-27 4:00PM'),
                        to_utc('2013-06-28 9:31AM'),
                        to_utc('2013-06-28 9:32AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [10, 11, 12],
                    },
                    index=[
                        to_utc('2013-06-28 9:31AM'),
                        to_utc('2013-06-28 9:32AM'),
                        to_utc('2013-06-28 9:33AM'),
                    ],
                ),

                # Since there's no update for 9:34, this is called at 9:35.
                pd.DataFrame(
                    data={
                        1: [12, np.nan, 13],
                    },
                    index=[
                        to_utc('2013-06-28 9:33AM'),
                        to_utc('2013-06-28 9:34AM'),
                        to_utc('2013-06-28 9:35AM'),
                    ],
                ),
            ],

            ILLIQUID_PRICES_SPECS[1].key_str: [
                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, np.nan, np.nan, 10],
                    },
                    index=[
                        to_utc('2013-06-27 3:57PM'),
                        to_utc('2013-06-27 3:58PM'),
                        to_utc('2013-06-27 3:59PM'),
                        to_utc('2013-06-27 4:00PM'),
                        to_utc('2013-06-28 9:31AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, np.nan, 10, 11],
                    },
                    index=[
                        to_utc('2013-06-27 3:58PM'),
                        to_utc('2013-06-27 3:59PM'),
                        to_utc('2013-06-27 4:00PM'),
                        to_utc('2013-06-28 9:31AM'),
                        to_utc('2013-06-28 9:32AM'),
                    ],
                ),

                pd.DataFrame(
                    data={
                        1: [np.nan, np.nan, 10, 11, 12],
                    },
                    index=[
                        to_utc('2013-06-27 3:59PM'),
                        to_utc('2013-06-27 4:00PM'),
                        to_utc('2013-06-28 9:31AM'),
                        to_utc('2013-06-28 9:32AM'),
                        to_utc('2013-06-28 9:33AM'),
                    ],
                ),

                # Since there's no update for 9:34, this is called at 9:35.
                # The 12 value from 9:33 should be forward-filled.
                pd.DataFrame(
                    data={
                        1: [10, 11, 12, 12, 13],
                    },
                    index=[
                        to_utc('2013-06-28 9:31AM'),
                        to_utc('2013-06-28 9:32AM'),
                        to_utc('2013-06-28 9:33AM'),
                        to_utc('2013-06-28 9:34AM'),
                        to_utc('2013-06-28 9:35AM'),
                    ],
                ),
            ],
        },
    },

    'test mixed frequencies': {

        # A list of HistorySpec objects.
        'specs': MIXED_FREQUENCY_SPECS,

        # Sids for the test.
        'sids': [1],

        # Start date for test.
        #      July 2013
        # Su Mo Tu We Th Fr Sa
        #     1  2  3  4  5  6
        #  7  8  9 10 11 12 13
        # 14 15 16 17 18 19 20
        # 21 22 23 24 25 26 27
        # 28 29 30 31
        'dt': to_utc('2013-07-03 9:31AM'),

        # Sequence of updates to the container
        'updates': [
            BarData(
                {
                    1: {
                        'price': count,
                        'dt': dt,
                    }
                }
            )
            for count, dt in enumerate(MIXED_FREQUENCY_MINUTES)
        ],

        # Dictionary mapping spec_key -> list of expected outputs.
        'expected': {

            MIXED_FREQUENCY_SPECS[0].key_str: [
                pd.DataFrame(
                    data={
                        1: [count],
                    },
                    index=[minute],
                )
                for count, minute in enumerate(MIXED_FREQUENCY_MINUTES)
            ],

            MIXED_FREQUENCY_SPECS[1].key_str: [
                pd.DataFrame(
                    data={
                        1: mixed_frequency_expected_data(count, '1m'),
                    },
                    index=mixed_frequency_expected_index(count, '1m'),
                )
                for count in range(len(MIXED_FREQUENCY_MINUTES))
            ],

            MIXED_FREQUENCY_SPECS[2].key_str: [
                pd.DataFrame(
                    data={
                        1: mixed_frequency_expected_data(count, '1d'),
                    },
                    index=mixed_frequency_expected_index(count, '1d'),
                )
                for count in range(len(MIXED_FREQUENCY_MINUTES))
            ]
        },
    },

    'test multiple fields and sids': {

        # A list of HistorySpec objects.
        'specs': MIXED_FIELDS_SPECS,

        # Sids for the test.
        'sids': [1, 10],

        # Start date for test.
        'dt': to_utc('2013-06-28 9:31AM'),

        # Sequence of updates to the container
        'updates': [
            BarData(
                {
                    1: {
                        'dt': dt,
                        'price': count,
                        'open_price': count,
                        'close_price': count,
                        'high': count,
                        'low': count,
                        'volume': count,
                    },
                    10: {
                        'dt': dt,
                        'price': count * 10,
                        'open_price': count * 10,
                        'close_price': count * 10,
                        'high': count * 10,
                        'low': count * 10,
                        'volume': count * 10,
                    },
                },
            )
            for count, dt in enumerate([
                to_utc('2013-06-28 9:31AM'),
                to_utc('2013-06-28 9:32AM'),
                to_utc('2013-06-28 9:33AM'),
                # NOTE: No update for 9:34
                to_utc('2013-06-28 9:35AM'),
            ])
        ],

        # Dictionary mapping spec_key -> list of expected outputs
        'expected': dict(

            # Build a dict from a list of tuples.  Doing it this way because
            # there are two distinct cases we want to test: forward-fillable
            # fields and non-forward-fillable fields.
            [
                (
                    # Non forward-fill fields
                    key,
                    [
                        pd.DataFrame(
                            data={
                                1: [np.nan, np.nan, 0],
                                10: [np.nan, np.nan, 0],
                            },
                            index=[
                                to_utc('2013-06-27 3:59PM'),
                                to_utc('2013-06-27 4:00PM'),
                                to_utc('2013-06-28 9:31AM'),
                            ],

                        ),
                        pd.DataFrame(
                            data={
                                1: [np.nan, 0, 1],
                                10: [np.nan, 0, 10],
                            },
                            index=[
                                to_utc('2013-06-27 4:00PM'),
                                to_utc('2013-06-28 9:31AM'),
                                to_utc('2013-06-28 9:32AM'),
                            ],
                        ),

                        pd.DataFrame(
                            data={
                                1: [0, 1, 2],
                                10: [0, 10, 20],
                            },
                            index=[
                                to_utc('2013-06-28 9:31AM'),
                                to_utc('2013-06-28 9:32AM'),
                                to_utc('2013-06-28 9:33AM'),
                            ],

                        ),
                        pd.DataFrame(
                            data={
                                1: [2, np.nan, 3],
                                10: [20, np.nan, 30],
                            },
                            index=[
                                to_utc('2013-06-28 9:33AM'),
                                to_utc('2013-06-28 9:34AM'),
                                to_utc('2013-06-28 9:35AM'),
                            ],
                            # For volume, when we are missing data, we replace
                            # it with 0s to show that no trades occured.
                        ).fillna(0 if 'volume' in key else np.nan),
                    ],
                )
                for key in [spec.key_str for spec in MIXED_FIELDS_SPECS
                            if spec.field not in HistorySpec.FORWARD_FILLABLE]
            ] +

            # Concatenate the expected results for non-ffillable with
            # expected result for ffillable.
            [
                (
                    # Forward-fillable fields
                    key,
                    [
                        pd.DataFrame(
                            data={
                                1: [np.nan, np.nan, 0],
                                10: [np.nan, np.nan, 0],
                            },
                            index=[
                                to_utc('2013-06-27 3:59PM'),
                                to_utc('2013-06-27 4:00PM'),
                                to_utc('2013-06-28 9:31AM'),
                            ],
                        ),

                        pd.DataFrame(
                            data={
                                1: [np.nan, 0, 1],
                                10: [np.nan, 0, 10],
                            },
                            index=[
                                to_utc('2013-06-27 4:00PM'),
                                to_utc('2013-06-28 9:31AM'),
                                to_utc('2013-06-28 9:32AM'),
                            ],
                        ),

                        pd.DataFrame(
                            data={
                                1: [0, 1, 2],
                                10: [0, 10, 20],
                            },
                            index=[
                                to_utc('2013-06-28 9:31AM'),
                                to_utc('2013-06-28 9:32AM'),
                                to_utc('2013-06-28 9:33AM'),
                            ],
                        ),

                        pd.DataFrame(
                            data={
                                1: [2, 2, 3],
                                10: [20, 20, 30],
                            },
                            index=[
                                to_utc('2013-06-28 9:33AM'),
                                to_utc('2013-06-28 9:34AM'),
                                to_utc('2013-06-28 9:35AM'),
                            ],
                        ),
                    ],
                )
                for key in [spec.key_str for spec in MIXED_FIELDS_SPECS
                            if spec.field in HistorySpec.FORWARD_FILLABLE]
            ]
        ),
    },
}