mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 06:12:06 +08:00
647 lines
21 KiB
Python
647 lines
21 KiB
Python
"""
|
|
Test case definitions for history tests.
|
|
"""
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
from zipline.finance.trading import TradingEnvironment
|
|
from zipline.history.history import HistorySpec
|
|
from zipline.protocol import BarData
|
|
from zipline.utils.test_utils import to_utc
|
|
|
|
|
|
def mixed_frequency_expected_index(count, frequency):
|
|
"""
|
|
Helper for enumerating expected indices for test_mixed_frequency.
|
|
"""
|
|
env = TradingEnvironment.instance()
|
|
minute = MIXED_FREQUENCY_MINUTES[count]
|
|
|
|
if frequency == '1d':
|
|
return [env.previous_open_and_close(minute)[1], minute]
|
|
elif frequency == '1m':
|
|
return [env.previous_market_minute(minute), minute]
|
|
|
|
|
|
def mixed_frequency_expected_data(count, frequency):
|
|
"""
|
|
Helper for enumerating expected data test_mixed_frequency.
|
|
"""
|
|
if frequency == '1d':
|
|
# First day of this test is July 3rd, which is a half day.
|
|
if count < 210:
|
|
return [np.nan, count]
|
|
else:
|
|
return [209, count]
|
|
elif frequency == '1m':
|
|
if count == 0:
|
|
return [np.nan, count]
|
|
else:
|
|
return [count - 1, count]
|
|
|
|
|
|
MIXED_FREQUENCY_MINUTES = TradingEnvironment.instance().market_minute_window(
|
|
to_utc('2013-07-03 9:31AM'), 600,
|
|
)
|
|
ONE_MINUTE_PRICE_ONLY_SPECS = [
|
|
HistorySpec(1, '1m', 'price', True),
|
|
]
|
|
DAILY_OPEN_CLOSE_SPECS = [
|
|
HistorySpec(3, '1d', 'open_price', False),
|
|
HistorySpec(3, '1d', 'close_price', False),
|
|
]
|
|
ILLIQUID_PRICES_SPECS = [
|
|
HistorySpec(3, '1m', 'price', False),
|
|
HistorySpec(5, '1m', 'price', True),
|
|
]
|
|
MIXED_FREQUENCY_SPECS = [
|
|
HistorySpec(1, '1m', 'price', False),
|
|
HistorySpec(2, '1m', 'price', False),
|
|
HistorySpec(2, '1d', 'price', False),
|
|
]
|
|
MIXED_FIELDS_SPECS = [
|
|
HistorySpec(3, '1m', 'price', True),
|
|
HistorySpec(3, '1m', 'open_price', True),
|
|
HistorySpec(3, '1m', 'close_price', True),
|
|
HistorySpec(3, '1m', 'high', True),
|
|
HistorySpec(3, '1m', 'low', True),
|
|
HistorySpec(3, '1m', 'volume', True),
|
|
]
|
|
|
|
|
|
HISTORY_CONTAINER_TEST_CASES = {
|
|
# June 2013
|
|
# Su Mo Tu We Th Fr Sa
|
|
# 1
|
|
# 2 3 4 5 6 7 8
|
|
# 9 10 11 12 13 14 15
|
|
# 16 17 18 19 20 21 22
|
|
# 23 24 25 26 27 28 29
|
|
# 30
|
|
|
|
'test one minute price only': {
|
|
# A list of HistorySpec objects.
|
|
'specs': ONE_MINUTE_PRICE_ONLY_SPECS,
|
|
# Sids for the test.
|
|
'sids': [1],
|
|
# Start date for test.
|
|
'dt': to_utc('2013-06-21 9:31AM'),
|
|
# Sequency of updates to the container
|
|
'updates': [
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 5,
|
|
'dt': to_utc('2013-06-21 9:31AM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 6,
|
|
'dt': to_utc('2013-06-21 9:32AM'),
|
|
},
|
|
},
|
|
),
|
|
],
|
|
# Expected results
|
|
'expected': {
|
|
ONE_MINUTE_PRICE_ONLY_SPECS[0].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [5],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-21 9:31AM'),
|
|
],
|
|
),
|
|
pd.DataFrame(
|
|
data={
|
|
1: [6],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-21 9:32AM'),
|
|
],
|
|
),
|
|
],
|
|
},
|
|
},
|
|
|
|
'test daily open close': {
|
|
# A list of HistorySpec objects.
|
|
'specs': DAILY_OPEN_CLOSE_SPECS,
|
|
|
|
# Sids for the test.
|
|
'sids': [1],
|
|
|
|
# Start date for test.
|
|
'dt': to_utc('2013-06-21 9:31AM'),
|
|
|
|
# Sequence of updates to the container
|
|
'updates': [
|
|
BarData(
|
|
{
|
|
1: {
|
|
'open_price': 10,
|
|
'close_price': 11,
|
|
'dt': to_utc('2013-06-21 10:00AM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'open_price': 12,
|
|
'close_price': 13,
|
|
'dt': to_utc('2013-06-21 3:30PM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'open_price': 14,
|
|
'close_price': 15,
|
|
# Wait a full market day before the next bar.
|
|
# We should end up with nans for Monday the 24th.
|
|
'dt': to_utc('2013-06-25 9:31AM'),
|
|
},
|
|
},
|
|
),
|
|
],
|
|
|
|
# Dictionary mapping spec_key -> list of expected outputs
|
|
'expected': {
|
|
# open
|
|
DAILY_OPEN_CLOSE_SPECS[0].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 10]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-19 4:00PM'),
|
|
to_utc('2013-06-20 4:00PM'),
|
|
to_utc('2013-06-21 10:00AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 10]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-19 4:00PM'),
|
|
to_utc('2013-06-20 4:00PM'),
|
|
to_utc('2013-06-21 3:30PM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [10, np.nan, 14]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-21 4:00PM'),
|
|
to_utc('2013-06-24 4:00PM'),
|
|
to_utc('2013-06-25 9:31AM'),
|
|
],
|
|
),
|
|
],
|
|
# close
|
|
DAILY_OPEN_CLOSE_SPECS[1].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 11]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-19 4:00PM'),
|
|
to_utc('2013-06-20 4:00PM'),
|
|
to_utc('2013-06-21 10:00AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 13]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-19 4:00PM'),
|
|
to_utc('2013-06-20 4:00PM'),
|
|
to_utc('2013-06-21 3:30PM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [13, np.nan, 15]
|
|
},
|
|
index=[
|
|
to_utc('2013-06-21 4:00PM'),
|
|
to_utc('2013-06-24 4:00PM'),
|
|
to_utc('2013-06-25 9:31AM'),
|
|
],
|
|
),
|
|
],
|
|
},
|
|
},
|
|
'test illiquid prices': {
|
|
|
|
# A list of HistorySpec objects.
|
|
'specs': ILLIQUID_PRICES_SPECS,
|
|
|
|
# Sids for the test.
|
|
'sids': [1],
|
|
|
|
# Start date for test.
|
|
'dt': to_utc('2013-06-28 9:31AM'),
|
|
|
|
# Sequence of updates to the container
|
|
'updates': [
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 10,
|
|
'dt': to_utc('2013-06-28 9:31AM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 11,
|
|
'dt': to_utc('2013-06-28 9:32AM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 12,
|
|
'dt': to_utc('2013-06-28 9:33AM'),
|
|
},
|
|
},
|
|
),
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': 13,
|
|
# Note: Skipping 9:34 to simulate illiquid bar/missing
|
|
# data.
|
|
'dt': to_utc('2013-06-28 9:35AM'),
|
|
},
|
|
},
|
|
),
|
|
],
|
|
|
|
# Dictionary mapping spec_key -> list of expected outputs
|
|
'expected': {
|
|
ILLIQUID_PRICES_SPECS[0].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 10],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, 10, 11],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [10, 11, 12],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
],
|
|
),
|
|
|
|
# Since there's no update for 9:34, this is called at 9:35.
|
|
pd.DataFrame(
|
|
data={
|
|
1: [12, np.nan, 13],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:33AM'),
|
|
to_utc('2013-06-28 9:34AM'),
|
|
to_utc('2013-06-28 9:35AM'),
|
|
],
|
|
),
|
|
],
|
|
|
|
ILLIQUID_PRICES_SPECS[1].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, np.nan, np.nan, 10],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:57PM'),
|
|
to_utc('2013-06-27 3:58PM'),
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, np.nan, 10, 11],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:58PM'),
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 10, 11, 12],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
],
|
|
),
|
|
|
|
# Since there's no update for 9:34, this is called at 9:35.
|
|
# The 12 value from 9:33 should be forward-filled.
|
|
pd.DataFrame(
|
|
data={
|
|
1: [10, 11, 12, 12, 13],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
to_utc('2013-06-28 9:34AM'),
|
|
to_utc('2013-06-28 9:35AM'),
|
|
],
|
|
),
|
|
],
|
|
},
|
|
},
|
|
|
|
'test mixed frequencies': {
|
|
|
|
# A list of HistorySpec objects.
|
|
'specs': MIXED_FREQUENCY_SPECS,
|
|
|
|
# Sids for the test.
|
|
'sids': [1],
|
|
|
|
# Start date for test.
|
|
# July 2013
|
|
# Su Mo Tu We Th Fr Sa
|
|
# 1 2 3 4 5 6
|
|
# 7 8 9 10 11 12 13
|
|
# 14 15 16 17 18 19 20
|
|
# 21 22 23 24 25 26 27
|
|
# 28 29 30 31
|
|
'dt': to_utc('2013-07-03 9:31AM'),
|
|
|
|
# Sequence of updates to the container
|
|
'updates': [
|
|
BarData(
|
|
{
|
|
1: {
|
|
'price': count,
|
|
'dt': dt,
|
|
}
|
|
}
|
|
)
|
|
for count, dt in enumerate(MIXED_FREQUENCY_MINUTES)
|
|
],
|
|
|
|
# Dictionary mapping spec_key -> list of expected outputs.
|
|
'expected': {
|
|
|
|
MIXED_FREQUENCY_SPECS[0].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: [count],
|
|
},
|
|
index=[minute],
|
|
)
|
|
for count, minute in enumerate(MIXED_FREQUENCY_MINUTES)
|
|
],
|
|
|
|
MIXED_FREQUENCY_SPECS[1].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: mixed_frequency_expected_data(count, '1m'),
|
|
},
|
|
index=mixed_frequency_expected_index(count, '1m'),
|
|
)
|
|
for count in range(len(MIXED_FREQUENCY_MINUTES))
|
|
],
|
|
|
|
MIXED_FREQUENCY_SPECS[2].key_str: [
|
|
pd.DataFrame(
|
|
data={
|
|
1: mixed_frequency_expected_data(count, '1d'),
|
|
},
|
|
index=mixed_frequency_expected_index(count, '1d'),
|
|
)
|
|
for count in range(len(MIXED_FREQUENCY_MINUTES))
|
|
]
|
|
},
|
|
},
|
|
|
|
'test multiple fields and sids': {
|
|
|
|
# A list of HistorySpec objects.
|
|
'specs': MIXED_FIELDS_SPECS,
|
|
|
|
# Sids for the test.
|
|
'sids': [1, 10],
|
|
|
|
# Start date for test.
|
|
'dt': to_utc('2013-06-28 9:31AM'),
|
|
|
|
# Sequence of updates to the container
|
|
'updates': [
|
|
BarData(
|
|
{
|
|
1: {
|
|
'dt': dt,
|
|
'price': count,
|
|
'open_price': count,
|
|
'close_price': count,
|
|
'high': count,
|
|
'low': count,
|
|
'volume': count,
|
|
},
|
|
10: {
|
|
'dt': dt,
|
|
'price': count * 10,
|
|
'open_price': count * 10,
|
|
'close_price': count * 10,
|
|
'high': count * 10,
|
|
'low': count * 10,
|
|
'volume': count * 10,
|
|
},
|
|
},
|
|
)
|
|
for count, dt in enumerate([
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
# NOTE: No update for 9:34
|
|
to_utc('2013-06-28 9:35AM'),
|
|
])
|
|
],
|
|
|
|
# Dictionary mapping spec_key -> list of expected outputs
|
|
'expected': dict(
|
|
|
|
# Build a dict from a list of tuples. Doing it this way because
|
|
# there are two distinct cases we want to test: forward-fillable
|
|
# fields and non-forward-fillable fields.
|
|
[
|
|
(
|
|
# Non forward-fill fields
|
|
key,
|
|
[
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 0],
|
|
10: [np.nan, np.nan, 0],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
],
|
|
|
|
# Missing volume data should manifest as 0's rather
|
|
# than nans.
|
|
).fillna(0 if 'volume' in key else np.nan),
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, 0, 1],
|
|
10: [np.nan, 0, 10],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
],
|
|
).fillna(0 if 'volume' in key else np.nan),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [0, 1, 2],
|
|
10: [0, 10, 20],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
],
|
|
|
|
# Note: Calling fillna() here even though there are
|
|
# no NaNs because this makes it less likely
|
|
# for us to introduce a stupid bug by
|
|
# copy/pasting in the future.
|
|
).fillna(0 if 'volume' in key else np.nan),
|
|
pd.DataFrame(
|
|
data={
|
|
1: [2, np.nan, 3],
|
|
10: [20, np.nan, 30],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:33AM'),
|
|
to_utc('2013-06-28 9:34AM'),
|
|
to_utc('2013-06-28 9:35AM'),
|
|
],
|
|
).fillna(0 if 'volume' in key else np.nan),
|
|
],
|
|
)
|
|
for key in [spec.key_str for spec in MIXED_FIELDS_SPECS
|
|
if spec.field not in HistorySpec.FORWARD_FILLABLE]
|
|
]
|
|
|
|
+ # Concatenate the expected results for non-ffillable with
|
|
# expected result for ffillable.
|
|
[
|
|
(
|
|
# Forward-fillable fields
|
|
key,
|
|
[
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, np.nan, 0],
|
|
10: [np.nan, np.nan, 0],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 3:59PM'),
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [np.nan, 0, 1],
|
|
10: [np.nan, 0, 10],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-27 4:00PM'),
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [0, 1, 2],
|
|
10: [0, 10, 20],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:31AM'),
|
|
to_utc('2013-06-28 9:32AM'),
|
|
to_utc('2013-06-28 9:33AM'),
|
|
],
|
|
),
|
|
|
|
pd.DataFrame(
|
|
data={
|
|
1: [2, 2, 3],
|
|
10: [20, 20, 30],
|
|
},
|
|
index=[
|
|
to_utc('2013-06-28 9:33AM'),
|
|
to_utc('2013-06-28 9:34AM'),
|
|
to_utc('2013-06-28 9:35AM'),
|
|
],
|
|
),
|
|
],
|
|
)
|
|
for key in [spec.key_str for spec in MIXED_FIELDS_SPECS
|
|
if spec.field in HistorySpec.FORWARD_FILLABLE]
|
|
]
|
|
),
|
|
},
|
|
}
|