mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 09:04:17 +08:00
Merge pull request #1184 from quantopian/no-more-dups-2-electric-boogaloo
TEST/MAINT: Refactor unique axis verification.
This commit is contained in:
@@ -13,34 +13,36 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from itertools import permutations
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from zipline.data.us_equity_pricing import PanelDailyBarReader
|
||||
from zipline.testing.fixtures import WithTradingEnvironment, ZiplineTestCase
|
||||
from zipline.testing import ExplodingObject
|
||||
from zipline.testing.fixtures import ZiplineTestCase
|
||||
|
||||
|
||||
class TestPanelDailyBarReader(WithTradingEnvironment, ZiplineTestCase):
|
||||
class TestPanelDailyBarReader(ZiplineTestCase):
|
||||
def test_duplicate_values(self):
|
||||
df = pd.DataFrame()
|
||||
panel = pd.concat([pd.Panel({"X": df}), pd.Panel({"X": df})])
|
||||
UNIMPORTANT_VALUE = 57
|
||||
|
||||
with self.assertRaises(ValueError) as e:
|
||||
# panel's items has duplicates
|
||||
PanelDailyBarReader(None, panel)
|
||||
panel = pd.Panel(
|
||||
UNIMPORTANT_VALUE,
|
||||
items=['a', 'b', 'b', 'a'],
|
||||
major_axis=['c'],
|
||||
minor_axis=['d'],
|
||||
)
|
||||
unused = ExplodingObject()
|
||||
|
||||
self.assertEqual("Duplicated items found: ['X']",
|
||||
e.exception.message)
|
||||
axis_names = ['items', 'major_axis', 'minor_axis']
|
||||
|
||||
with self.assertRaises(ValueError) as e:
|
||||
# panel's major axis has duplicates
|
||||
PanelDailyBarReader(None, panel.swapaxes(0, 1))
|
||||
for axis_order in permutations((0, 1, 2)):
|
||||
with self.assertRaises(ValueError) as e:
|
||||
PanelDailyBarReader(unused, panel.transpose(*axis_order))
|
||||
|
||||
self.assertEqual("Duplicated items found: ['X']",
|
||||
e.exception.message)
|
||||
|
||||
with self.assertRaises(ValueError) as e:
|
||||
# panel's minor axis has duplicates
|
||||
PanelDailyBarReader(None, panel.swapaxes(0, 2))
|
||||
|
||||
self.assertEqual("Duplicated items found: ['X']",
|
||||
e.exception.message)
|
||||
expected = (
|
||||
"Duplicate entries in Panel.{name}: ['a', 'b'].".format(
|
||||
name=axis_names[axis_order.index(0)],
|
||||
)
|
||||
)
|
||||
self.assertEqual(str(e.exception), expected)
|
||||
|
||||
@@ -54,10 +54,12 @@ from six import (
|
||||
)
|
||||
|
||||
from zipline.utils.functional import apply
|
||||
from zipline.utils.preprocess import call
|
||||
from zipline.utils.input_validation import (
|
||||
coerce_string,
|
||||
preprocess,
|
||||
expect_element,
|
||||
verify_indices_all_unique,
|
||||
)
|
||||
from zipline.utils.sqlite_utils import group_into_chunks
|
||||
from zipline.utils.memoize import lazyval
|
||||
@@ -696,9 +698,12 @@ class PanelDailyBarReader(DailyBarReader):
|
||||
|
||||
DataPanel Structure
|
||||
-------
|
||||
items : Int64Index, asset identifiers
|
||||
major_axis : DatetimeIndex, days provided by the Panel.
|
||||
items : Int64Index
|
||||
Asset identifiers. Must be unique.
|
||||
major_axis : DatetimeIndex
|
||||
Dates for data provided by the Panel. Must be unique.
|
||||
minor_axis : ['open', 'high', 'low', 'close', 'volume']
|
||||
Price attributes. Must be unique.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
@@ -710,17 +715,8 @@ class PanelDailyBarReader(DailyBarReader):
|
||||
first_trading_day : pd.Timestamp
|
||||
The first trading day in the dataset.
|
||||
"""
|
||||
@preprocess(panel=call(verify_indices_all_unique))
|
||||
def __init__(self, calendar, panel):
|
||||
# check duplicates on all indices of panel
|
||||
|
||||
for attr_name in ["items", "major_axis", "minor_axis"]:
|
||||
index = getattr(panel, attr_name)
|
||||
duplicates = index.duplicated()
|
||||
|
||||
if duplicates.any():
|
||||
raise ValueError("Duplicated items found: {0}".format(
|
||||
index[duplicates].values
|
||||
))
|
||||
|
||||
panel = panel.copy()
|
||||
if 'volume' not in panel.items:
|
||||
|
||||
@@ -25,6 +25,43 @@ import toolz.curried.operator as op
|
||||
from zipline.utils.preprocess import preprocess
|
||||
|
||||
|
||||
def verify_indices_all_unique(obj):
    """
    Check that all axes of a pandas object are unique.

    Parameters
    ----------
    obj : pd.Series / pd.DataFrame / pd.Panel
        The object to validate.

    Returns
    -------
    obj : pd.Series / pd.DataFrame / pd.Panel
        The validated object, unchanged.

    Raises
    ------
    ValueError
        If any axis has duplicate entries. The message names the first
        offending axis and lists each duplicated label once, in sorted
        order.
    """
    axis_names = [
        ('index',),                            # Series
        ('index', 'columns'),                  # DataFrame
        ('items', 'major_axis', 'minor_axis')  # Panel
    ][obj.ndim - 1]  # ndim = 1 should go to entry 0, ndim = 2 to entry 1, ...

    for axis_name, index in zip(axis_names, obj.axes):
        if index.is_unique:
            continue

        # ``duplicated()`` flags every repeat after the first occurrence, so
        # a label that appears three or more times would be listed more than
        # once; collapse to a set so each offending label is reported once.
        raise ValueError(
            "Duplicate entries in {type}.{axis}: {dupes}.".format(
                type=type(obj).__name__,
                axis=axis_name,
                dupes=sorted(set(index[index.duplicated()])),
            )
        )

    # Hand the object back so this can be used as a value-passing argument
    # preprocessor. NOTE(review): assumes the `call` wrapper forwards the
    # return value as the new argument -- confirm against its definition.
    return obj
|
||||
|
||||
|
||||
def optionally(preprocessor):
|
||||
"""Modify a preprocessor to explicitly allow `None`.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user