Merge pull request #1184 from quantopian/no-more-dups-2-electric-boogaloo

TEST/MAINT: Refactor unique axis verification.
This commit is contained in:
Jean Bredeche
2016-05-06 09:22:12 -04:00
3 changed files with 68 additions and 33 deletions
+23 -21
View File
@@ -13,34 +13,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from itertools import permutations
import pandas as pd
from zipline.data.us_equity_pricing import PanelDailyBarReader
from zipline.testing.fixtures import WithTradingEnvironment, ZiplineTestCase
from zipline.testing import ExplodingObject
from zipline.testing.fixtures import ZiplineTestCase
class TestPanelDailyBarReader(WithTradingEnvironment, ZiplineTestCase):
class TestPanelDailyBarReader(ZiplineTestCase):
def test_duplicate_values(self):
df = pd.DataFrame()
panel = pd.concat([pd.Panel({"X": df}), pd.Panel({"X": df})])
UNIMPORTANT_VALUE = 57
with self.assertRaises(ValueError) as e:
# panel's items has duplicates
PanelDailyBarReader(None, panel)
panel = pd.Panel(
UNIMPORTANT_VALUE,
items=['a', 'b', 'b', 'a'],
major_axis=['c'],
minor_axis=['d'],
)
unused = ExplodingObject()
self.assertEqual("Duplicated items found: ['X']",
e.exception.message)
axis_names = ['items', 'major_axis', 'minor_axis']
with self.assertRaises(ValueError) as e:
# panel's major axis has duplicates
PanelDailyBarReader(None, panel.swapaxes(0, 1))
for axis_order in permutations((0, 1, 2)):
with self.assertRaises(ValueError) as e:
PanelDailyBarReader(unused, panel.transpose(*axis_order))
self.assertEqual("Duplicated items found: ['X']",
e.exception.message)
with self.assertRaises(ValueError) as e:
# panel's minor axis has duplicates
PanelDailyBarReader(None, panel.swapaxes(0, 2))
self.assertEqual("Duplicated items found: ['X']",
e.exception.message)
expected = (
"Duplicate entries in Panel.{name}: ['a', 'b'].".format(
name=axis_names[axis_order.index(0)],
)
)
self.assertEqual(str(e.exception), expected)
+8 -12
View File
@@ -54,10 +54,12 @@ from six import (
)
from zipline.utils.functional import apply
from zipline.utils.preprocess import call
from zipline.utils.input_validation import (
coerce_string,
preprocess,
expect_element,
verify_indices_all_unique,
)
from zipline.utils.sqlite_utils import group_into_chunks
from zipline.utils.memoize import lazyval
@@ -696,9 +698,12 @@ class PanelDailyBarReader(DailyBarReader):
DataPanel Structure
-------
items : Int64Index, asset identifiers
major_axis : DatetimeIndex, days provided by the Panel.
items : Int64Index
Asset identifiers. Must be unique.
major_axis : DatetimeIndex
Dates for data provided by the Panel. Must be unique.
minor_axis : ['open', 'high', 'low', 'close', 'volume']
Price attributes. Must be unique.
Attributes
----------
@@ -710,17 +715,8 @@ class PanelDailyBarReader(DailyBarReader):
first_trading_day : pd.Timestamp
The first trading day in the dataset.
"""
@preprocess(panel=call(verify_indices_all_unique))
def __init__(self, calendar, panel):
# check duplicates on all indices of panel
for attr_name in ["items", "major_axis", "minor_axis"]:
index = getattr(panel, attr_name)
duplicates = index.duplicated()
if duplicates.any():
raise ValueError("Duplicated items found: {0}".format(
index[duplicates].values
))
panel = panel.copy()
if 'volume' not in panel.items:
+37
View File
@@ -25,6 +25,43 @@ import toolz.curried.operator as op
from zipline.utils.preprocess import preprocess
def verify_indices_all_unique(obj):
    """
    Check that all axes of a pandas object are unique.

    Parameters
    ----------
    obj : pd.Series / pd.DataFrame / pd.Panel
        The object to validate.

    Returns
    -------
    obj : pd.Series / pd.DataFrame / pd.Panel
        The validated object, returned unchanged (same instance).

    Raises
    ------
    ValueError
        If any axis has duplicate entries. The message names the object
        type, the offending axis, and the sorted duplicated labels. Only
        the first axis found to contain duplicates is reported.
    """
    axis_names = [
        ('index',),                            # Series
        ('index', 'columns'),                  # DataFrame
        ('items', 'major_axis', 'minor_axis')  # Panel
    ][obj.ndim - 1]  # ndim is 1-based, so ndim == 1 selects entry 0.

    for axis_name, index in zip(axis_names, obj.axes):
        if index.is_unique:
            continue

        raise ValueError(
            "Duplicate entries in {type}.{axis}: {dupes}.".format(
                type=type(obj).__name__,
                axis=axis_name,
                # ``duplicated()`` flags every occurrence after the first.
                dupes=sorted(index[index.duplicated()]),
            )
        )

    # Hand the object back so this validator can be used as an argument
    # preprocessor (e.g. ``preprocess(panel=call(verify_indices_all_unique))``):
    # the preprocessor's return value is what the decorated function
    # receives, so returning None would replace the argument with None.
    return obj
def optionally(preprocessor):
"""Modify a preprocessor to explicitly allow `None`.