BUG: Fix NoneType comparisons in PY3.

This commit is contained in:
Scott Sanderson
2017-06-07 17:32:47 -04:00
parent 09cc54e08a
commit 3b8a6b543e
2 changed files with 34 additions and 5 deletions
+1 -1
View File
@@ -472,7 +472,7 @@ class ClassifierTestCase(BasePipelineTestCase):
__fail_fast=True,
labelarray_dtype=(categorical_dtype, bytes_dtype, unicode_dtype),
relabel_func=[
lambda s: s[0],
lambda s: str(s[0]),
lambda s: str(len(s)),
lambda s: str(len([c for c in s if c == 'a'])),
lambda s: None,
+33 -4
View File
@@ -1,7 +1,7 @@
"""
An ndarray subclass for working with arrays of strings.
"""
from functools import partial
from functools import partial, total_ordering
from operator import eq, ne
import re
@@ -584,8 +584,13 @@ class LabelArray(ndarray):
missing_value=self.missing_value,
otypes=allowed_outtypes):
# Don't call f on the missing value; those locations don't exist
# semantically. We return _sortable_sentinel rather than None
# because the np.unique call below sorts the categories array,
# which raises an error on Python 3 because None and str aren't
# comparable.
if x == missing_value:
return x
return _sortable_sentinel
ret = f(x)
@@ -600,6 +605,9 @@ class LabelArray(ndarray):
)
)
if ret == missing_value:
return _sortable_sentinel
return ret
new_categories_with_duplicates = (
@@ -610,14 +618,21 @@ class LabelArray(ndarray):
# with the same code duplicated multiple times. Compress the categories
# by running them through np.unique, and then use the reverse lookup
# table to compress codes as well.
new_categories, bloated_reverse_index = np.unique(
new_categories, bloated_inverse_index = np.unique(
new_categories_with_duplicates,
return_inverse=True
)
if new_categories[0] == _sortable_sentinel:
# f_to_use return _sortable_sentinel for locations that should be
# missing values in our output. Since np.unique returns the uniques
# in sorted order, and since _sortable_sentinel sorts before any
# string, we only need to check the first array entry.
new_categories[0] = self.missing_value
# `reverse_index` will always be a 64 bit integer even if we can hold a
# smaller array.
reverse_index = bloated_reverse_index.astype(
reverse_index = bloated_inverse_index.astype(
smallest_uint_that_can_hold(len(new_categories))
)
new_codes = np.take(reverse_index, self.as_int_array())
@@ -714,3 +729,17 @@ class LabelArray(ndarray):
element of self was an element of ``container``.
"""
return self.map_predicate(container.__contains__)
@total_ordering
class _SortableSentinel(object):
"""Dummy object that sorts before any other python object.
"""
def __eq__(self, other):
return isinstance(other, _SortableSentinel)
def __lt__(self, other):
return not isinstance(other, _SortableSentinel)
_sortable_sentinel = _SortableSentinel()