mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-28 16:12:05 +08:00
BUG: Fix NoneType comparisons in PY3.
This commit is contained in:
@@ -472,7 +472,7 @@ class ClassifierTestCase(BasePipelineTestCase):
|
||||
__fail_fast=True,
|
||||
labelarray_dtype=(categorical_dtype, bytes_dtype, unicode_dtype),
|
||||
relabel_func=[
|
||||
lambda s: s[0],
|
||||
lambda s: str(s[0]),
|
||||
lambda s: str(len(s)),
|
||||
lambda s: str(len([c for c in s if c == 'a'])),
|
||||
lambda s: None,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
An ndarray subclass for working with arrays of strings.
|
||||
"""
|
||||
from functools import partial
|
||||
from functools import partial, total_ordering
|
||||
from operator import eq, ne
|
||||
import re
|
||||
|
||||
@@ -584,8 +584,13 @@ class LabelArray(ndarray):
|
||||
missing_value=self.missing_value,
|
||||
otypes=allowed_outtypes):
|
||||
|
||||
# Don't call f on the missing value; those locations don't exist
|
||||
# semantically. We return _sortable_sentinel rather than None
|
||||
# because the np.unique call below sorts the categories array,
|
||||
# which raises an error on Python 3 because None and str aren't
|
||||
# comparable.
|
||||
if x == missing_value:
|
||||
return x
|
||||
return _sortable_sentinel
|
||||
|
||||
ret = f(x)
|
||||
|
||||
@@ -600,6 +605,9 @@ class LabelArray(ndarray):
|
||||
)
|
||||
)
|
||||
|
||||
if ret == missing_value:
|
||||
return _sortable_sentinel
|
||||
|
||||
return ret
|
||||
|
||||
new_categories_with_duplicates = (
|
||||
@@ -610,14 +618,21 @@ class LabelArray(ndarray):
|
||||
# with the same code duplicated multiple times. Compress the categories
|
||||
# by running them through np.unique, and then use the reverse lookup
|
||||
# table to compress codes as well.
|
||||
new_categories, bloated_reverse_index = np.unique(
|
||||
new_categories, bloated_inverse_index = np.unique(
|
||||
new_categories_with_duplicates,
|
||||
return_inverse=True
|
||||
)
|
||||
|
||||
if new_categories[0] == _sortable_sentinel:
|
||||
# f_to_use return _sortable_sentinel for locations that should be
|
||||
# missing values in our output. Since np.unique returns the uniques
|
||||
# in sorted order, and since _sortable_sentinel sorts before any
|
||||
# string, we only need to check the first array entry.
|
||||
new_categories[0] = self.missing_value
|
||||
|
||||
# `reverse_index` will always be a 64 bit integer even if we can hold a
|
||||
# smaller array.
|
||||
reverse_index = bloated_reverse_index.astype(
|
||||
reverse_index = bloated_inverse_index.astype(
|
||||
smallest_uint_that_can_hold(len(new_categories))
|
||||
)
|
||||
new_codes = np.take(reverse_index, self.as_int_array())
|
||||
@@ -714,3 +729,17 @@ class LabelArray(ndarray):
|
||||
element of self was an element of ``container``.
|
||||
"""
|
||||
return self.map_predicate(container.__contains__)
|
||||
|
||||
|
||||
@total_ordering
|
||||
class _SortableSentinel(object):
|
||||
"""Dummy object that sorts before any other python object.
|
||||
"""
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, _SortableSentinel)
|
||||
|
||||
def __lt__(self, other):
|
||||
return not isinstance(other, _SortableSentinel)
|
||||
|
||||
|
||||
_sortable_sentinel = _SortableSentinel()
|
||||
|
||||
Reference in New Issue
Block a user