From 3b8a6b543ebefabb49ea0c12039df73abdd775f2 Mon Sep 17 00:00:00 2001
From: Scott Sanderson <ssanderson@quantopian.com>
Date: Wed, 7 Jun 2017 17:32:47 -0400
Subject: [PATCH] BUG: Fix NoneType comparisons in PY3.

---
 tests/pipeline/test_classifier.py |  2 +-
 zipline/lib/labelarray.py         | 37 +++++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/tests/pipeline/test_classifier.py b/tests/pipeline/test_classifier.py
index e70479ea..5bfc31b0 100644
--- a/tests/pipeline/test_classifier.py
+++ b/tests/pipeline/test_classifier.py
@@ -472,7 +472,7 @@ class ClassifierTestCase(BasePipelineTestCase):
         __fail_fast=True,
         labelarray_dtype=(categorical_dtype, bytes_dtype, unicode_dtype),
         relabel_func=[
-            lambda s: s[0],
+            lambda s: str(s[0]),
             lambda s: str(len(s)),
             lambda s: str(len([c for c in s if c == 'a'])),
             lambda s: None,
diff --git a/zipline/lib/labelarray.py b/zipline/lib/labelarray.py
index 4db75672..bd3ce2d6 100644
--- a/zipline/lib/labelarray.py
+++ b/zipline/lib/labelarray.py
@@ -1,7 +1,7 @@
 """
 An ndarray subclass for working with arrays of strings.
 """
-from functools import partial
+from functools import partial, total_ordering
 from operator import eq, ne
 import re
 
@@ -584,8 +584,13 @@ class LabelArray(ndarray):
                      missing_value=self.missing_value,
                      otypes=allowed_outtypes):
 
+            # Don't call f on the missing value; those locations don't exist
+            # semantically. We return _sortable_sentinel rather than None
+            # because the np.unique call below sorts the categories array,
+            # which raises an error on Python 3 because None and str aren't
+            # comparable.
             if x == missing_value:
-                return x
+                return _sortable_sentinel
 
             ret = f(x)
 
@@ -600,6 +605,9 @@ class LabelArray(ndarray):
                     )
                 )
 
+            if ret == missing_value:
+                return _sortable_sentinel
+
             return ret
 
         new_categories_with_duplicates = (
@@ -610,14 +618,21 @@ class LabelArray(ndarray):
         # with the same code duplicated multiple times. Compress the categories
         # by running them through np.unique, and then use the reverse lookup
         # table to compress codes as well.
-        new_categories, bloated_reverse_index = np.unique(
+        new_categories, bloated_inverse_index = np.unique(
             new_categories_with_duplicates,
             return_inverse=True
         )
 
+        if new_categories[0] == _sortable_sentinel:
+            # f_to_use returns _sortable_sentinel for locations that should be
+            # missing values in our output. Since np.unique returns the uniques
+            # in sorted order, and since _sortable_sentinel sorts before any
+            # string, we only need to check the first array entry.
+            new_categories[0] = self.missing_value
+
         # `reverse_index` will always be a 64 bit integer even if we can hold a
         # smaller array.
-        reverse_index = bloated_reverse_index.astype(
+        reverse_index = bloated_inverse_index.astype(
             smallest_uint_that_can_hold(len(new_categories))
         )
         new_codes = np.take(reverse_index, self.as_int_array())
@@ -714,3 +729,17 @@ class LabelArray(ndarray):
             element of self was an element of ``container``.
         """
         return self.map_predicate(container.__contains__)
+
+
+@total_ordering
+class _SortableSentinel(object):
+    """Dummy object that sorts before every non-sentinel Python object.
+    """
+    def __eq__(self, other):
+        return isinstance(other, _SortableSentinel)
+
+    def __lt__(self, other):
+        return not isinstance(other, _SortableSentinel)
+
+
+_sortable_sentinel = _SortableSentinel()