diff --git a/setup.py b/setup.py
index 78063820..e12e0b49 100644
--- a/setup.py
+++ b/setup.py
@@ -81,11 +81,13 @@ class LazyBuildExtCommandClass(dict):
 ext_modules = [
     Extension('zipline.assets._assets', ['zipline/assets/_assets.pyx']),
     Extension('zipline.lib.adjustment', ['zipline/lib/adjustment.pyx']),
+    Extension('zipline.lib._factorize', ['zipline/lib/_factorize.pyx']),
     Extension(
         'zipline.lib._float64window', ['zipline/lib/_float64window.pyx']
     ),
     Extension('zipline.lib._int64window', ['zipline/lib/_int64window.pyx']),
     Extension('zipline.lib._uint8window', ['zipline/lib/_uint8window.pyx']),
+    Extension('zipline.lib._labelwindow', ['zipline/lib/_labelwindow.pyx']),
     Extension('zipline.lib.rank', ['zipline/lib/rank.pyx']),
     Extension('zipline.data._equities', ['zipline/data/_equities.pyx']),
     Extension('zipline.data._adjustments', ['zipline/data/_adjustments.pyx']),
diff --git a/tests/pipeline/test_adjusted_array.py b/tests/pipeline/test_adjusted_array.py
index a2a8cde0..11fb5130 100644
--- a/tests/pipeline/test_adjusted_array.py
+++ b/tests/pipeline/test_adjusted_array.py
@@ -9,19 +9,23 @@ from nose_parameterized import parameterized
 from numpy import (
     arange,
     array,
+    asarray,
+    dtype,
     full,
     where,
 )
-from numpy.testing import assert_array_equal
 from six.moves import zip_longest
+from toolz import curry
 
 from zipline.errors import WindowLengthNotPositive, WindowLengthTooLong
 from zipline.lib.adjustment import (
     Datetime64Overwrite,
     Float64Multiply,
     Float64Overwrite,
+    ObjectOverwrite,
 )
 from zipline.lib.adjusted_array import AdjustedArray, NOMASK
+from zipline.lib.labelarray import LabelArray
 from zipline.testing import check_arrays, parameter_space
 from zipline.utils.numpy_utils import (
     coerce_to_dtype,
@@ -29,6 +33,7 @@ from zipline.utils.numpy_utils import (
     default_missing_value_for_dtype,
     float64_dtype,
     int64_dtype,
+    object_dtype,
 )
 
 
@@ -62,12 +67,41 @@ def valid_window_lengths(underlying_buffer_length):
     return iter(range(1, underlying_buffer_length + 1))
 
 
-def _gen_unadjusted_cases(dtype):
+@curry
+def as_dtype(dtype, data):
+    """
+    Curried wrapper around array.astype for when you have the dtype before you
+    have the data.
+    """
+    return asarray(data).astype(dtype)
 
+
+@curry
+def as_labelarray(initial_dtype, missing_value, array):
+    """
+    Curried wrapper around LabelArray, that round-trips the input data through
+    `initial_dtype` first.
+    """
+    return LabelArray(
+        array.astype(initial_dtype),
+        missing_value=initial_dtype.type(''),
+    )
+
+
+bytes_dtype = dtype('S3')
+unicode_dtype = dtype('U3')
+
+
+def _gen_unadjusted_cases(name,
+                          make_input,
+                          make_expected_output,
+                          missing_value):
     nrows = 6
     ncols = 3
-    data = arange(nrows * ncols).astype(dtype).reshape(nrows, ncols)
-    missing_value = default_missing_value_for_dtype(dtype)
+
+    raw_data = arange(nrows * ncols).reshape(nrows, ncols)
+    input_array = make_input(raw_data)
+    expected_output_array = make_expected_output(raw_data)
 
     for windowlen in valid_window_lengths(nrows):
 
@@ -76,13 +110,13 @@ def _gen_unadjusted_cases(dtype):
         )
 
         yield (
-            "dtype_%s_length_%d" % (dtype, windowlen),
-            data,
+            "%s_length_%d" % (name, windowlen),
+            input_array,
             windowlen,
             {},
             missing_value,
             [
-                data[offset:offset + windowlen]
+                expected_output_array[offset:offset + windowlen]
                 for offset in range(num_legal_windows)
             ],
         )
@@ -156,84 +190,125 @@ def _gen_multiplicative_adjustment_cases(dtype):
                              [1,  6,  1],
                              [1,  1,  1]], dtype=dtype)
 
-    return _gen_expectations(baseline, adjustments, buffer_as_of, nrows)
-
-
-def _gen_overwrite_adjustment_cases(dtype):
-    """
-    Generate test cases for overwrite adjustments.
-
-    The algorithm used here is the same as the one used above for
-    multiplicative adjustments.  The only difference is the semantics of how
-    the adjustments are expected to modify the arrays.
-    """
-    adjustment_type = {
-        float64_dtype: Float64Overwrite,
-        datetime64ns_dtype: Datetime64Overwrite,
-    }[dtype]
-
-    nrows, ncols = 6, 3
-    adjustments = {}
-    buffer_as_of = [None] * 6
-    baseline = buffer_as_of[0] = full((nrows, ncols), 2, dtype=dtype)
-
-    # Note that row indices are inclusive!
-    adjustments[1] = [
-        adjustment_type(0, 0, 0, 0, coerce_to_dtype(dtype, 1)),
-    ]
-    buffer_as_of[1] = array([[1, 2, 2],
-                             [2, 2, 2],
-                             [2, 2, 2],
-                             [2, 2, 2],
-                             [2, 2, 2],
-                             [2, 2, 2]], dtype=dtype)
-
-    # No adjustment at index 2.
-    buffer_as_of[2] = buffer_as_of[1]
-
-    adjustments[3] = [
-        adjustment_type(1, 2, 1, 1, coerce_to_dtype(dtype, 3)),
-        adjustment_type(0, 1, 0, 0, coerce_to_dtype(dtype, 4)),
-    ]
-    buffer_as_of[3] = array([[4, 2, 2],
-                             [4, 3, 2],
-                             [2, 3, 2],
-                             [2, 2, 2],
-                             [2, 2, 2],
-                             [2, 2, 2]], dtype=dtype)
-
-    adjustments[4] = [
-        adjustment_type(0, 3, 2, 2, coerce_to_dtype(dtype, 5))
-    ]
-    buffer_as_of[4] = array([[4, 2, 5],
-                             [4, 3, 5],
-                             [2, 3, 5],
-                             [2, 2, 5],
-                             [2, 2, 2],
-                             [2, 2, 2]], dtype=dtype)
-
-    adjustments[5] = [
-        adjustment_type(0, 4, 1, 1, coerce_to_dtype(dtype, 6)),
-        adjustment_type(2, 2, 2, 2, coerce_to_dtype(dtype, 7)),
-    ]
-    buffer_as_of[5] = array([[4,  6,  5],
-                             [4,  6,  5],
-                             [2,  6,  7],
-                             [2,  6,  5],
-                             [2,  6,  2],
-                             [2,  2,  2]], dtype=dtype)
-
     return _gen_expectations(
         baseline,
+        default_missing_value_for_dtype(dtype),
         adjustments,
         buffer_as_of,
         nrows,
     )
 
 
-def _gen_expectations(baseline, adjustments, buffer_as_of, nrows):
+def _gen_overwrite_adjustment_cases(name,
+                                    make_input,
+                                    make_expected_output,
+                                    dtype,
+                                    missing_value):
+    """
+    Generate test cases for overwrite adjustments.
+
+    The algorithm used here is the same as the one used above for
+    multiplicative adjustments.  The only difference is the semantics of how
+    the adjustments are expected to modify the arrays.
+
+    This is parameterized on `make_input` and make_expected_output functions,
+    which take 2-D lists of values and transform them into desired input/output
+    arrays. We do this so that we can easily test both vanilla numpy ndarrays
+    and our own LabelArray class for strings.
+    """
+    adjustment_type = {
+        float64_dtype: Float64Overwrite,
+        datetime64ns_dtype: Datetime64Overwrite,
+        bytes_dtype: ObjectOverwrite,
+        unicode_dtype: ObjectOverwrite,
+        object_dtype: ObjectOverwrite,
+    }[dtype]
+
+    if dtype == object_dtype:
+        # When we're testing object dtypes, we expect to have strings, but
+        # coerce_to_dtype(object, 3) just gives 3 as a Python integer.
+        def make_overwrite_value(dtype, value):
+            return str(value)
+    else:
+        make_overwrite_value = coerce_to_dtype
+
+    adjustments = {}
+    buffer_as_of = [None] * 6
+    baseline = make_input([[2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2]])
+
+    buffer_as_of[0] = make_expected_output([[2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2]])
+
+    # Note that row indices are inclusive!
+    adjustments[1] = [
+        adjustment_type(0, 0, 0, 0, make_overwrite_value(dtype, 1)),
+    ]
+    buffer_as_of[1] = make_expected_output([[1, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2]])
+
+    # No adjustment at index 2.
+    buffer_as_of[2] = buffer_as_of[1]
+
+    adjustments[3] = [
+        adjustment_type(1, 2, 1, 1, make_overwrite_value(dtype, 3)),
+        adjustment_type(0, 1, 0, 0, make_overwrite_value(dtype, 4)),
+    ]
+    buffer_as_of[3] = make_expected_output([[4, 2, 2],
+                                            [4, 3, 2],
+                                            [2, 3, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2]])
+
+    adjustments[4] = [
+        adjustment_type(0, 3, 2, 2, make_overwrite_value(dtype, 5))
+    ]
+    buffer_as_of[4] = make_expected_output([[4, 2, 5],
+                                            [4, 3, 5],
+                                            [2, 3, 5],
+                                            [2, 2, 5],
+                                            [2, 2, 2],
+                                            [2, 2, 2]])
+
+    adjustments[5] = [
+        adjustment_type(0, 4, 1, 1, make_overwrite_value(dtype, 6)),
+        adjustment_type(2, 2, 2, 2, make_overwrite_value(dtype, 7)),
+    ]
+    buffer_as_of[5] = make_expected_output([[4, 6, 5],
+                                            [4, 6, 5],
+                                            [2, 6, 7],
+                                            [2, 6, 5],
+                                            [2, 6, 2],
+                                            [2, 2, 2]])
+
+    return _gen_expectations(
+        baseline,
+        missing_value,
+        adjustments,
+        buffer_as_of,
+        nrows=6,
+    )
+
+
+def _gen_expectations(baseline,
+                      missing_value,
+                      adjustments,
+                      buffer_as_of,
+                      nrows):
 
-    missing_value = default_missing_value_for_dtype(baseline.dtype)
     for windowlen in valid_window_lengths(nrows):
 
         num_legal_windows = num_windows_of_length_M_on_buffers_of_length_N(
@@ -263,8 +338,60 @@ class AdjustedArrayTestCase(TestCase):
 
     @parameterized.expand(
         chain(
-            _gen_unadjusted_cases(float64_dtype),
-            _gen_unadjusted_cases(datetime64ns_dtype),
+            _gen_unadjusted_cases(
+                'float',
+                make_input=as_dtype(float64_dtype),
+                make_expected_output=as_dtype(float64_dtype),
+                missing_value=default_missing_value_for_dtype(float64_dtype),
+            ),
+            _gen_unadjusted_cases(
+                'datetime',
+                make_input=as_dtype(datetime64ns_dtype),
+                make_expected_output=as_dtype(datetime64ns_dtype),
+                missing_value=default_missing_value_for_dtype(
+                    datetime64ns_dtype
+                ),
+            ),
+            # Test passing an array of strings to AdjustedArray.
+            _gen_unadjusted_cases(
+                'bytes_ndarray',
+                make_input=as_dtype(bytes_dtype),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            _gen_unadjusted_cases(
+                'unicode_ndarray',
+                make_input=as_dtype(unicode_dtype),
+                make_expected_output=as_labelarray(unicode_dtype, u''),
+                missing_value=u'',
+            ),
+            _gen_unadjusted_cases(
+                'object_ndarray',
+                make_input=lambda a: a.astype(str).astype(object),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            # Test passing a LabelArray directly to AdjustedArray.
+            _gen_unadjusted_cases(
+                'bytes_labelarray',
+                make_input=as_labelarray(bytes_dtype, b''),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            _gen_unadjusted_cases(
+                'unicode_labelarray',
+                make_input=as_labelarray(unicode_dtype, u''),
+                make_expected_output=as_labelarray(bytes_dtype, u''),
+                missing_value=u'',
+            ),
+            _gen_unadjusted_cases(
+                'object_labelarray',
+                make_input=(
+                    lambda a: LabelArray(a.astype(str).astype(object), b'')
+                ),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
         )
     )
     def test_no_adjustments(self,
@@ -273,14 +400,13 @@ class AdjustedArrayTestCase(TestCase):
                             lookback,
                             adjustments,
                             missing_value,
-                            expected):
+                            expected_output):
 
         array = AdjustedArray(data, NOMASK, adjustments, missing_value)
         for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
-            window_iter = array.traverse(lookback)
-            for yielded, expected_yield in zip_longest(window_iter, expected):
-                self.assertEqual(yielded.dtype, data.dtype)
-                assert_array_equal(yielded, expected_yield)
+            in_out = zip(array.traverse(lookback), expected_output)
+            for yielded, expected_yield in in_out:
+                check_arrays(yielded, expected_yield)
 
     @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
     def test_multiplicative_adjustments(self,
@@ -295,12 +421,70 @@ class AdjustedArrayTestCase(TestCase):
         for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
             window_iter = array.traverse(lookback)
             for yielded, expected_yield in zip_longest(window_iter, expected):
-                assert_array_equal(yielded, expected_yield)
+                check_arrays(yielded, expected_yield)
 
     @parameterized.expand(
         chain(
-            _gen_overwrite_adjustment_cases(float64_dtype),
-            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
+            _gen_overwrite_adjustment_cases(
+                'float',
+                make_input=as_dtype(float64_dtype),
+                make_expected_output=as_dtype(float64_dtype),
+                dtype=float64_dtype,
+                missing_value=default_missing_value_for_dtype(float64_dtype),
+            ),
+            _gen_overwrite_adjustment_cases(
+                'datetime',
+                make_input=as_dtype(datetime64ns_dtype),
+                make_expected_output=as_dtype(datetime64ns_dtype),
+                dtype=datetime64ns_dtype,
+                missing_value=default_missing_value_for_dtype(
+                    datetime64ns_dtype,
+                ),
+            ),
+            # There are six cases here:
+            # Using np.bytes/np.unicode/python string arrays as inputs.
+            # Passing np.bytes/np.unicode/python string arrays to LabelArray,
+            # and using those as input.
+            #
+            # The outputs should always be LabelArrays.
+            _gen_unadjusted_cases(
+                'bytes_ndarray',
+                make_input=as_dtype(bytes_dtype),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            _gen_unadjusted_cases(
+                'unicode_ndarray',
+                make_input=as_dtype(unicode_dtype),
+                make_expected_output=as_labelarray(unicode_dtype, u''),
+                missing_value=u'',
+            ),
+            _gen_unadjusted_cases(
+                'object_ndarray',
+                make_input=lambda a: a.astype(str).astype(object),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            _gen_unadjusted_cases(
+                'bytes_labelarray',
+                make_input=as_labelarray(bytes_dtype, b''),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
+            _gen_unadjusted_cases(
+                'unicode_labelarray',
+                make_input=as_labelarray(unicode_dtype, u''),
+                make_expected_output=as_labelarray(bytes_dtype, u''),
+                missing_value=u'',
+            ),
+            _gen_unadjusted_cases(
+                'object_labelarray',
+                make_input=(
+                    lambda a: LabelArray(a.astype(str).astype(object), b'')
+                ),
+                make_expected_output=as_labelarray(bytes_dtype, b''),
+                missing_value=b'',
+            ),
         )
     )
     def test_overwrite_adjustment_cases(self,
@@ -314,11 +498,15 @@ class AdjustedArrayTestCase(TestCase):
         for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
             window_iter = array.traverse(lookback)
             for yielded, expected_yield in zip_longest(window_iter, expected):
-                self.assertEqual(yielded.dtype, data.dtype)
-                assert_array_equal(yielded, expected_yield)
+                check_arrays(yielded, expected_yield)
 
     @parameter_space(
-        dtype=[float64_dtype, int64_dtype, datetime64ns_dtype],
+        __fail_fast=True,
+        dtype=[
+            float64_dtype,
+            int64_dtype,
+            datetime64ns_dtype,
+        ],
         missing_value=[0, 10000],
         window_length=[2, 3],
     )
@@ -341,6 +529,37 @@ class AdjustedArrayTestCase(TestCase):
         for expected, actual in zip(gen_expected, gen_actual):
             check_arrays(expected, actual)
 
+    @parameter_space(
+        __fail_fast=True,
+        dtype=[bytes_dtype, unicode_dtype, object_dtype],
+        missing_value=["0", "-1", ""],
+        window_length=[2, 3],
+    )
+    def test_masking_with_strings(self, dtype, missing_value, window_length):
+        missing_value = coerce_to_dtype(dtype, missing_value)
+        baseline_ints = arange(15).reshape(5, 3)
+
+        # Coerce to string first so that coercion to object gets us an array of
+        # string objects.
+        baseline = baseline_ints.astype(str).astype(dtype)
+        mask = (baseline_ints % 2).astype(bool)
+
+        masked_baseline = LabelArray(baseline, missing_value=missing_value)
+        masked_baseline[~mask] = missing_value
+
+        array = AdjustedArray(
+            baseline,
+            mask,
+            adjustments={},
+            missing_value=missing_value,
+        )
+
+        gen_expected = moving_window(masked_baseline, window_length)
+        gen_actual = array.traverse(window_length=window_length)
+
+        for expected, actual in zip(gen_expected, gen_actual):
+            check_arrays(expected, actual)
+
     def test_invalid_lookback(self):
 
         data = arange(30, dtype=float).reshape(6, 5)
diff --git a/tests/pipeline/test_classifier.py b/tests/pipeline/test_classifier.py
index a4b25178..77f76079 100644
--- a/tests/pipeline/test_classifier.py
+++ b/tests/pipeline/test_classifier.py
@@ -1,12 +1,21 @@
 import numpy as np
 
+from zipline.lib.labelarray import LabelArray
 from zipline.pipeline import Classifier
 from zipline.testing import parameter_space
-from zipline.utils.numpy_utils import int64_dtype
+from zipline.utils.numpy_utils import (
+    categorical_dtype,
+    coerce_to_dtype,
+    int64_dtype,
+)
 
 from .base import BasePipelineTestCase
 
 
+bytes_dtype = np.dtype('S3')
+unicode_dtype = np.dtype('U3')
+
+
 class ClassifierTestCase(BasePipelineTestCase):
 
     @parameter_space(mv=[-1, 0, 1, 999])
@@ -69,10 +78,56 @@ class ClassifierTestCase(BasePipelineTestCase):
             mask=self.build_mask(self.ones_mask(shape=data.shape)),
         )
 
-    @parameter_space(missing=[-1, 0, 1])
-    def test_disallow_comparison_to_missing_value(self, missing):
+    @parameter_space(
+        __fail_fast=True,
+        compval=['a', 'ab', 'not in the array'],
+        labelarray_dtype=(bytes_dtype, categorical_dtype, unicode_dtype),
+    )
+    def test_string_eq(self, compval, labelarray_dtype):
+
+        compval = labelarray_dtype.type(compval)
+
         class C(Classifier):
-            dtype = int64_dtype
+            dtype = categorical_dtype
+            missing_value = ''
+            inputs = ()
+            window_length = 0
+
+        c = C()
+
+        # There's no significance to the values here other than that they
+        # contain a mix of the comparison value and other values.
+        data = LabelArray(
+            np.asarray(
+                [['',    'a',  'ab', 'ba'],
+                 ['z',  'ab',   'a', 'ab'],
+                 ['aa', 'ab',    '', 'ab'],
+                 ['aa',  'a',  'ba', 'ba']],
+                dtype=labelarray_dtype,
+            ),
+            missing_value='',
+        )
+
+        self.check_terms(
+            terms={
+                'eq': c.eq(compval),
+            },
+            expected={
+                'eq': (data == compval),
+            },
+            initial_workspace={c: data},
+            mask=self.build_mask(self.ones_mask(shape=data.shape)),
+        )
+
+    @parameter_space(
+        missing=[-1, 0, 1],
+        dtype_=[int64_dtype, categorical_dtype],
+    )
+    def test_disallow_comparison_to_missing_value(self, missing, dtype_):
+        missing = coerce_to_dtype(dtype_, missing)
+
+        class C(Classifier):
+            dtype = dtype_
             missing_value = missing
             inputs = ()
             window_length = 0
@@ -82,7 +137,7 @@ class ClassifierTestCase(BasePipelineTestCase):
         errmsg = str(e.exception)
         self.assertEqual(
             errmsg,
-            "Comparison against self.missing_value ({v}) in C.eq().\n"
+            "Comparison against self.missing_value ({v!r}) in C.eq().\n"
             "Missing values have NaN semantics, so the requested comparison"
             " would always produce False.\n"
             "Use the isnull() method to check for missing values.".format(
@@ -118,3 +173,100 @@ class ClassifierTestCase(BasePipelineTestCase):
             initial_workspace={c: data},
             mask=self.build_mask(self.ones_mask(shape=data.shape)),
         )
+
+    @parameter_space(
+        __fail_fast=True,
+        compval=['a', 'ab', '', 'not in the array'],
+        missing=['a', 'ab', '', 'not in the array'],
+        labelarray_dtype=(bytes_dtype, unicode_dtype, categorical_dtype),
+    )
+    def test_string_not_equal(self, compval, missing, labelarray_dtype):
+
+        compval = labelarray_dtype.type(compval)
+
+        class C(Classifier):
+            dtype = categorical_dtype
+            missing_value = missing
+            inputs = ()
+            window_length = 0
+
+        c = C()
+
+        # There's no significance to the values here other than that they
+        # contain a mix of the comparison value and other values.
+        data = LabelArray(
+            np.asarray(
+                [['',    'a',  'ab', 'ba'],
+                 ['z',  'ab',   'a', 'ab'],
+                 ['aa', 'ab',    '', 'ab'],
+                 ['aa',  'a',  'ba', 'ba']],
+                dtype=labelarray_dtype,
+            ),
+            missing_value=missing,
+        )
+
+        expected = (
+            (data.as_int_array() != data.reverse_categories.get(compval, -1)) &
+            (data.as_int_array() != data.reverse_categories[C.missing_value])
+        )
+
+        self.check_terms(
+            terms={
+                'ne': c != compval,
+            },
+            expected={
+                'ne': expected,
+            },
+            initial_workspace={c: data},
+            mask=self.build_mask(self.ones_mask(shape=data.shape)),
+        )
+
+    @parameter_space(
+        __fail_fast=True,
+        compval=['a', 'b', 'ab', 'not in the array'],
+        missing=['a', 'ab', '', 'not in the array'],
+        labelarray_dtype=(categorical_dtype, bytes_dtype, unicode_dtype),
+    )
+    def test_string_elementwise_predicates(self,
+                                           compval,
+                                           missing,
+                                           labelarray_dtype):
+
+        missing = labelarray_dtype.type(missing)
+        compval = labelarray_dtype.type(compval)
+
+        class C(Classifier):
+            dtype = categorical_dtype
+            missing_value = missing
+            inputs = ()
+            window_length = 0
+
+        c = C()
+
+        # There's no significance to the values here other than that they
+        # contain a mix of the comparison value and other values.
+        data = LabelArray(
+            np.asarray(
+                [['',    'a',  'ab', 'ba'],
+                 ['z',  'ab',   'a', 'ab'],
+                 ['aa', 'ab',    '', 'ab'],
+                 ['aa',  'a',  'ba', 'ba']],
+                dtype=labelarray_dtype,
+            ),
+            missing_value=missing,
+        )
+
+        self.check_terms(
+            terms={
+                'startswith': c.startswith(compval),
+                'endswith': c.endswith(compval),
+                'contains': c.contains(compval),
+            },
+            expected={
+                'startswith': (data.startswith(compval) & (data != missing)),
+                'endswith': (data.endswith(compval) & (data != missing)),
+                'contains': (data.contains(compval) & (data != missing)),
+            },
+            initial_workspace={c: data},
+            mask=self.build_mask(self.ones_mask(shape=data.shape)),
+        )
diff --git a/tests/pipeline/test_column.py b/tests/pipeline/test_column.py
index d270dc3b..717fc53d 100644
--- a/tests/pipeline/test_column.py
+++ b/tests/pipeline/test_column.py
@@ -7,6 +7,7 @@ from unittest import TestCase
 from pandas import date_range, DataFrame
 from pandas.util.testing import assert_frame_equal
 
+from zipline.lib.labelarray import LabelArray
 from zipline.pipeline import Pipeline
 from zipline.pipeline.data.testing import TestingDataSet as TDS
 from zipline.testing import chrange, temp_pipeline_engine
@@ -35,6 +36,21 @@ class LatestTestCase(TestCase):
 
     def expected_latest(self, column, slice_):
         loader = self.engine.get_loader(column)
+
+        index = self.calendar[slice_]
+        columns = self.assets
+        values = loader.values(column.dtype, self.calendar, self.sids)[slice_]
+
+        if column.dtype.kind in ('O', 'S', 'U'):
+            # For string columns, we expect a categorical in the output.
+            return LabelArray(
+                values,
+                missing_value=column.missing_value,
+            ).as_categorical_frame(
+                index=index,
+                columns=columns,
+            )
+
         return DataFrame(
             loader.values(column.dtype, self.calendar, self.sids)[slice_],
             index=self.calendar[slice_],
@@ -55,6 +71,6 @@ class LatestTestCase(TestCase):
             dates_to_test[-1],
         )
         for column in columns:
-            float_result = result[column.name].unstack()
-            expected_float_result = self.expected_latest(column, cal_slice)
-            assert_frame_equal(float_result, expected_float_result)
+            col_result = result[column.name].unstack()
+            expected_col_result = self.expected_latest(column, cal_slice)
+            assert_frame_equal(col_result, expected_col_result)
diff --git a/tests/pipeline/test_term.py b/tests/pipeline/test_term.py
index 04f35ed6..408cb689 100644
--- a/tests/pipeline/test_term.py
+++ b/tests/pipeline/test_term.py
@@ -471,7 +471,8 @@ class ObjectIdentityTestCase(TestCase):
         for column in TestingDataSet.columns:
             if column.dtype == bool_dtype:
                 self.assertIsInstance(column.latest, Filter)
-            elif column.dtype == int64_dtype:
+            elif (column.dtype == int64_dtype
+                  or column.dtype.kind in ('O', 'S', 'U')):
                 self.assertIsInstance(column.latest, Classifier)
             elif column.dtype in factor_dtypes:
                 self.assertIsInstance(column.latest, Factor)
diff --git a/tests/test_labelarray.py b/tests/test_labelarray.py
new file mode 100644
index 00000000..5b24659f
--- /dev/null
+++ b/tests/test_labelarray.py
@@ -0,0 +1,151 @@
+from itertools import product
+from operator import eq, ne
+import numpy as np
+
+from zipline.lib.labelarray import LabelArray
+from zipline.testing import check_arrays, parameter_space, ZiplineTestCase
+
+
+def rotN(l, N):
+    """
+    Rotate a list of elements.
+
+    Pulls N elements off the end of the list and appends them to the front.
+
+    >>> rotN(['a', 'b', 'c', 'd'], 2)
+    ['c', 'd', 'a', 'b']
+    >>> rotN(['a', 'b', 'c', 'd'], 3)
+    ['d', 'a', 'b', 'c']
+    """
+    assert len(l) >= N, "Can't rotate list by longer than its length."
+    return l[N:] + l[:N]
+
+
+class LabelArrayTestCase(ZiplineTestCase):
+
+    @classmethod
+    def init_class_fixtures(cls):
+        super(LabelArrayTestCase, cls).init_class_fixtures()
+
+        cls.rowvalues = row = ['', 'a', 'b', 'ab', 'a', '', 'b', 'ab', 'z']
+        cls.strs = np.array([rotN(row, i) for i in range(3)])
+
+    def test_fail_on_direct_construction(self):
+        # See http://docs.scipy.org/doc/numpy-1.10.0/user/basics.subclassing.html#simple-example-adding-an-extra-attribute-to-ndarray  # noqa
+
+        with self.assertRaises(TypeError) as e:
+            np.ndarray.__new__(LabelArray, (5, 5))
+
+        self.assertEqual(
+            str(e.exception),
+            "Direct construction of LabelArrays is not supported."
+        )
+
+    @parameter_space(
+        __fail_fast=True,
+        s=['', 'a', 'z', 'aa', 'not in the array'],
+        shape=[(27,), (9, 3), (3, 9), (3, 3, 3)],
+        array_astype=(bytes, unicode, object),
+        scalar_astype=(bytes, unicode, object),
+    )
+    def test_compare_to_str(self, s, shape, array_astype, scalar_astype):
+        strs = self.strs.reshape(shape).astype(array_astype)
+        arr = LabelArray(strs, missing_value='')
+        check_arrays(strs == s, arr == s)
+        check_arrays(strs != s, arr != s)
+
+        np_startswith = np.vectorize(lambda elem: elem.startswith(s))
+        check_arrays(arr.startswith(s), np_startswith(strs))
+
+        np_endswith = np.vectorize(lambda elem: elem.endswith(s))
+        check_arrays(arr.endswith(s), np_endswith(strs))
+
+        np_contains = np.vectorize(lambda elem: s in elem)
+        check_arrays(arr.contains(s), np_contains(strs))
+
+    def test_compare_to_str_array(self):
+        strs = self.strs
+        shape = strs.shape
+        arr = LabelArray(strs, missing_value='')
+        check_arrays(strs == arr, np.full_like(strs, True, dtype=bool))
+        check_arrays(strs != arr, np.full_like(strs, False, dtype=bool))
+
+        def broadcastable_row(value, dtype):
+            return np.full((shape[0], 1), value, dtype=strs.dtype)
+
+        def broadcastable_col(value, dtype):
+            return np.full((1, shape[1]), value, dtype=strs.dtype)
+
+        for comparator, dtype, value in product((eq, ne),
+                                                (bytes, unicode, object),
+                                                set(self.rowvalues)):
+            check_arrays(
+                comparator(arr, np.full_like(strs, value)),
+                comparator(strs, value),
+            )
+            check_arrays(
+                comparator(arr, broadcastable_row(value, dtype=dtype)),
+                comparator(strs, value),
+            )
+            check_arrays(
+                comparator(arr, broadcastable_col(value, dtype=dtype)),
+                comparator(strs, value),
+            )
+
+    @parameter_space(
+        __fail_fast=True,
+        slice_=[
+            0, 1, -1,
+            slice(None),
+            slice(0, 0),
+            slice(0, 3),
+            slice(1, 4),
+            slice(0),
+            slice(None, 1),
+            slice(0, 4, 2),
+            (slice(None), 1),
+            (slice(None), slice(None)),
+            (slice(None), slice(1, 2)),
+        ]
+    )
+    def test_slicing_preserves_attributes(self, slice_):
+        arr = LabelArray(self.strs.reshape((9, 3)), missing_value='')
+        sliced = arr[slice_]
+        self.assertIsInstance(sliced, LabelArray)
+        self.assertIs(sliced.categories, arr.categories)
+        self.assertIs(sliced.reverse_categories, arr.reverse_categories)
+        self.assertIs(sliced.missing_value, arr.missing_value)
+
+    def test_infer_categories(self):
+        arr1d = LabelArray(self.strs, missing_value='')
+        codes1d = arr1d.as_int_array()
+        self.assertEqual(arr1d.shape, self.strs.shape)
+        self.assertEqual(arr1d.shape, codes1d.shape)
+
+        categories = arr1d.categories
+        unique_rowvalues = set(self.rowvalues)
+
+        # There should be an entry in categories for each unique row value, and
+        # each integer stored in the data array should be an index into
+        # categories.
+        self.assertEqual(list(categories), sorted(set(self.rowvalues)))
+        self.assertEqual(
+            set(codes1d.ravel()),
+            set(range(len(unique_rowvalues)))
+        )
+        for idx, value in enumerate(arr1d.categories):
+            check_arrays(
+                self.strs == value,
+                arr1d.view(type=np.ndarray) == idx,
+            )
+
+        for shape in (9, 3), (3, 9), (3, 3, 3):
+            strs2d = self.strs.reshape(shape)
+            arr2d = LabelArray(strs2d, missing_value='')
+            codes2d = arr2d.as_int_array()
+
+            self.assertEqual(arr2d.shape, shape)
+            check_arrays(arr2d.categories, categories)
+
+            for idx, value in enumerate(arr2d.categories):
+                check_arrays(strs2d == value, codes2d == idx)
diff --git a/zipline/algorithm.py b/zipline/algorithm.py
index 496a6a56..9b0026ce 100644
--- a/zipline/algorithm.py
+++ b/zipline/algorithm.py
@@ -665,6 +665,7 @@ class TradingAlgorithm(object):
         new_assets = tuple(new_assets)
         new_sids = tuple(new_sids)
         new_symbols = tuple(new_symbols)
+
         number_of_kinds_of_new_things = (
             sum((bool(new_assets), bool(new_sids), bool(new_symbols)))
         )
diff --git a/zipline/data/us_equity_loader.py b/zipline/data/us_equity_loader.py
index 5385b203..ffb3d7e2 100644
--- a/zipline/data/us_equity_loader.py
+++ b/zipline/data/us_equity_loader.py
@@ -19,7 +19,7 @@ from abc import (
 )
 
 from cachetools import LRUCache
-from numpy import dtype, around, hstack
+from numpy import around, hstack
 from pandas.tslib import normalize_date
 
 from six import with_metaclass
@@ -28,6 +28,7 @@ from zipline.lib._float64window import AdjustedArrayWindow as Float64Window
 from zipline.lib.adjustment import Float64Multiply
 from zipline.utils.cache import ExpiringCache
 from zipline.utils.memoize import lazyval
+from zipline.utils.numpy_utils import float64_dtype
 
 
 class SlidingWindow(object):
@@ -237,9 +238,9 @@ class USEquityHistoryLoader(with_metaclass(ABCMeta)):
             prefetch_dts = cal[start_ix:prefetch_end_ix + 1]
             prefetch_len = len(prefetch_dts)
             array = self._array(prefetch_dts, needed_assets, field)
+            view_kwargs = {}
             if field == 'volume':
-                array = array.astype('float64')
-            dtype_ = dtype('float64')
+                array = array.astype(float64_dtype)
 
             for i, asset in enumerate(needed_assets):
                 if self._adjustments_reader:
@@ -249,7 +250,7 @@ class USEquityHistoryLoader(with_metaclass(ABCMeta)):
                     adjs = {}
                 window = Float64Window(
                     array[:, i].reshape(prefetch_len, 1),
-                    dtype_,
+                    view_kwargs,
                     adjs,
                     offset,
                     size
diff --git a/zipline/lib/_factorize.pyx b/zipline/lib/_factorize.pyx
new file mode 100644
index 00000000..d3a98d18
--- /dev/null
+++ b/zipline/lib/_factorize.pyx
@@ -0,0 +1,108 @@
+"""
+Factorization algorithms.
+"""
+from numpy cimport ndarray, int64_t, PyArray_Check, import_array
+from numpy import arange, asarray, empty, int64, isnan, ndarray
+
+import_array()
+
+
+cpdef factorize_strings_known_categories(ndarray[object] values,
+                                         list categories,
+                                         object missing_value,
+                                         int sort):
+    """
+    Factorize an array whose categories are already known.
+
+    Any entries not in the specified categories will be given the code for
+    `missing_value`.
+    """
+    if missing_value not in categories:
+        categories.append(missing_value)
+
+    if sort:
+        categories = sorted(categories)
+
+    cdef:
+        Py_ssize_t      nvalues = len(values)
+        dict reverse_categories = dict(
+            zip(categories, range(len(categories)))
+        )
+
+    if not nvalues:
+        return (
+            asarray([], dtype=int64),
+            asarray(categories, dtype=object),
+            reverse_categories,
+        )
+
+    cdef:
+        Py_ssize_t            i
+        Py_ssize_t missing_code = reverse_categories[missing_value]
+        ndarray[int64_t]  codes = empty(nvalues, dtype=int64)
+
+    for i in range(nvalues):
+        codes[i] = reverse_categories.get(values[i], missing_code)
+
+    return codes, asarray(categories, dtype=object), reverse_categories
+
+cpdef factorize_strings(ndarray[object] values,
+                        object missing_value,
+                        int sort):
+    """
+    Factorize an array of (possibly duplicated) labels into an array of indices
+    into a unique array of labels.
+
+    This is ~30% faster than pandas.factorize, at the cost of not having
+    special treatment for NaN, which we don't care about because we only
+    support arrays of strings.
+
+    (Though it's faster even if you throw in the nan checks that pandas does,
+    because we're using dict and list instead of PyObjectHashTable and
+    ObjectVector.  Python's builtin data structures are **really**
+    well-optimized.)
+    """
+    cdef:
+        Py_ssize_t      nvalues = len(values)
+        list         categories = [missing_value]
+        dict reverse_categories = {missing_value: 0}
+
+    # Short circuit on empty array.
+    if not nvalues:
+        return (
+            asarray([], dtype=int64),
+            asarray(categories, dtype=object),
+            reverse_categories,
+        )
+
+    cdef:
+        Py_ssize_t      i, code
+        object              key = None
+        ndarray[int64_t]  codes = empty(nvalues, dtype=int64)
+
+    for i in range(nvalues):
+        key = values[i]
+        code = reverse_categories.get(key, -1)
+        if code == -1:
+            # Assign new code.
+            code = len(reverse_categories)
+            reverse_categories[key] = code
+            categories.append(key)
+        codes[i] = code
+
+    cdef ndarray[int64_t, ndim=1] sorter
+    cdef ndarray[int64_t, ndim=1] reverse_indexer
+    cdef int ncategories
+    cdef ndarray[object] categories_array = asarray(categories, dtype=object)
+    if sort:
+        # This is all taken from pandas.core.algorithms.factorize.
+        ncategories = len(categories_array)
+        sorter = categories_array.argsort()
+        reverse_indexer = empty(ncategories, dtype=int64)
+        reverse_indexer.put(sorter, arange(ncategories))
+
+        codes = reverse_indexer.take(codes)
+        categories_array = categories_array.take(sorter)
+        reverse_categories = dict(zip(categories_array, range(ncategories)))
+
+    return codes, categories_array, reverse_categories
diff --git a/zipline/lib/_float64window.pyx b/zipline/lib/_float64window.pyx
index e3df0225..1747b014 100644
--- a/zipline/lib/_float64window.pyx
+++ b/zipline/lib/_float64window.pyx
@@ -1,5 +1,7 @@
 """
 float specialization of AdjustedArrayWindow
 """
-from numpy cimport float64_t as ctype
+from numpy cimport float64_t
+ctypedef float64_t[:, :] databuffer
+
 include "_windowtemplate.pxi"
diff --git a/zipline/lib/_int64window.pyx b/zipline/lib/_int64window.pyx
index 9951bd8c..d30b97ca 100644
--- a/zipline/lib/_int64window.pyx
+++ b/zipline/lib/_int64window.pyx
@@ -1,5 +1,8 @@
 """
 datetime specialization of AdjustedArrayWindow
 """
-from numpy cimport int64_t as ctype
+from numpy cimport int64_t
+
+ctypedef int64_t[:, :] databuffer
+
 include "_windowtemplate.pxi"
diff --git a/zipline/lib/_labelwindow.pyx b/zipline/lib/_labelwindow.pyx
new file mode 100644
index 00000000..702f9d79
--- /dev/null
+++ b/zipline/lib/_labelwindow.pyx
@@ -0,0 +1,6 @@
+"""
+AdjustedArrayWindow type used for LabelArrays.
+"""
+ctypedef object databuffer
+
+include "_windowtemplate.pxi"
diff --git a/zipline/lib/_uint8window.pyx b/zipline/lib/_uint8window.pyx
index 99dad45f..8b8ce98b 100644
--- a/zipline/lib/_uint8window.pyx
+++ b/zipline/lib/_uint8window.pyx
@@ -1,5 +1,8 @@
 """
 bool specialization of AdjustedArrayWindow
 """
-from numpy cimport uint8_t as ctype
+from numpy cimport uint8_t
+
+ctypedef uint8_t[:, :] databuffer
+
 include "_windowtemplate.pxi"
diff --git a/zipline/lib/_windowtemplate.pxi b/zipline/lib/_windowtemplate.pxi
index 2a1ae38b..fb50736d 100644
--- a/zipline/lib/_windowtemplate.pxi
+++ b/zipline/lib/_windowtemplate.pxi
@@ -2,8 +2,8 @@
 Template for AdjustedArray windowed iterators.
 
 This file is intended to be used by inserting it via a Cython include into a
-file that's define a type symbol named `ctype` and string constant named
-`dtype`.
+file that's defined a type symbol named `databuffer` that can be used like a
+2-D numpy array.
 
 See Also
 --------
@@ -12,9 +12,7 @@ zipline.lib._intwindow
 zipline.lib._datewindow
 """
 from numpy cimport ndarray
-from numpy import asarray
-
-ctypedef ctype[:, :] databuffer
+from numpy import asanyarray
 
 
 cdef class AdjustedArrayWindow:
@@ -33,8 +31,8 @@ cdef class AdjustedArrayWindow:
     """
     cdef:
         # ctype must be defined by the file into which this is being copied.
-        databuffer data
-        object viewtype
+        readonly databuffer data
+        readonly dict view_kwargs
         readonly Py_ssize_t window_length
         Py_ssize_t anchor, next_anchor, max_anchor, next_adj
         dict adjustments
@@ -43,13 +41,13 @@ cdef class AdjustedArrayWindow:
 
     def __cinit__(self,
                   databuffer data not None,
-                  object viewtype not None,
+                  dict view_kwargs not None,
                   dict adjustments not None,
                   Py_ssize_t offset,
                   Py_ssize_t window_length):
 
         self.data = data
-        self.viewtype = viewtype
+        self.view_kwargs = view_kwargs
         self.adjustments = adjustments
         self.adjustment_indices = sorted(adjustments, reverse=True)
         self.window_length = window_length
@@ -74,9 +72,10 @@ cdef class AdjustedArrayWindow:
 
     def __next__(self):
         cdef:
-            ndarray out
             object adjustment
+            ndarray out
             Py_ssize_t start, anchor
+            dict view_kwargs
 
         anchor = self.anchor = self.next_anchor
         if anchor > self.max_anchor:
@@ -93,7 +92,13 @@ cdef class AdjustedArrayWindow:
             self.next_adj = self.pop_next_adj()
 
         start = anchor - self.window_length
-        out = asarray(self.data[start:self.anchor]).view(self.viewtype)
+
+        # If our data is a custom subclass of ndarray, preserve that subclass
+        # by using asanyarray instead of asarray.
+        out = asanyarray(self.data[start:self.anchor])
+        view_kwargs = self.view_kwargs
+        if view_kwargs:
+            out = out.view(**view_kwargs)
         out.setflags(write=False)
 
         self.next_anchor = self.anchor + 1
@@ -101,8 +106,7 @@ cdef class AdjustedArrayWindow:
         return out
 
     def seek(self, target_anchor):
-        cdef:
-            ndarray out
+        cdef ndarray out = None
 
         if target_anchor < self.anchor:
             raise Exception('Can not access data after window has passed.')
@@ -122,5 +126,5 @@ cdef class AdjustedArrayWindow:
             self.window_length,
             self.anchor,
             self.max_anchor,
-            self.viewtype,
+            self.view_kwargs.get('dtype'),
         )
diff --git a/zipline/lib/adjusted_array.py b/zipline/lib/adjusted_array.py
index 4c0ee5b8..b00f3459 100644
--- a/zipline/lib/adjusted_array.py
+++ b/zipline/lib/adjusted_array.py
@@ -17,6 +17,8 @@ from zipline.errors import (
     WindowLengthNotPositive,
     WindowLengthTooLong,
 )
+from zipline.lib.labelarray import LabelArray
+from zipline.utils.compat import unicode
 from zipline.utils.numpy_utils import (
     datetime64ns_dtype,
     float64_dtype,
@@ -28,8 +30,10 @@ from zipline.utils.memoize import lazyval
 # These class names are all the same because of our bootleg templating system.
 from ._float64window import AdjustedArrayWindow as Float64Window
 from ._int64window import AdjustedArrayWindow as Int64Window
+from ._labelwindow import AdjustedArrayWindow as LabelWindow
 from ._uint8window import AdjustedArrayWindow as UInt8Window
 
+
 NOMASK = None
 BOOL_DTYPES = frozenset(
     map(dtype, [bool_]),
@@ -44,16 +48,20 @@ INT_DTYPES = frozenset(
 DATETIME_DTYPES = frozenset(
     map(dtype, ['datetime64[ns]', 'datetime64[D]']),
 )
+# We use object arrays for strings.
+CATEGORICAL_DTYPES = frozenset(map(dtype, ['O']))
+
 REPRESENTABLE_DTYPES = BOOL_DTYPES.union(
     FLOAT_DTYPES,
     INT_DTYPES,
-    DATETIME_DTYPES
+    DATETIME_DTYPES,
+    CATEGORICAL_DTYPES,
 )
 
 
 def can_represent_dtype(dtype):
     """
-    Can we build an AdjustedArray for a baseline of dtype ``dtype``?
+    Can we build an AdjustedArray for a baseline of `dtype``?
     """
     return dtype in REPRESENTABLE_DTYPES
 
@@ -65,11 +73,11 @@ CONCRETE_WINDOW_TYPES = {
 }
 
 
-def _normalize_array(data):
+def _normalize_array(data, missing_value):
     """
     Coerce buffer data for an AdjustedArray into a standard scalar
-    representation, returning the coerced array and a numpy dtype object to use
-    as a view type when providing public view into the data.
+    representation, returning the coerced array and a dict of argument to pass
+    to np.view to use when providing a user-facing view of the underlying data.
 
     - float* data is coerced to float64 with viewtype float64.
     - int32, int64, and uint32 are converted to int64 with viewtype int64.
@@ -82,19 +90,29 @@ def _normalize_array(data):
 
     Returns
     -------
-    coerced, viewtype : (np.ndarray, np.dtype)
+    coerced, view_kwargs : (np.ndarray, np.dtype)
     """
+    if isinstance(data, LabelArray):
+        return data, {}
+
     data_dtype = data.dtype
     if data_dtype == bool_:
-        return data.astype(uint8), dtype(bool_)
+        return data.astype(uint8), {'dtype': dtype(bool_)}
     elif data_dtype in FLOAT_DTYPES:
-        return data.astype(float64), dtype(float64)
+        return data.astype(float64), {'dtype': dtype(float64)}
     elif data_dtype in INT_DTYPES:
-        return data.astype(int64), dtype(int64)
-    elif data_dtype.name.startswith('datetime'):
+        return data.astype(int64), {'dtype': dtype(int64)}
+    elif data_dtype in CATEGORICAL_DTYPES:
+        if not isinstance(missing_value, (bytes, unicode)):
+            raise TypeError(
+                "Invalid missing_value for categorical array.\n"
+                "Expected a string, got %r" % missing_value,
+            )
+        return LabelArray(data, missing_value), {}
+    elif data_dtype.kind == 'M':
         try:
             outarray = data.astype('datetime64[ns]').view('int64')
-            return outarray, datetime64ns_dtype
+            return outarray, {'dtype': datetime64ns_dtype}
         except OverflowError:
             raise ValueError(
                 "AdjustedArray received a datetime array "
@@ -127,18 +145,17 @@ class AdjustedArray(object):
     missing_value : object
         A value to use to fill missing data in yielded windows.
         Should be a value coercible to `data.dtype`.
-
     """
     __slots__ = (
         '_data',
-        '_viewtype',
+        '_view_kwargs',
         'adjustments',
         'missing_value',
         '__weakref__',
     )
 
     def __init__(self, data, mask, adjustments, missing_value):
-        self._data, self._viewtype = _normalize_array(data)
+        self._data, self._view_kwargs = _normalize_array(data, missing_value)
 
         self.adjustments = adjustments
         self.missing_value = missing_value
@@ -158,20 +175,22 @@ class AdjustedArray(object):
         """
         The data stored in this array.
         """
-        return self._data.view(self._viewtype)
+        return self._data.view(**self._view_kwargs)
 
     @lazyval
     def dtype(self):
         """
         The dtype of the data stored in this array.
         """
-        return self._viewtype
+        return self._view_kwargs.get('dtype') or self._data.dtype
 
     @lazyval
     def _iterator_type(self):
         """
         The iterator produced when `traverse` is called on this Array.
         """
+        if isinstance(self._data, LabelArray):
+            return LabelWindow
         return CONCRETE_WINDOW_TYPES[self._data.dtype]
 
     def traverse(self, window_length, offset=0):
@@ -190,7 +209,7 @@ class AdjustedArray(object):
         _check_window_params(data, window_length)
         return self._iterator_type(
             data,
-            self._viewtype,
+            self._view_kwargs,
             self.adjustments,
             offset,
             window_length,
diff --git a/zipline/lib/adjustment.pyx b/zipline/lib/adjustment.pyx
index 748c3025..8708b2e6 100644
--- a/zipline/lib/adjustment.pyx
+++ b/zipline/lib/adjustment.pyx
@@ -316,12 +316,13 @@ cdef class Float64Multiply(Float64Adjustment):
 
     cpdef mutate(self, float64_t[:, :] data):
         cdef Py_ssize_t row, col
+        cdef float64_t value = self.value
 
         # last_col + 1 because last_col should also be affected.
         for col in range(self.first_col, self.last_col + 1):
             # last_row + 1 because last_row should also be affected.
             for row in range(self.first_row, self.last_row + 1):
-                data[row, col] *= self.value
+                data[row, col] *= value
 
 
 cdef class Float64Overwrite(Float64Adjustment):
@@ -354,12 +355,13 @@ cdef class Float64Overwrite(Float64Adjustment):
 
     cpdef mutate(self, float64_t[:, :] data):
         cdef Py_ssize_t row, col
+        cdef float64_t value = self.value
 
         # last_col + 1 because last_col should also be affected.
         for col in range(self.first_col, self.last_col + 1):
             # last_row + 1 because last_row should also be affected.
             for row in range(self.first_row, self.last_row + 1):
-                data[row, col] = self.value
+                data[row, col] = value
 
 
 cdef class Float64Add(Float64Adjustment):
@@ -392,12 +394,13 @@ cdef class Float64Add(Float64Adjustment):
 
     cpdef mutate(self, float64_t[:, :] data):
         cdef Py_ssize_t row, col
+        cdef float64_t value = self.value
 
         # last_col + 1 because last_col should also be affected.
         for col in range(self.first_col, self.last_col + 1):
             # last_row + 1 because last_row should also be affected.
             for row in range(self.first_row, self.last_row + 1):
-                data[row, col] += self.value
+                data[row, col] += value
 
 
 cdef class _Int64Adjustment(Adjustment):
@@ -530,9 +533,62 @@ cdef class Datetime64Overwrite(Datetime64Adjustment):
     """
     cpdef mutate(self, int64_t[:, :] data):
         cdef Py_ssize_t row, col
+        cdef int64_t value = self.value
 
         # last_col + 1 because last_col should also be affected.
         for col in range(self.first_col, self.last_col + 1):
             # last_row + 1 because last_row should also be affected.
             for row in range(self.first_row, self.last_row + 1):
-                data[row, col] = self.value
+                data[row, col] = value
+
+
+cdef class _ObjectAdjustment(Adjustment):
+    """
+    Base class for adjustments that operate on arbitrary objects.
+
+    We use only this for categorical data, where our data buffer is an array of
+    indices into an array of unique Python string objects.
+    """
+    cdef:
+        readonly object value
+
+    def __init__(self,
+                 Py_ssize_t first_row,
+                 Py_ssize_t last_row,
+                 Py_ssize_t first_col,
+                 Py_ssize_t last_col,
+                 object value):
+        super(Adjustment, self).__init__(
+            first_row=first_row,
+            last_row=last_row,
+            first_col=first_col,
+            last_col=last_col,
+        )
+        self.value = value
+
+    def __repr__(self):
+        return (
+            "%s(first_row=%d, last_row=%d,"
+            " first_col=%d, last_col=%d, value=%r)" % (
+                type(self).__name__,
+                self.first_row,
+                self.last_row,
+                self.first_col,
+                self.last_col,
+                self.value,
+            )
+        )
+
+
+cdef class ObjectOverwrite(_ObjectAdjustment):
+
+    cpdef mutate(self, object data):
+        # data is an object here because this is intended to be used with a
+        # `zipline.lib.LabelArray`.
+        cdef Py_ssize_t row, col
+        cdef object value = self.value
+
+        # We don't do this in a loop because we only want to look up the label
+        # code in the array's categories once.
+        data[self.first_row:self.last_row + 1,
+             self.first_col:self.last_col + 1] = self.value
diff --git a/zipline/lib/labelarray.py b/zipline/lib/labelarray.py
new file mode 100644
index 00000000..837a39c2
--- /dev/null
+++ b/zipline/lib/labelarray.py
@@ -0,0 +1,482 @@
+"""
+An ndarray subclass for working with arrays of strings.
+"""
+from functools import partial
+from numbers import Number
+from operator import eq, ne
+import re
+
+import numpy as np
+from numpy import ndarray
+import pandas as pd
+from toolz import compose
+
+from zipline.utils.preprocess import preprocess
+from zipline.utils.sentinel import sentinel
+from zipline.utils.input_validation import (
+    coerce,
+    expect_kinds,
+    expect_types,
+    optional,
+)
+from zipline.utils.numpy_utils import is_object, int64_dtype
+
+from ._factorize import (
+    factorize_strings,
+    factorize_strings_known_categories,
+)
+
+
+def compare_arrays(left, right):
+    "Eq check with a short-circuit for identical objects."
+    return (
+        left is right
+        or ((left.shape == right.shape) and (left == right).all())
+    )
+
+
+def _make_unsupported_method(name):
+    def method(*args, **kwargs):
+        raise NotImplementedError(
+            "Method %s is not supported on LabelArrays." % name
+        )
+    method.__name__ = name
+    method.__doc__ = "Unsupported LabelArray Method: %s" % name
+    return method
+
+
+class CategoryMismatch(ValueError):
+    """
+    Error raised on attempt to perform operations between LabelArrays with
+    mismatched category arrays.
+    """
+    def __init__(self, left, right):
+        (mismatches,) = np.where(left != right)
+        assert len(mismatches), "Not actually a mismatch!"
+        super(CategoryMismatch, self).__init__(
+            "LabelArray categories don't match:\n"
+            "Mismatched Indices: {mismatches}\n"
+            "Left: {left}\n"
+            "Right: {right}".format(
+                mismatches=mismatches,
+                left=left[mismatches],
+                right=right[mismatches],
+            )
+        )
+
+_NotPassed = sentinel('_NotPassed')
+
+
+class LabelArray(ndarray):
+    """
+    An ndarray subclass for working with arrays of strings.
+
+    Factorizes the input array into integers, but overloads equality on strings
+    to check against the factor label.
+
+    See Also
+    --------
+    http://docs.scipy.org/doc/numpy-1.10.0/user/basics.subclassing.html
+    """
+    @preprocess(
+        values=coerce(list, partial(np.asarray, dtype=object)),
+    )
+    @expect_types(
+        values=np.ndarray,
+        categories=optional(list),
+    )
+    @expect_kinds(values=("O", "S", "U"))
+    def __new__(cls,
+                values,
+                missing_value,
+                categories=None,
+                sort=True):
+
+        # Numpy's fixed-width string types aren't very efficient. Working with
+        # object arrays is faster than bytes or unicode arrays in almost all
+        # cases.
+        if not is_object(values):
+            values = values.astype(object)
+
+        if categories is None:
+            codes, categories, reverse_categories = factorize_strings(
+                values.ravel(),
+                missing_value=missing_value,
+                sort=sort,
+            )
+        else:
+            codes, categories, reverse_categories = (
+                factorize_strings_known_categories(
+                    values.ravel(),
+                    categories=categories,
+                    missing_value=missing_value,
+                    sort=sort,
+                )
+            )
+        categories.setflags(write=False)
+
+        ret = codes.reshape(values.shape).view(type=cls)
+        ret._categories = categories
+        ret._reverse_categories = reverse_categories
+        ret._missing_value = missing_value
+        return ret
+
+    @property
+    def categories(self):
+        # This is a property because it should be immutable.
+        return self._categories
+
+    @property
+    def reverse_categories(self):
+        # This is a property because it should be immutable.
+        return self._reverse_categories
+
+    @property
+    def missing_value(self):
+        # This is a property because it should be immutable.
+        return self._missing_value
+
+    def __array_finalize__(self, obj):
+        """
+        Called by Numpy after array construction.
+
+        There are three cases where this can happen:
+
+        1. Someone tries to directly construct a new array by doing::
+
+            >>> ndarray.__new__(LabelArray, ...)
+
+           In this case, obj will be None.  We treat this as an error case and
+           fail.
+
+        2. Someone (most likely our own __new__) calls
+           other_array.view(type=LabelArray).
+
+           In this case, `self` will be the new LabelArray instance, and
+           ``obj` will be the array on which ``view`` is being called.
+
+           The caller of ``obj.view`` is responsible for copying setting
+           category metadata on ``self`` after we exit.
+
+        3. Someone creates a new LabelArray by slicing an existing one.
+
+           In this case, ``obj`` will be the original LabelArray.  We're
+           responsible for copying over the parent array's category metadata.
+        """
+        if obj is None:
+            raise TypeError(
+                "Direct construction of LabelArrays is not supported."
+            )
+
+        # See docstring for an explanation of when these will or will not be
+        # set.
+        self._categories = getattr(obj, 'categories', None)
+        self._reverse_categories = getattr(obj, 'reverse_categories', None)
+        self._missing_value = getattr(obj, 'missing_value', None)
+
+    def __array_wrap__(self, obj, context=None):
+        """
+        Called by numpy after completion of a ufunc.
+
+        We coerce back into a vanilla ndarray if our dtype changed, since that
+        indicates that our categories are no longer meaningful.
+        """
+        if obj.dtype != self.dtype:
+            return obj.view(type=np.ndarray)
+        return obj
+
+    def as_int_array(self):
+        """
+        Convert self into a regular ndarray of ints.
+
+        This is an O(1) operation. It does not copy the underlying data.
+        """
+        return self.view(type=ndarray)
+
+    def as_string_array(self):
+        """
+        Convert self back into an array of strings.
+
+        This is an O(N) operation.
+        """
+        return self.categories[self]
+
+    def as_categorical(self, name=None):
+        """
+        Coerce self into a pandas categorical.
+
+        This is only defined on 1D arrays, since that's all pandas supports.
+        """
+        if len(self.shape) > 1:
+            raise ValueError("Can't convert a 2D array to a categorical.")
+        return pd.Categorical.from_codes(
+            self.as_int_array(),
+            self.categories,
+            ordered=False,
+            name=name,
+        )
+
+    def as_categorical_frame(self, index, columns, name=None):
+        """
+        Coerce self into a pandas DataFrame of Categoricals.
+        """
+        if len(self.shape) != 2:
+            raise ValueError(
+                "Can't convert a non-2D LabelArray into a DataFrame."
+            )
+
+        expected_shape = (len(index), len(columns))
+        if expected_shape != self.shape:
+            raise ValueError(
+                "Can't construct a DataFrame with provided indices:\n\n"
+                "LabelArray shape is {actual}, but index and columns imply "
+                "that shape should be {expected}.".format(
+                    actual=self.shape,
+                    expected=expected_shape,
+                )
+            )
+
+        return pd.Series(
+            index=pd.MultiIndex.from_product([index, columns]),
+            data=self.ravel().as_categorical(name=name),
+        ).unstack()
+
+    def __setitem__(self, indexer, value):
+        self_categories = self.categories
+
+        if isinstance(value, LabelArray):
+            value_categories = value.categories
+            if compare_arrays(self_categories, value_categories):
+                return super(LabelArray, self).__setitem__(indexer, value)
+            else:
+                raise CategoryMismatch(self_categories, value_categories)
+
+        elif isinstance(value, (bytes, unicode)):
+            value_code = self.reverse_categories.get(value, None)
+            if value_code is None:
+                raise ValueError("%r is not in LabelArray categories." % value)
+            return super(LabelArray, self).__setitem__(indexer, value_code)
+
+        else:
+            raise NotImplementedError(
+                "Setting into a LabelArray with a value of "
+                "type {type} is not yet supported.".format(
+                    type=type(value).__name__,
+                ),
+            )
+
+    def _equality_check(op):
+        """
+        Shared code for __eq__ and __ne__, parameterized on the actual
+        comparison operator to use.
+        """
+        # What value should we return if we compare against a value not in our
+        # categories?
+        if op is eq:
+            COMPARE_TO_UNKNOWN = False
+        elif op is ne:
+            COMPARE_TO_UNKNOWN = True
+        else:
+            raise AssertionError("_make_equality_check called with %s" % op)
+
+        def method(self, other):
+            self_categories = self.categories
+
+            if isinstance(other, LabelArray):
+                other_categories = other.categories
+                if compare_arrays(self_categories, other_categories):
+                    return op(self.as_int_array(), other.as_int_array())
+                else:
+                    raise CategoryMismatch(self_categories, other_categories)
+
+            elif isinstance(other, ndarray):
+                # Compare to ndarrays as though we were an array of strings.
+                # This is fairly expensive, and should generally be avoided.
+                return op(self.as_string_array(), other)
+
+            elif isinstance(other, (bytes, unicode)):
+                i = self._reverse_categories.get(other, None)
+                if i is None:
+                    # Requested string isn't in our categories.  Short circuit.
+                    # This isn't full_like because that would try to return a
+                    # LabelArray.
+                    return np.full(self.shape, COMPARE_TO_UNKNOWN, dtype=bool)
+
+                return op(self.as_int_array(), i)
+
+            elif isinstance(other, Number):
+                return NotImplemented
+
+            return op(super(LabelArray, self), other)
+        return method
+
+    __eq__ = _equality_check(eq)
+    __ne__ = _equality_check(ne)
+    del _equality_check
+
+    def view(self, dtype=_NotPassed, type=_NotPassed):
+        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
+            raise TypeError("Can't view LabelArray as another dtype.")
+
+        # The text signature on ndarray.view makes it look like the default
+        # values for dtype and type are `None`, but passing None explicitly has
+        # different semantics than not passing an arg at all, so we reconstruct
+        # the kwargs dict here to simulate the args not being passed at all.
+        kwargs = {}
+        if dtype is not _NotPassed:
+            kwargs['dtype'] = dtype
+        if type is not _NotPassed:
+            kwargs['type'] = type
+        return super(LabelArray, self).view(**kwargs)
+
+    # In general, we support resizing, slicing, and reshaping methods, but not
+    # numeric methods.
+    SUPPORTED_NDARRAY_METHODS = frozenset([
+        'base',
+        'byteswap',
+        'compress',
+        'copy',
+        'data',
+        'diagonal',
+        'dtype',
+        'flat',
+        'flatten',
+        'item',
+        'itemset',
+        'itemsize',
+        'nbytes',
+        'ndim',
+        'newbyteorder',
+        'ravel',
+        'repeat',
+        'reshape',
+        'resize',
+        'setflags',
+        'shape',
+        'size',
+        'squeeze',
+        'strides',
+        'swapaxes',
+        'take',
+        'trace',
+        'transpose',
+        'view'
+    ])
+    PUBLIC_NDARRAY_METHODS = frozenset([
+        s for s in dir(ndarray) if not s.startswith('_')
+    ])
+
+    # Generate failing wrappers for all unsupported methods.
+    locals().update(
+        {
+            method: _make_unsupported_method(method)
+            for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
+        }
+    )
+
+    def __repr__(self):
+        # This happens if you call a ufunc on a LabelArray that changes the
+        # dtype.  This is generally an indicator that the array has been used
+        # incorrectly, and it means we're no longer valid for anything.
+        if self.dtype != int64_dtype:
+            return "Invalid LabelArray: dtype={}, shape={}".format(
+                self.dtype, self.shape
+            )
+        repr_lines = repr(self.as_string_array()).splitlines()
+        repr_lines[0] = repr_lines[0].replace('array(', 'LabelArray(', 1)
+        repr_lines[-1] = repr_lines[-1].rsplit(',', 1)[0] + ')'
+        # The extra spaces here account for the difference in length between
+        # 'array(' and 'LabelArray('.
+        return '\n     '.join(repr_lines)
+
+    def empty_like(self, shape):
+        """
+        Make an empty LabelArray with the same categories as ``self``, filled
+        with ``self.missing_value``.
+        """
+        out = np.full(
+            shape,
+            self.reverse_categories[self.missing_value],
+            dtype=self.dtype
+        ).view(
+            type=type(self)
+        )
+
+        out._categories = self.categories
+        out._reverse_categories = self.reverse_categories
+        out._missing_value = self.missing_value
+
+        return out
+
+    def apply(self, f, dtype):
+        """
+        Map a function elementwise over entries in ``self``.
+
+        ``f`` will be applied exactly once to each unique value in ``self``.
+        """
+        return np.vectorize(f, otypes=[dtype])(self.categories)[self]
+
+    def startswith(self, prefix):
+        """
+        Element-wise startswith.
+
+        Parameters
+        ----------
+        prefix : str
+
+        Returns
+        -------
+        matches : np.ndarray[bool]
+            An array with the same shape as self indicating whether each
+            element of self started with ``prefix``.
+        """
+        return self.apply(lambda elem: elem.startswith(prefix), dtype=bool)
+
+    def endswith(self, suffix):
+        """
+        Elementwise endswith.
+
+        Parameters
+        ----------
+        suffix : str
+
+        Returns
+        -------
+        matches : np.ndarray[bool]
+            An array with the same shape as self indicating whether each
+            element of self ended with ``suffix``.w
+        """
+        return self.apply(lambda elem: elem.endswith(suffix), dtype=bool)
+
+    def contains(self, substring):
+        """
+        Elementwise contains.
+
+        Parameters
+        ----------
+        substring : str
+
+        Returns
+        -------
+        matches : np.ndarray[bool]
+            An array with the same shape as self indicating whether each
+            element of self ended with ``suffix``.
+        """
+        return self.apply(lambda elem: substring in elem, dtype=bool)
+
+    @preprocess(pattern=re.compile)
+    def matches(self, pattern):
+        """
+        Elementwise regex match.
+
+        Parameters
+        ----------
+        pattern : str or compiled regex
+
+        Returns
+        -------
+        matches : np.ndarray[bool]
+            An array with the same shape as self indicating whether each
+            element of self was matched by ``pattern``.
+        """
+        return self.apply(compose(bool, pattern.match))
diff --git a/zipline/pipeline/api_utils.py b/zipline/pipeline/api_utils.py
new file mode 100644
index 00000000..1ccf3543
--- /dev/null
+++ b/zipline/pipeline/api_utils.py
@@ -0,0 +1,49 @@
+"""
+Utilities for creating public APIs (e.g. argument validation decorators).
+"""
+from zipline.utils.input_validation import preprocess
+
+
+def restrict_to_dtype(dtype, message_template):
+    """
+    A factory for decorators that restrict Term methods to only be callable on
+    Terms with a specific dtype.
+
+    This is conceptually similar to
+    zipline.utils.input_validation.expect_dtypes, but provides more flexibility
+    for providing error messages that are specifically targeting Term methods.
+
+    Parameters
+    ----------
+    dtype : numpy.dtype
+        The dtype on which the decorated method may be called.
+    message_template : str
+        A template for the error message to be raised.
+        `message_template.format` will be called with keyword arguments
+        `method_name`, `expected_dtype`, and `received_dtype`.
+
+    Usage
+    -----
+    @restrict_to_dtype(
+        dtype=float64_dtype,
+        message_template=(
+            "{method_name}() was called on a factor of dtype {received_dtype}."
+            "{method_name}() requires factors of dtype{expected_dtype}."
+
+        ),
+    )
+    def some_factor_method(self, ...):
+        self.stuff_that_requires_being_float64(...)
+    """
+    def processor(term_method, _, term_instance):
+        term_dtype = term_instance.dtype
+        if term_dtype != dtype:
+            raise TypeError(
+                message_template.format(
+                    method_name=term_method.__name__,
+                    expected_dtype=dtype.name,
+                    received_dtype=term_dtype,
+                )
+            )
+        return term_instance
+    return preprocess(self=processor)
diff --git a/zipline/pipeline/classifiers/classifier.py b/zipline/pipeline/classifiers/classifier.py
index c4d77ec9..e27016eb 100644
--- a/zipline/pipeline/classifiers/classifier.py
+++ b/zipline/pipeline/classifiers/classifier.py
@@ -1,16 +1,24 @@
 """
 classifier.py
 """
+from functools import wraps
 from numbers import Number
+import operator
 
 from numpy import where, isnan, nan, zeros
 
+from zipline.lib.labelarray import LabelArray
 from zipline.lib.quantiles import quantiles
-from zipline.pipeline.term import ComputableTerm
+from zipline.pipeline.api_utils import restrict_to_dtype
+from zipline.pipeline.term import ComputableTerm, NotSpecified
+from zipline.utils.compat import unicode
 from zipline.utils.input_validation import expect_types
-from zipline.utils.numpy_utils import int64_dtype
+from zipline.utils.numpy_utils import (
+    categorical_dtype,
+    int64_dtype,
+)
 
-from ..filters import NullFilter, NumExprFilter
+from ..filters import Filter, NullFilter, NumExprFilter
 from ..mixins import (
     CustomTermMixin,
     LatestMixin,
@@ -20,6 +28,15 @@ from ..mixins import (
 )
 
 
+strings_only = restrict_to_dtype(
+    dtype=categorical_dtype,
+    message_template=(
+        "{method_name}() is only defined on Classifiers producing strings"
+        " but it was called on a Factor of dtype {received_dtype}."
+    )
+)
+
+
 class Classifier(RestrictedDTypeMixin, ComputableTerm):
     """
     A Pipeline expression computing a categorical output.
@@ -30,7 +47,9 @@ class Classifier(RestrictedDTypeMixin, ComputableTerm):
     indicating that means/standard deviations should be computed on assets for
     which the classifier produced the same label.
     """
-    ALLOWED_DTYPES = (int64_dtype,)  # Used by RestrictedDTypeMixin
+    # Used by RestrictedDTypeMixin
+    ALLOWED_DTYPES = (int64_dtype, categorical_dtype)
+    categories = NotSpecified
 
     def isnull(self):
         """
@@ -45,9 +64,8 @@ class Classifier(RestrictedDTypeMixin, ComputableTerm):
         return ~self.isnull()
 
     # We explicitly don't support classifier to classifier comparisons, since
-    # the numbers likely don't mean the same thing. This may be relaxed in the
-    # future, but for now we're starting conservatively.
-    @expect_types(other=Number)
+    # the stored values likely don't mean the same thing. This may be relaxed
+    # in the future, but for now we're starting conservatively.
     def eq(self, other):
         """
         Construct a Filter returning True for asset/date pairs where the output
@@ -58,7 +76,7 @@ class Classifier(RestrictedDTypeMixin, ComputableTerm):
         # certainly not what the user wants.
         if other == self.missing_value:
             raise ValueError(
-                "Comparison against self.missing_value ({value}) in"
+                "Comparison against self.missing_value ({value!r}) in"
                 " {typename}.eq().\n"
                 "Missing values have NaN semantics, so the "
                 "requested comparison would always produce False.\n"
@@ -67,24 +85,87 @@ class Classifier(RestrictedDTypeMixin, ComputableTerm):
                     typename=(type(self).__name__),
                 )
             )
-        return NumExprFilter.create(
-            "x_0 == {other}".format(other=int(other)),
-            binds=(self,),
-        )
 
-    @expect_types(other=Number)
+        if isinstance(other, Number) != (self.dtype == int64_dtype):
+            raise InvalidClassifierComparison(self, other)
+
+        if isinstance(other, Number):
+            return NumExprFilter.create(
+                "x_0 == {other}".format(other=int(other)),
+                binds=(self,),
+            )
+        else:
+            return ScalarStringPredicate(
+                classifier=self,
+                op=operator.eq,
+                compval=other,
+            )
+
     def __ne__(self, other):
         """
         Construct a Filter returning True for asset/date pairs where the output
         of ``self`` matches ``other.
         """
-        return NumExprFilter.create(
-            "((x_0 != {other}) & (x_0 != {missing}))".format(
-                other=int(other),
-                missing=self.missing_value,
-            ),
-            binds=(self,),
-        )
+        if isinstance(other, Number) != (self.dtype == int64_dtype):
+            raise InvalidClassifierComparison(self, other)
+
+        if isinstance(other, Number):
+            return NumExprFilter.create(
+                "((x_0 != {other}) & (x_0 != {missing}))".format(
+                    other=int(other),
+                    missing=self.missing_value,
+                ),
+                binds=(self,),
+            )
+        else:
+            return ScalarStringPredicate(
+                classifier=self,
+                op=operator.ne,
+                compval=other,
+            )
+
+    def _string_predicate(f):
+        """
+        Decorator for converting a function from (LabelArray, str) -> bool
+        into a Classifier method that returns a ScalarStringPredicate filter.
+
+        This mainly exists to avoid replicating shared boilerplate
+        (e.g. argument type validation).
+        """
+        @wraps(f)
+        @expect_types(compval=(bytes, unicode))
+        @strings_only
+        def method(self, compval):
+            return ScalarStringPredicate(
+                classifier=self,
+                op=f,
+                compval=compval,
+            )
+        return method
+
+    @_string_predicate
+    @expect_types(label_array=LabelArray)
+    def startswith(label_array, other):
+        return label_array.startswith(other)
+
+    @_string_predicate
+    @expect_types(label_array=LabelArray)
+    def endswith(label_array, other):
+        return label_array.endswith(other)
+
+    @_string_predicate
+    @expect_types(label_array=LabelArray)
+    def contains(label_array, other):
+        return label_array.contains(other)
+
+    del _string_predicate
+
+    def postprocess(self, data):
+        if self.dtype == int64_dtype:
+            return data
+        if not isinstance(data, LabelArray):
+            raise AssertionError("Expected a LabelArray, got %s." % type(data))
+        return data.as_categorical()
 
 
 class Everything(Classifier):
@@ -127,6 +208,50 @@ class Quantiles(SingleInputMixin, Classifier):
         return type(self).__name__ + '(%d)' % self.params['bins']
 
 
+class ScalarStringPredicate(SingleInputMixin, Filter):
+    """
+    A filter that applies a function from (LabelArray, str) -> ndarray[bool].
+
+    Examples include ``==, !=, startswith, and endswith``.
+
+    This exists because we represent string arrays with
+    ``zipline.lib.LabelArray``s, which numexpr doesn't know about, so we can't
+    use the generic NumExprFilter implementation here.
+    """
+    window_length = 0
+
+    @expect_types(classifier=Classifier, compval=(bytes, unicode))
+    def __new__(cls, classifier, op, compval):
+        return super(ScalarStringPredicate, cls).__new__(
+            ScalarStringPredicate,
+            compval=compval,
+            op=op,
+            inputs=(classifier,),
+            mask=classifier.mask,
+        )
+
+    def _init(self, op, compval, *args, **kwargs):
+        self._op = op
+        self._compval = compval
+        return super(ScalarStringPredicate, self)._init(*args, **kwargs)
+
+    @classmethod
+    def static_identity(cls, op, compval, *args, **kwargs):
+        return (
+            super(ScalarStringPredicate, cls).static_identity(*args, **kwargs),
+            op,
+            compval,
+        )
+
+    def _compute(self, arrays, dates, assets, mask):
+        data = arrays[0]
+        return (
+            self._op(data, self._compval)
+            & (data != self.inputs[0].missing_value)
+            & mask
+        )
+
+
 class CustomClassifier(PositiveWindowLengthMixin, CustomTermMixin, Classifier):
     """
     Base class for user-defined Classifiers.
@@ -149,3 +274,27 @@ class Latest(LatestMixin, CustomClassifier):
     zipline.pipeline.factors.factor.Latest
     zipline.pipeline.filters.filter.Latest
     """
+    def _allocate_output(self, windows, shape):
+        """
+        Override the default array allocation to produce a LabelArray when we
+        have a string-like dtype.
+        """
+        if self.dtype == int64_dtype:
+            return super(Latest, self)._allocate_output(windows, shape)
+
+        # This is a little bit of a hack.  We might not know what the
+        # categories for a LabelArray are until it's actually been loaded, so
+        # we need to look at the underlying data.
+        return windows[0].data.empty_like(shape)
+
+
+class InvalidClassifierComparison(TypeError):
+    def __init__(self, classifier, compval):
+        super(InvalidClassifierComparison, self).__init__(
+            "Can't compare classifier of dtype"
+            " {dtype} to value {value} of type {type}.".format(
+                dtype=classifier.dtype,
+                value=compval,
+                type=type(compval).__name__,
+            )
+        )
diff --git a/zipline/pipeline/data/dataset.py b/zipline/pipeline/data/dataset.py
index b5980d83..fcd6cfe5 100644
--- a/zipline/pipeline/data/dataset.py
+++ b/zipline/pipeline/data/dataset.py
@@ -14,11 +14,7 @@ from zipline.pipeline.term import (
     Term,
 )
 from zipline.utils.input_validation import ensure_dtype
-from zipline.utils.numpy_utils import (
-    bool_dtype,
-    int64_dtype,
-    NoDefaultMissingValue,
-)
+from zipline.utils.numpy_utils import NoDefaultMissingValue
 from zipline.utils.preprocess import preprocess
 
 
@@ -26,7 +22,6 @@ class Column(object):
     """
     An abstract column of data, not yet associated with a dataset.
     """
-
     @preprocess(dtype=ensure_dtype)
     def __init__(self, dtype, missing_value=NotSpecified):
         self.dtype = dtype
@@ -164,15 +159,25 @@ class BoundColumn(LoadableTerm):
 
     @property
     def latest(self):
-        if self.dtype == bool_dtype:
-            from zipline.pipeline.filters import Latest
-        elif self.dtype == int64_dtype:
-            from zipline.pipeline.classifiers import Latest
+        from zipline.pipeline.factors import Factor, Latest as LatestFactor
+        from zipline.pipeline.filters import Filter, Latest as LatestFilter
+        from zipline.pipeline.classifiers import (
+            Classifier,
+            Latest as LatestClassifier,
+        )
+
+        dtype = self.dtype
+        if dtype in Filter.ALLOWED_DTYPES:
+            Latest = LatestFilter
+        elif dtype in Classifier.ALLOWED_DTYPES:
+            Latest = LatestClassifier
         else:
-            from zipline.pipeline.factors import Latest
+            assert dtype in Factor.ALLOWED_DTYPES, "Unknown dtype %s." % dtype
+            Latest = LatestFactor
+
         return Latest(
             inputs=(self,),
-            dtype=self.dtype,
+            dtype=dtype,
             missing_value=self.missing_value,
         )
 
diff --git a/zipline/pipeline/data/testing.py b/zipline/pipeline/data/testing.py
index 5d452769..30f92642 100644
--- a/zipline/pipeline/data/testing.py
+++ b/zipline/pipeline/data/testing.py
@@ -7,6 +7,7 @@ zipline.pipeline.data.testing.
 from .dataset import Column, DataSet
 from zipline.utils.numpy_utils import (
     bool_dtype,
+    categorical_dtype,
     float64_dtype,
     datetime64ns_dtype,
     int64_dtype,
@@ -22,3 +23,9 @@ class TestingDataSet(DataSet):
     float_col = Column(dtype=float64_dtype)
     datetime_col = Column(dtype=datetime64ns_dtype)
     int_col = Column(dtype=int64_dtype, missing_value=0)
+
+    categorical_col = Column(dtype=categorical_dtype, missing_value=u'')
+    categorical_default_NULL = Column(
+        dtype=categorical_dtype,
+        missing_value=u'NULL',
+    )
diff --git a/zipline/pipeline/engine.py b/zipline/pipeline/engine.py
index 8a751bc5..79be34a6 100644
--- a/zipline/pipeline/engine.py
+++ b/zipline/pipeline/engine.py
@@ -51,7 +51,7 @@ class PipelineEngine(with_metaclass(ABCMeta)):
         result : pd.DataFrame
             A frame of computed results.
 
-            The columns `result` correspond will be the computed results of
+            The columns `result` correspond to the entries of
             `pipeline.columns`, which should be a dictionary mapping strings to
             instances of `zipline.pipeline.term.Term`.
 
@@ -165,17 +165,20 @@ class SimplePipelineEngine(object):
         root_mask = self._compute_root_mask(start_date, end_date, extra_rows)
         dates, assets, root_mask_values = explode(root_mask)
 
-        outputs = self.compute_chunk(
+        results = self.compute_chunk(
             graph,
             dates,
             assets,
             initial_workspace={self._root_mask_term: root_mask_values},
         )
 
-        out_dates = dates[extra_rows:]
-        screen_values = outputs.pop(screen_name)
-
-        return self._to_narrow(outputs, screen_values, out_dates, assets)
+        return self._to_narrow(
+            graph.outputs,
+            results,
+            results.pop(screen_name),
+            dates[extra_rows:],
+            assets,
+        )
 
     def _compute_root_mask(self, start_date, end_date, extra_rows):
         """
@@ -363,14 +366,16 @@ class SimplePipelineEngine(object):
             out[name] = workspace[term][graph_extra_rows[term]:]
         return out
 
-    def _to_narrow(self, data, mask, dates, assets):
+    def _to_narrow(self, terms, data, mask, dates, assets):
         """
         Convert raw computed pipeline results into a DataFrame for public APIs.
 
         Parameters
         ----------
+        terms : dict[str -> Term]
+            Dict mapping column names to terms.
         data : dict[str -> ndarray[ndim=2]]
-            Dict mapping column names to computed results.
+            Dict mapping column names to computed results for those names.
         mask : ndarray[bool, ndim=2]
             Mask array of values to keep.
         dates : ndarray[datetime64, ndim=1]
@@ -412,8 +417,18 @@ class SimplePipelineEngine(object):
         resolved_assets = array(self._finder.retrieve_all(assets))
         dates_kept = repeat_last_axis(dates.values, len(assets))[mask]
         assets_kept = repeat_first_axis(resolved_assets, len(dates))[mask]
+
+        final_columns = {}
+        for name in data:
+            # Each term that computed an output has its postprocess method
+            # called on the filtered result.
+            #
+            # We use this convert LabelArrays into categoricals, among other
+            # things.
+            final_columns[name] = terms[name].postprocess(data[name][mask])
+
         return DataFrame(
-            data={name: arr[mask] for name, arr in iteritems(data)},
+            data=final_columns,
             index=MultiIndex.from_arrays([dates_kept, assets_kept]),
         ).tz_localize('UTC', level=0)
 
@@ -423,6 +438,7 @@ class SimplePipelineEngine(object):
         """
         root = self._root_mask_term
         clsname = type(self).__name__
+
         # Writing this out explicitly so this errors in testing if we change
         # the name without updating this line.
         compute_chunk_name = self.compute_chunk.__name__
diff --git a/zipline/pipeline/factors/factor.py b/zipline/pipeline/factors/factor.py
index 79d10f71..a76002a9 100644
--- a/zipline/pipeline/factors/factor.py
+++ b/zipline/pipeline/factors/factor.py
@@ -6,11 +6,11 @@ from operator import attrgetter
 from numbers import Number
 
 from numpy import inf, where
-from toolz import curry
 
 from zipline.errors import UnknownRankMethod
 from zipline.lib.normalize import naive_grouped_rowwise_apply
 from zipline.lib.rank import masked_rankdata_2d
+from zipline.pipeline.api_utils import restrict_to_dtype
 from zipline.pipeline.classifiers import Classifier, Everything, Quantiles
 from zipline.pipeline.mixins import (
     CustomTermMixin,
@@ -42,6 +42,7 @@ from zipline.pipeline.filters import (
     PercentileFilter,
     NullFilter,
 )
+from zipline.utils.functional import with_doc, with_name
 from zipline.utils.input_validation import expect_types
 from zipline.utils.math_utils import nanmean, nanstd
 from zipline.utils.numpy_utils import (
@@ -51,7 +52,6 @@ from zipline.utils.numpy_utils import (
     float64_dtype,
     int64_dtype,
 )
-from zipline.utils.preprocess import preprocess
 
 
 _RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])
@@ -81,37 +81,6 @@ def coerce_numbers_to_my_dtype(f):
     return method
 
 
-@curry
-def set_attribute(name, value):
-    """
-    Decorator factory for setting attributes on a function.
-
-    Doesn't change the behavior of the wrapped function.
-
-    Usage
-    -----
-    >>> @set_attribute('__name__', 'foo')
-    ... def bar():
-    ...     return 3
-    ...
-    >>> bar()
-    3
-    >>> bar.__name__
-    'foo'
-    """
-    def decorator(f):
-        setattr(f, name, value)
-        return f
-    return decorator
-
-
-# Decorators for setting the __name__ and __doc__ properties of a decorated
-# function.
-# Example:
-with_name = set_attribute('__name__')
-with_doc = set_attribute('__doc__')
-
-
 def binop_return_type(op):
     if is_comparison(op):
         return NumExprFilter
@@ -328,51 +297,6 @@ def function_application(func):
     return mathfunc
 
 
-def restrict_to_dtype(dtype, message_template):
-    """
-    A factory for decorators that restricting Factor methods to only be
-    callable on Factors with a specific dtype.
-
-    This is conceptually similar to
-    zipline.utils.input_validation.expect_dtypes, but provides more flexibility
-    for providing error messages that are specifically targeting Factor
-    methods.
-
-    Parameters
-    ----------
-    dtype : numpy.dtype
-        The dtype on which the decorated method may be called.
-    message_template : str
-        A template for the error message to be raised.
-        `message_template.format` will be called with keyword arguments
-        `method_name`, `expected_dtype`, and `received_dtype`.
-
-    Usage
-    -----
-    @restrict_to_dtype(
-        dtype=float64_dtype,
-        message_template=(
-            "{method_name}() was called on a factor of dtype {received_dtype}."
-            "{method_name}() requires factors of dtype{expected_dtype}."
-
-        ),
-    )
-    def some_factor_method(self, ...):
-        self.stuff_that_requires_being_float64(...)
-    """
-    def processor(factor_method, _, factor_instance):
-        factor_dtype = factor_instance.dtype
-        if factor_dtype != dtype:
-            raise TypeError(
-                message_template.format(
-                    method_name=factor_method.__name__,
-                    expected_dtype=dtype.name,
-                    received_dtype=factor_dtype,
-                )
-            )
-        return factor_instance
-    return preprocess(self=processor)
-
 # Decorators for Factor methods.
 if_not_float64_tell_caller_to_use_isnull = restrict_to_dtype(
     dtype=float64_dtype,
diff --git a/zipline/pipeline/loaders/synthetic.py b/zipline/pipeline/loaders/synthetic.py
index f9957ae9..dfea5829 100644
--- a/zipline/pipeline/loaders/synthetic.py
+++ b/zipline/pipeline/loaders/synthetic.py
@@ -27,6 +27,7 @@ from zipline.utils.numpy_utils import (
     datetime64ns_dtype,
     float64_dtype,
     int64_dtype,
+    object_dtype,
 )
 
 
@@ -148,6 +149,7 @@ class SeededRandomLoader(PrecomputedLoader):
             float64_dtype: self._float_values,
             int64_dtype: self._int_values,
             bool_dtype: self._bool_values,
+            object_dtype: self._object_values,
         }[dtype](shape)
 
     @property
@@ -191,6 +193,9 @@ class SeededRandomLoader(PrecomputedLoader):
         """
         return self.state.randn(*shape) < 0
 
+    def _object_values(self, shape):
+        return self._int_values(shape).astype(str).astype(object)
+
 
 OHLCV = ('open', 'high', 'low', 'close', 'volume')
 OHLC = ('open', 'high', 'low', 'close')
diff --git a/zipline/pipeline/mixins.py b/zipline/pipeline/mixins.py
index d5f0fcb2..497fc145 100644
--- a/zipline/pipeline/mixins.py
+++ b/zipline/pipeline/mixins.py
@@ -1,7 +1,7 @@
 """
 Mixins classes for use with Filters and Factors.
 """
-from numpy import full_like, recarray
+from numpy import full, recarray
 
 from zipline.utils.control_flow import nullctx
 from zipline.errors import WindowLengthNotPositive, UnsupportedDataType
@@ -51,7 +51,7 @@ class RestrictedDTypeMixin(object):
 
         if self.dtype not in self.ALLOWED_DTYPES:
             raise UnsupportedDataType(
-                typename=type(self.__name__),
+                typename=type(self).__name__,
                 dtype=self.dtype,
             )
 
@@ -103,27 +103,45 @@ class CustomTermMixin(object):
         """
         raise NotImplementedError()
 
+    def _allocate_output(self, windows, shape):
+        """
+        Allocate an output array whose rows should be passed to `self.compute`.
+
+        The resulting array must have a shape of ``shape``.
+
+        If we have standard outputs (i.e. self.outputs is NotSpecified), the
+        default is an empty ndarray whose dtype is ``self.dtype``.
+
+        If we have an outputs tuple, the default is an empty recarray with
+        ``self.outputs`` as field names. Each field will have dtype
+        ``self.dtype``, the default shape is ``self.shape``.
+
+        This can be overridden to control the kind of array constructed
+        (e.g. to produce a LabelArray instead of an ndarray).
+        """
+        missing_value = self.missing_value
+        outputs = self.outputs
+        if outputs is not NotSpecified:
+            out = recarray(
+                shape,
+                formats=[self.dtype.str] * len(outputs),
+                names=outputs,
+            )
+            out[:] = missing_value
+        else:
+            out = full(shape, missing_value, dtype=self.dtype)
+        return out
+
     def _compute(self, windows, dates, assets, mask):
         """
         Call the user's `compute` function on each window with a pre-built
         output array.
         """
         compute = self.compute
-        missing_value = self.missing_value
         params = self.params
-        outputs = self.outputs
-        if outputs is not NotSpecified:
-            out = recarray(
-                mask.shape,
-                formats=[self.dtype.str] * len(outputs),
-                names=outputs,
-            )
-            out[:] = missing_value
-        else:
-            out = full_like(mask, missing_value, dtype=self.dtype)
+        out = self._allocate_output(windows, mask.shape)
+
         with self.ctx:
-            # TODO: Consider pre-filtering columns that are all-nan at each
-            # time-step?
             for idx, date in enumerate(dates):
                 col_mask = mask[idx]
                 masked_out = out[idx][col_mask]
diff --git a/zipline/pipeline/term.py b/zipline/pipeline/term.py
index e6b6e9bf..1bffc3e7 100644
--- a/zipline/pipeline/term.py
+++ b/zipline/pipeline/term.py
@@ -4,7 +4,7 @@ Base class for Filters, Factors and Classifiers
 from abc import ABCMeta, abstractproperty
 from weakref import WeakValueDictionary
 
-from numpy import dtype as dtype_class
+from numpy import dtype as dtype_class, ndarray
 from six import with_metaclass
 from zipline.errors import (
     DTypeNotSpecified,
@@ -16,6 +16,7 @@ from zipline.errors import (
     WindowLengthNotSpecified,
 )
 from zipline.lib.adjusted_array import can_represent_dtype
+from zipline.utils.input_validation import expect_types
 from zipline.utils.memoize import lazyval
 from zipline.utils.numpy_utils import (
     bool_dtype,
@@ -476,6 +477,19 @@ class ComputableTerm(Term):
         out[self.mask] = 0
         return out
 
+    @expect_types(data=ndarray)
+    def postprocess(self, data):
+        """
+        Called with an result of ``self``, unravelled (i.e. 1-dimensional)
+        after any user-defined screens have been applied.
+
+        This is mostly useful for transforming the dtype of an output, e.g., to
+        convert a LabelArray into a pandas Categorical.
+
+        The default implementation is to just return data unchanged.
+        """
+        return data
+
     def __repr__(self):
         return (
             "{type}({inputs}, window_length={window_length})"
diff --git a/zipline/testing/__init__.py b/zipline/testing/__init__.py
index a415daa8..c52ff03e 100644
--- a/zipline/testing/__init__.py
+++ b/zipline/testing/__init__.py
@@ -51,3 +51,4 @@ from .core import (  # noqa
     write_bcolz_minute_data,
     write_compressed,
 )
+from .fixtures import ZiplineTestCase  # noqa
diff --git a/zipline/testing/core.py b/zipline/testing/core.py
index 11430470..ad130a79 100644
--- a/zipline/testing/core.py
+++ b/zipline/testing/core.py
@@ -1032,7 +1032,7 @@ def temp_pipeline_engine(calendar, sids, random_seed, symbols=None):
         yield SimplePipelineEngine(get_loader, calendar, finder)
 
 
-def parameter_space(**params):
+def parameter_space(__fail_fast=False, **params):
     """
     Wrapper around subtest that allows passing keywords mapping names to
     iterables of values.
@@ -1083,7 +1083,16 @@ def parameter_space(**params):
             )
 
         param_sets = product(*(params[name] for name in argnames))
-        return subtest(param_sets, *argnames)(f)
+
+        if __fail_fast:
+            @wraps(f)
+            def wrapped(self):
+                for args in param_sets:
+                    f(self, *args)
+            return wrapped
+        else:
+            return subtest(param_sets, *argnames)(f)
+
     return decorator
 
 
diff --git a/zipline/utils/compat.py b/zipline/utils/compat.py
index 62ed44ef..0282cb89 100644
--- a/zipline/utils/compat.py
+++ b/zipline/utils/compat.py
@@ -11,6 +11,9 @@ if PY2:
 else:
     from types import MappingProxyType as mappingproxy
 
+
+unicode = type(u'')
+
 __all__ = [
     'mappingproxy',
 ]
diff --git a/zipline/utils/functional.py b/zipline/utils/functional.py
index 79569de0..685f0aef 100644
--- a/zipline/utils/functional.py
+++ b/zipline/utils/functional.py
@@ -56,6 +56,8 @@ def apply(f, *args, **kwargs):
 # Alias for use as a class decorator.
 instance = apply
 
+from zipline.utils.sentinel import sentinel
+
 
 def mapall(funcs, seq):
     """
@@ -242,3 +244,87 @@ def unzip(seq, elem_len=None):
     if elem_len is None:
         raise ValueError("cannot unzip empty sequence without 'elem_len'")
     return ((),) * elem_len
+
+
+_no_default = sentinel('_no_default')
+
+
+def getattrs(value, attrs, default=_no_default):
+    """
+    Perform a chained application of ``getattr`` on ``value`` with the values
+    in ``attrs``.
+
+    If ``default`` is supplied, return it if any of the attribute lookups fail.
+
+    Parameters
+    ----------
+    value : object
+        Root of the lookup chain.
+    attrs : iterable[str]
+        Sequence of attributes to look up.
+    default : object, optional
+        Value to return if any of the lookups fail.
+
+    Returns
+    -------
+    result : object
+        Result of the lookup sequence.
+
+    Example
+    -------
+    >>> class EmptyObject(object):
+    ...     pass
+    ...
+    >>> obj = EmptyObject()
+    >>> obj.foo = EmptyObject()
+    >>> obj.foo.bar = "value"
+    >>> getattrs(obj, ('foo', 'bar'))
+    'value'
+
+    >>> getattrs(obj, ('foo', 'buzz'))
+    Traceback (most recent call last):
+       ...
+    AttributeError: 'EmptyObject' object has no attribute 'buzz'
+
+    >>> getattrs(obj, ('foo', 'buzz'), 'default')
+    'default'
+    """
+    try:
+        for attr in attrs:
+            value = getattr(value, attr)
+    except AttributeError:
+        if default is _no_default:
+            raise
+        value = default
+    return value
+
+
+@curry
+def set_attribute(name, value):
+    """
+    Decorator factory for setting attributes on a function.
+
+    Doesn't change the behavior of the wrapped function.
+
+    Usage
+    -----
+    >>> @set_attribute('__name__', 'foo')
+    ... def bar():
+    ...     return 3
+    ...
+    >>> bar()
+    3
+    >>> bar.__name__
+    'foo'
+    """
+    def decorator(f):
+        setattr(f, name, value)
+        return f
+    return decorator
+
+
+# Decorators for setting the __name__ and __doc__ properties of a decorated
+# function.
+# Example:
+with_name = set_attribute('__name__')
+with_doc = set_attribute('__doc__')
diff --git a/zipline/utils/input_validation.py b/zipline/utils/input_validation.py
index 66e8b9fe..0424e6f4 100644
--- a/zipline/utils/input_validation.py
+++ b/zipline/utils/input_validation.py
@@ -22,7 +22,8 @@ from six import iteritems, string_types, PY3
 from toolz import valmap, complement, compose
 import toolz.curried.operator as op
 
-from zipline.utils.preprocess import preprocess
+from zipline.utils.functional import getattrs
+from zipline.utils.preprocess import call, preprocess
 
 
 def optionally(preprocessor):
@@ -163,7 +164,7 @@ def ensure_timestamp(func, argname, arg):
         )
 
 
-def expect_dtypes(*_pos, **named):
+def expect_dtypes(**named):
     """
     Preprocessing decorator that verifies inputs have expected numpy dtypes.
 
@@ -181,9 +182,6 @@ def expect_dtypes(*_pos, **named):
        ...
     TypeError: foo() expected an argument with dtype 'int64' for argument 'x', but got dtype 'float64' instead.  # noqa
     """
-    if _pos:
-        raise TypeError("expect_dtypes() only takes keyword arguments.")
-
     for name, type_ in iteritems(named):
         if not isinstance(type_, (dtype, tuple)):
             raise TypeError(
@@ -193,42 +191,101 @@ def expect_dtypes(*_pos, **named):
                 )
             )
 
-    def _expect_dtype(_dtype_or_dtype_tuple):
+    @preprocess(dtypes=call(lambda x: x if isinstance(x, tuple) else (x,)))
+    def _expect_dtype(dtypes):
         """
-        Factory for dtype-checking functions that work the @preprocess
+        Factory for dtype-checking functions that work with the @preprocess
         decorator.
         """
-        # Slightly different messages for dtype and tuple of dtypes.
-        if isinstance(_dtype_or_dtype_tuple, tuple):
-            allowed_dtypes = _dtype_or_dtype_tuple
-        else:
-            allowed_dtypes = (_dtype_or_dtype_tuple,)
-        template = (
-            "%(funcname)s() expected a value with dtype {dtype_str} "
-            "for argument '%(argname)s', but got %(actual)r instead."
-        ).format(dtype_str=' or '.join(repr(d.name) for d in allowed_dtypes))
-
-        def check_dtype(value):
-            return getattr(value, 'dtype', None) not in allowed_dtypes
-
-        def display_bad_value(value):
+        def error_message(func, argname, value):
             # If the bad value has a dtype, but it's wrong, show the dtype
-            # name.
+            # name.  Otherwise just show the value.
             try:
-                return value.dtype.name
+                value_to_show = value.dtype.name
             except AttributeError:
-                return value
+                value_to_show = value
+            return (
+                "{funcname}() expected a value with dtype {dtype_str} "
+                "for argument {argname!r}, but got {value!r} instead."
+            ).format(
+                funcname=_qualified_name(func),
+                dtype_str=' or '.join(repr(d.name) for d in dtypes),
+                argname=argname,
+                value=value_to_show,
+            )
 
-        return make_check(
-            exc_type=TypeError,
-            template=template,
-            pred=check_dtype,
-            actual=display_bad_value,
-        )
+        def _actual_preprocessor(func, argname, argvalue):
+            if getattr(argvalue, 'dtype', object()) not in dtypes:
+                raise TypeError(error_message(func, argname, argvalue))
+            return argvalue
+
+        return _actual_preprocessor
 
     return preprocess(**valmap(_expect_dtype, named))
 
 
+def expect_kinds(**named):
+    """
+    Preprocessing decorator that verifies inputs have expected dtype kinds.
+
+    Usage
+    -----
+    >>> from numpy import int64, int32, float32
+    >>> @expect_kinds(x='i')
+    ... def foo(x):
+    ...    return x
+    ...
+    >>> foo(int64(2))
+    2
+    >>> foo(int32(2))
+    2
+    >>> foo(float32(2))
+    Traceback (most recent call last):
+       ...n
+    TypeError: foo() expected a numpy object of kind 'i' for argument 'x', but got 'f' instead.  # noqa
+    """
+    for name, kind in iteritems(named):
+        if not isinstance(kind, (str, tuple)):
+            raise TypeError(
+                "expect_dtype_kinds() expected a string or tuple of strings"
+                " for argument {name!r}, but got {kind} instead.".format(
+                    name=name, kind=dtype,
+                )
+            )
+
+    @preprocess(kinds=call(lambda x: x if isinstance(x, tuple) else (x,)))
+    def _expect_kind(kinds):
+        """
+        Factory for kind-checking functions that work the @preprocess
+        decorator.
+        """
+        def error_message(func, argname, value):
+            # If the bad value has a dtype, but it's wrong, show the dtype
+            # kind.  Otherwise just show the value.
+            try:
+                value_to_show = value.dtype.kind
+            except AttributeError:
+                value_to_show = value
+            return (
+                "{funcname}() expected a numpy object of kind {kinds} "
+                "for argument {argname!r}, but got {value!r} instead."
+            ).format(
+                funcname=_qualified_name(func),
+                kinds=' or '.join(map(repr, kinds)),
+                argname=argname,
+                value=value_to_show,
+            )
+
+        def _actual_preprocessor(func, argname, argvalue):
+            if getattrs(argvalue, ('dtype', 'kind'), object()) not in kinds:
+                raise TypeError(error_message(func, argname, argvalue))
+            return argvalue
+
+        return _actual_preprocessor
+
+    return preprocess(**valmap(_expect_kind, named))
+
+
 def expect_types(*_pos, **named):
     """
     Preprocessing decorator that verifies inputs have expected types.
diff --git a/zipline/utils/numpy_utils.py b/zipline/utils/numpy_utils.py
index 5df6e213..a254fff8 100644
--- a/zipline/utils/numpy_utils.py
+++ b/zipline/utils/numpy_utils.py
@@ -32,6 +32,10 @@ complex128_dtype = dtype('complex128')
 datetime64D_dtype = dtype('datetime64[D]')
 datetime64ns_dtype = dtype('datetime64[ns]')
 
+object_dtype = dtype('O')
+# We use object arrays for strings.
+categorical_dtype = object_dtype
+
 make_datetime64ns = flip(datetime64, 'ns')
 make_datetime64D = flip(datetime64, 'D')
 
@@ -71,6 +75,7 @@ def make_kind_check(python_types, numpy_kind):
 is_float = make_kind_check(float, 'f')
 is_int = make_kind_check(int, 'i')
 is_datetime = make_kind_check(datetime, 'M')
+is_object = make_kind_check(object, 'O')
 
 
 def coerce_to_dtype(dtype, value):
@@ -263,9 +268,7 @@ def rolling_window(array, length):
 _notNaT = make_datetime64D(0)
 
 
-def busday_count_mask_NaT(begindates,
-                          enddates,
-                          out=None):
+def busday_count_mask_NaT(begindates, enddates, out=None):
     """
     Simple of numpy.busday_count that returns `float` arrays rather than int
     arrays, and handles `NaT`s by returning `NaN`s where the inputs were `NaT`.