From 6c6a33c73b7545f96e6217e9fdccf1fbad90cb22 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Fri, 5 Aug 2016 11:53:29 -0400
Subject: [PATCH 01/16] ENH: add loader for estimates

---
 tests/pipeline/test_quarters_estimates.py     |  14 ++
 zipline/pipeline/common.py                    |   2 +
 zipline/pipeline/loaders/blaze/estimates.py   | 146 ++++++++++++
 zipline/pipeline/loaders/blaze/events.py      |  32 +--
 zipline/pipeline/loaders/events.py            |  12 +-
 zipline/pipeline/loaders/quarter_estimates.py | 221 ++++++++++++++++++
 zipline/pipeline/loaders/utils.py             |  32 +++
 7 files changed, 420 insertions(+), 39 deletions(-)
 create mode 100644 tests/pipeline/test_quarters_estimates.py
 create mode 100644 zipline/pipeline/loaders/blaze/estimates.py
 create mode 100644 zipline/pipeline/loaders/quarter_estimates.py

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
new file mode 100644
index 00000000..201011ea
--- /dev/null
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -0,0 +1,14 @@
+def test_shift_quarters_forward():
+    quarters = list(range(1, 5))
+    shifts = list(range(5))
+    expected = [(x, i) for ]
+    expected = ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1),
+                (0, 2), (0, 3), (0, 4), (1, 1), (1, 2))
+    for quarter in quarters:
+        for shift in shifts:
+            yrs_to_shift, new_qtr = EstimizeLoader.calc_forward_shift(quarter,
+                                                                      shift)
+            if quarter + shift <= 4:
+                assert yrs_to_shift == 0
+                assert new_qtr == quarter + shift
+            else:
diff --git a/zipline/pipeline/common.py b/zipline/pipeline/common.py
index e64b1dc9..7b48e271 100644
--- a/zipline/pipeline/common.py
+++ b/zipline/pipeline/common.py
@@ -6,6 +6,8 @@ ANNOUNCEMENT_FIELD_NAME = 'announcement_date'
 CASH_FIELD_NAME = 'cash'
 DAYS_SINCE_PREV = 'days_since_prev'
 DAYS_TO_NEXT = 'days_to_next'
+FISCAL_QUARTER_FIELD_NAME = 'fiscal_quarter'
+FISCAL_YEAR_FIELD_NAME = 'fiscal_year'
 NEXT_ANNOUNCEMENT = 'next_announcement'
 PREVIOUS_AMOUNT = 'previous_amount'
 PREVIOUS_ANNOUNCEMENT = 'previous_announcement'
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
new file mode 100644
index 00000000..6a89fa7b
--- /dev/null
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -0,0 +1,146 @@
+from datashape import istabular
+
+from .core import (
+    bind_expression_to_resources,
+    ffill_query_in_range,
+)
+from zipline.pipeline.loaders.base import PipelineLoader
+from zipline.pipeline.loaders.events import (
+    EventsLoader,
+    required_event_fields,
+)
+from zipline.pipeline.common import (
+    SID_FIELD_NAME,
+    TS_FIELD_NAME,
+)
+from zipline.pipeline.loaders.quarter_estimates import \
+    NextQuartersEstimatesLoader, PreviousQuartersEstimatesLoader
+from zipline.pipeline.loaders.utils import (
+    check_data_query_args,
+    normalize_data_query_bounds,
+    normalize_timestamp_to_query_time,
+    load_raw_data)
+from zipline.utils.input_validation import ensure_timezone, optionally
+from zipline.utils.preprocess import preprocess
+
+
+class BlazeEstimatesLoader(PipelineLoader):
+    """An abstract pipeline loader for the estimates datasets that loads
+    data from a blaze expression.
+
+    Parameters
+    ----------
+    expr : Expr
+        The expression representing the data to load.
+    columns : dict
+        Mapping from the dataset columns to serve to the names of the fields
+        of ``expr`` that hold their data.
+    resources : dict, optional
+        Mapping from the loadable terms of ``expr`` to actual data resources.
+    odo_kwargs : dict, optional
+        Extra keyword arguments to pass to odo when executing the expression.
+    data_query_time : time, optional
+        The time to use for the data query cutoff.
+    data_query_tz : tzinfo or str
+        The timezone to use for the data query cutoff.
+    loader : type, optional
+        Loader class built as ``loader(events=..., columns=...)`` to serve
+        the raw rows; defaults to the subclass's ``loader`` attribute.
+
+    Notes
+    -----
+    The expression should have a tabular dshape of::
+
+       Dim * {{
+           {SID_FIELD_NAME}: int64,
+           {TS_FIELD_NAME}: datetime,
+       }}
+
+    And other dataset-specific fields, where each row of the table is a
+    record including the sid to identify the company, the timestamp where we
+    learned about the announcement, and the date when the earnings will be
+    announced.
+
+    If the '{TS_FIELD_NAME}' field is not included it is assumed that we
+    start the backtest with knowledge of all announcements.
+    """
+
+    @preprocess(data_query_tz=optionally(ensure_timezone))
+    def __init__(self,
+                 expr,
+                 columns,
+                 resources=None,
+                 odo_kwargs=None,
+                 data_query_time=None,
+                 data_query_tz=None,
+                 loader=None):
+        dshape = expr.dshape
+        if not istabular(dshape):
+            raise ValueError(
+                'expression dshape must be tabular, got: %s' % dshape,
+            )
+
+        # NOTE(review): ``required_event_fields`` is imported from
+        # ``loaders.events``, which this patch calls with two arguments --
+        # confirm the one-argument call resolves to the intended helper.
+        required_cols = list(required_event_fields(columns))
+        self._expr = bind_expression_to_resources(expr[required_cols],
+                                                  resources)
+        self._columns = columns
+        self._odo_kwargs = odo_kwargs if odo_kwargs is not None else {}
+        check_data_query_args(data_query_time, data_query_tz)
+        self._data_query_time = data_query_time
+        self._data_query_tz = data_query_tz
+        # Assigning ``None`` would shadow the loader class that concrete
+        # subclasses declare as a class attribute.
+        if loader is not None:
+            self.loader = loader
+
+    def load_adjusted_array(self, columns, dates, assets, mask):
+        """Load the raw rows for ``assets`` over ``dates`` and delegate to
+        a ``self.loader`` instance built from them."""
+        raw = load_raw_data(assets, dates, self._data_query_time,
+                            self._data_query_tz, self._expr, self._odo_kwargs)
+
+        loader = self.loader(events=raw, columns=self._columns)
+        return loader.load_adjusted_array(columns, dates, assets, mask)
+
+
+class BlazeNextEstimatesLoader(BlazeEstimatesLoader):
+    """Blaze-backed estimates loader that serves data through
+    ``NextQuartersEstimatesLoader`` unless another ``loader`` is given."""
+    loader = NextQuartersEstimatesLoader
+
+    def __init__(self,
+                 expr,
+                 columns,
+                 resources=None,
+                 odo_kwargs=None,
+                 data_query_time=None,
+                 data_query_tz=None,
+                 loader=None):
+        # One-argument ``super(cls)`` is unbound and never reaches the base
+        # initializer, so bind it to ``self``; never forward a ``None`` loader.
+        super(BlazeNextEstimatesLoader, self).__init__(
+            expr, columns, resources, odo_kwargs, data_query_time,
+            data_query_tz, self.loader if loader is None else loader)
+
+
+class BlazePreviousEstimatesLoader(BlazeEstimatesLoader):
+    """Blaze-backed estimates loader that serves data through
+    ``PreviousQuartersEstimatesLoader`` unless another ``loader`` is given."""
+    loader = PreviousQuartersEstimatesLoader
+
+    def __init__(self,
+                 expr,
+                 columns,
+                 resources=None,
+                 odo_kwargs=None,
+                 data_query_time=None,
+                 data_query_tz=None,
+                 loader=None):
+        # ``super`` must name this class and be bound to ``self`` for the
+        # base initializer to run; never forward a ``None`` loader.
+        super(BlazePreviousEstimatesLoader, self).__init__(
+            expr, columns, resources, odo_kwargs, data_query_time,
+            data_query_tz, self.loader if loader is None else loader)
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index c71646cb..4165166b 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -17,7 +17,7 @@ from zipline.pipeline.loaders.utils import (
     check_data_query_args,
     normalize_data_query_bounds,
     normalize_timestamp_to_query_time,
-)
+    load_raw_data)
 from zipline.utils.input_validation import ensure_timezone, optionally
 from zipline.utils.preprocess import preprocess
 
@@ -90,34 +90,8 @@ class BlazeEventsLoader(PipelineLoader):
         self._data_query_tz = data_query_tz
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        data_query_time = self._data_query_time
-        data_query_tz = self._data_query_tz
-        lower_dt, upper_dt = normalize_data_query_bounds(
-            dates[0],
-            dates[-1],
-            data_query_time,
-            data_query_tz,
-        )
-
-        raw = ffill_query_in_range(
-            self._expr,
-            lower_dt,
-            upper_dt,
-            self._odo_kwargs,
-        )
-        sids = raw.loc[:, SID_FIELD_NAME]
-        raw.drop(
-            sids[~sids.isin(assets)].index,
-            inplace=True
-        )
-        if data_query_time is not None:
-            normalize_timestamp_to_query_time(
-                raw,
-                data_query_time,
-                data_query_tz,
-                inplace=True,
-                ts_field=TS_FIELD_NAME,
-            )
+        raw = load_raw_data(assets, dates, self._data_query_time,
+                            self._data_query_tz, self._expr, self._odo_kwargs)
 
         return EventsLoader(
             events=raw,
diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py
index 645143d8..1c49779c 100644
--- a/zipline/pipeline/loaders/events.py
+++ b/zipline/pipeline/loaders/events.py
@@ -41,16 +41,8 @@ def validate_column_specs(events, next_value_columns, previous_value_columns):
     serve the BoundColumns described by ``next_value_columns`` and
     ``previous_value_columns``.
     """
-    required = {
-        TS_FIELD_NAME,
-        SID_FIELD_NAME,
-        EVENT_DATE_FIELD_NAME,
-    }.union(
-        # We also expect any of the field names that our loadable columns
-        # are mapped to.
-        viewvalues(next_value_columns),
-        viewvalues(previous_value_columns),
-    )
+    required = required_event_fields(next_value_columns,
+                                     previous_value_columns)
     received = set(events.columns)
     missing = required - received
     if missing:
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
new file mode 100644
index 00000000..495bc075
--- /dev/null
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -0,0 +1,221 @@
+from itertools import groupby
+import numpy as np
+import pandas as pd
+from six import viewvalues
+from zipline.pipeline.common import AD_FIELD_NAME, SID_FIELD_NAME, \
+    EVENT_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, FISCAL_YEAR_FIELD_NAME
+from zipline.pipeline.loaders.base import PipelineLoader
+from zipline.pipeline.loaders.frame import DataFrameLoader
+
+
+def required_event_fields(columns):
+    """
+    Compute the set of resource columns required to serve
+    the loadable columns described by ``columns``.
+    """
+    # These metadata columns are used to align event indexers.
+    return {
+        AD_FIELD_NAME,
+        SID_FIELD_NAME,
+        EVENT_DATE_FIELD_NAME,
+        FISCAL_QUARTER_FIELD_NAME,
+        FISCAL_YEAR_FIELD_NAME
+    }.union(
+        # We also expect any of the field names that our loadable columns
+        # are mapped to.
+        viewvalues(columns),
+    )
+
+
+def validate_column_specs(events, columns):
+    """
+    Verify that the columns of ``events`` can be used by a
+    quarter estimates loader to serve the BoundColumns described by
+    ``columns``.
+    """
+    required = required_event_fields(columns)
+    received = set(events.columns)
+    missing = required - received
+    if missing:
+        raise ValueError(
+            "EventsLoader missing required columns {missing}.\n"
+            "Got Columns: {received}\n"
+            "Expected Columns: {required}".format(
+                missing=sorted(missing),
+                received=sorted(received),
+                required=sorted(required),
+            )
+        )
+
+
+def calc_forward_shift(qtr, num_shifts):
+    yrs_to_shift, new_qtr = divmod(qtr + num_shifts, 4)
+    if yrs_to_shift == 1 and new_qtr == 0:
+        yrs_to_shift = 0
+        new_qtr = 4
+    return yrs_to_shift, new_qtr
+
+
+def calc_backward_shift(qtr, num_shifts):
+    yrs_to_shift, new_qtr = divmod(abs(num_shifts - qtr), 4)
+    if yrs_to_shift == 0 and new_qtr == 0:
+        yrs_to_shift = 1
+        new_qtr = 4
+    yrs_to_shift = -yrs_to_shift
+    return yrs_to_shift, new_qtr
+
+
+class QuarterEstimatesLoader(PipelineLoader):
+    def __init__(self,
+                 events,
+                 columns):
+        validate_column_specs(
+            events,
+            columns
+        )
+
+        self.events = events[
+            events[EVENT_DATE_FIELD_NAME].notnull() and
+            events[FISCAL_QUARTER_FIELD_NAME].notnull() and
+            events[FISCAL_YEAR_FIELD_NAME].notnull()
+        ]
+
+        self.columns = columns
+
+    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
+        pass
+
+    def load_adjusted_array(self, columns, dates, assets, mask):
+        groups = groupby(lambda x: x.dataset.num_quarters, columns)
+        out = {}
+        date_values = pd.DataFrame(dates, columns=['dates'])
+        date_values['key'] = 1
+        self.events['key'] = 1
+        merged = pd.merge(date_values, self.events, on='key')
+        asset_df = pd.DataFrame(assets, columns=['sid'])
+        asset_df['key'] = 1
+        dates_sids = pd.merge(date_values, asset_df, on='key')
+        for num_quarters in groups:
+            columns = groups[num_quarters]
+            # First, group by sid, fiscal year, and fiscal quarter and only
+            # keep the last estimate made.
+            final_releases_per_qtr = merged[merged.asof_date <=
+                                            merged.dates].sort(
+                ['dates', 'asof_date']
+            ).groupby(
+                ['dates', 'sid', 'fiscal_year', 'fiscal_quarter']
+            ).last()
+            gb = final_releases_per_qtr.reset_index().groupby(['dates', 'sid'])
+            # Split the date-sid combinations into ones with a next release
+            # and ones without
+            eligible_next_releases = pd.concat([group[1] for group in gb if (
+                group[1][EVENT_DATE_FIELD_NAME] >= group[1]['dates']
+            ).any()])
+
+            eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
+            # For each sid, get the next release/year/quarter that we care
+            # about.
+            next_releases = eligible_next_releases.groupby(
+                ['dates', 'sid']
+            ).min()
+            next_releases = next_releases.rename(
+                columns={'fiscal_year': 'next_fiscal_year',
+                         'fiscal_quarter': 'next_fiscal_quarter'}
+            )
+
+            result = self.load_quarters(next_releases,
+                                        num_quarters,
+                                        dates_sids)
+
+            for c in columns:
+                column_name = self.columns[c.name]
+                # Need to pass a DataFrame that has dates as the index and
+                # all sids as columns with column values being the value in
+                # 'result' for column c
+                loader = DataFrameLoader(
+                    c,
+                    result.pivot(index='dates',
+                                 columns='sid',
+                                 values=column_name),
+                    adjustments=None
+                )
+                out[c] = loader.load_adjusted_array([c], dates, assets, mask)[c]
+        return out
+
+
+class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
+    def __init__(self,
+                 events,
+                 columns):
+        super(NextQuartersEstimatesLoader).__init__(events, columns)
+
+    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
+        # `next_qtr` is already the next quarter over,
+        # so we should offest `num_shifts` by 1.
+        next_releases['fiscal_quarter'] = next_releases.apply(
+            lambda x: calc_forward_shift(x['next_fiscal_quarter'],
+                                         num_quarters - 1)[1],
+            axis=1
+        )
+        next_releases['fiscal_year'] = next_releases.apply(
+            lambda x:
+            x['next_fiscal_year'] +
+            calc_forward_shift(x['next_fiscal_quarter'],
+                               num_quarters - 1)[0],
+            axis=1
+        )
+        # Merge to get the rows we care about for each date
+        result = dates_sids.merge(next_releases.reset_index(),
+                                  on=(['dates', 'sid']),
+                                  how='left')
+        return result
+
+
+class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
+    def __init__(self,
+                 events,
+                 columns):
+        super(PreviousQuartersEstimatesLoader).__init__(events, columns)
+
+    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
+        next_releases['fiscal_quarter'] = next_releases.apply(
+            lambda x: calc_backward_shift(x['next_fiscal_quarter'],
+                                          num_quarters)[1],
+            axis=1
+        )
+        next_releases['fiscal_year'] = next_releases.apply(
+            lambda x:
+            x['next_fiscal_year'] +
+            calc_backward_shift(x['next_fiscal_quarter'],
+                                num_quarters)[0],
+            axis=1
+        )
+        only_previous_releases = pd.concat([group[1] for group in gb if (
+                group[1][EVENT_DATE_FIELD_NAME] < group[1]['dates']
+            ).all()])
+        only_previous_releases.sort(EVENT_DATE_FIELD_NAME)
+        # For each sid, get the latest release we knew about prior to
+        # each simulation date.
+        previous_releases = only_previous_releases.groupby(['dates',
+                                                            'sid']).max()
+        previous_releases = previous_releases.rename(columns={
+            'fiscal_year': 'previous_fiscal_year',
+            'fiscal_quarter': 'previous_fiscal_quarter'
+        })
+        previous_releases['fiscal_quarter'] = previous_releases.apply(
+            lambda x: calc_backward_shift(x['previous_fiscal_quarter'],
+                                          num_quarters)[1],
+            axis=1
+        )
+        previous_releases['fiscal_year'] = previous_releases.apply(
+            lambda x:
+            x['previous_fiscal_year'] +
+            calc_backward_shift(x['previous_fiscal_quarter'],
+                                num_quarters)[0],
+            axis=1
+        )
+        all_releases = pd.concat([next_releases, previous_releases])
+        # Merge to get the rows we care about for each date
+        result = dates_sids.merge(all_releases.reset_index(),
+                                  on=(['dates', 'sid']), how='left')
+        return result
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 77a9f447..2f388810 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -2,6 +2,8 @@ import datetime
 
 import numpy as np
 import pandas as pd
+from zipline.pipeline.common import TS_FIELD_NAME, SID_FIELD_NAME
+from zipline.pipeline.loaders.blaze.core import ffill_query_in_range
 from zipline.utils.pandas_utils import mask_between_time
 
 
@@ -272,3 +274,33 @@ def check_data_query_args(data_query_time, data_query_tz):
                 data_query_tz,
             ),
         )
+
+
+def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
+                  odo_kwargs):
+    lower_dt, upper_dt = normalize_data_query_bounds(
+        dates[0],
+        dates[-1],
+        data_query_time,
+        data_query_tz,
+    )
+    raw = ffill_query_in_range(
+        expr,
+        lower_dt,
+        upper_dt,
+        odo_kwargs,
+    )
+    sids = raw.loc[:, SID_FIELD_NAME]
+    raw.drop(
+        sids[~sids.isin(assets)].index,
+        inplace=True
+    )
+    if data_query_time is not None:
+        normalize_timestamp_to_query_time(
+            raw,
+            data_query_time,
+            data_query_tz,
+            inplace=True,
+            ts_field=TS_FIELD_NAME,
+        )
+    return raw

From 6454fca6dca2e0248d98df4f62466d540d926827 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 9 Aug 2016 09:52:09 -0400
Subject: [PATCH 02/16] TST: add tests for quarter estimates

MAINT: modify algorithm for calculating previous releases

BUG: fix quarter calculation logic
---
 tests/pipeline/test_quarters_estimates.py     | 190 ++++++++++++++++--
 zipline/pipeline/loaders/blaze/core.py        |  32 ++-
 zipline/pipeline/loaders/blaze/estimates.py   |  11 +-
 zipline/pipeline/loaders/blaze/events.py      |  12 +-
 zipline/pipeline/loaders/quarter_estimates.py | 135 ++++++-------
 zipline/pipeline/loaders/utils.py             |  86 +++++---
 6 files changed, 326 insertions(+), 140 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 201011ea..02666964 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,14 +1,178 @@
-def test_shift_quarters_forward():
-    quarters = list(range(1, 5))
-    shifts = list(range(5))
-    expected = [(x, i) for ]
-    expected = ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1),
-                (0, 2), (0, 3), (0, 4), (1, 1), (1, 2))
-    for quarter in quarters:
-        for shift in shifts:
-            yrs_to_shift, new_qtr = EstimizeLoader.calc_forward_shift(quarter,
-                                                                      shift)
-            if quarter + shift <= 4:
-                assert yrs_to_shift == 0
-                assert new_qtr == quarter + shift
+from itertools import product
+import numpy as np
+import pandas as pd
+from zipline.pipeline import SimplePipelineEngine, Pipeline
+
+from zipline.pipeline.data import DataSet, Column
+from zipline.pipeline.loaders.quarter_estimates import \
+    NextQuartersEstimatesLoader, PreviousQuartersEstimatesLoader
+from zipline.testing import ZiplineTestCase
+from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
+from zipline.testing.predicates import assert_equal
+from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
+
+
+class Estimates(DataSet):
+    event_date = Column(dtype=datetime64ns_dtype)
+    fiscal_quarter = Column(dtype=float64_dtype)
+    fiscal_year = Column(dtype=float64_dtype)
+    estimate = Column(dtype=float64_dtype)
+    value = Column(dtype=float64_dtype)
+
+
+def QuartersEstimates(num_qtr):
+    class QtrEstimates(Estimates):
+        num_quarters = num_qtr
+        name=Estimates
+    return QtrEstimates
+
+# Final release dates never change
+releases = pd.DataFrame({
+    'sid': [1, 1],
+    'timestamp': [pd.Timestamp('2015-01-20'), pd.Timestamp('2015-4-20')],
+    'event_date': [pd.Timestamp('2015-01-20'), pd.Timestamp('2015-04-20')],
+    'estimate': [0.5, 0.8],
+    'value': [0.6, 0.9],
+    'fiscal_quarter': [1, 2],
+    'fiscal_year': [2015, 2015]
+})
+
+estimates = pd.DataFrame({
+    'sid': [1, 1, 1, 1],
+    'timestamp': [pd.Timestamp('2015-01-02'),
+                  pd.Timestamp('2015-01-10'),
+                  pd.Timestamp('2015-04-02'),
+                  pd.Timestamp('2015-4-10')],
+    'event_date': [pd.Timestamp('2015-01-20'),
+                   pd.Timestamp('2015-01-20'),
+                   pd.Timestamp('2015-04-20'),
+                   pd.Timestamp('2015-04-20')],
+    'estimate': [.1, .2, .3, .4],
+    'value': [np.NaN, np.NaN, np.NaN, np.NaN],
+    'fiscal_quarter': [1, 1, 2, 2],
+    'fiscal_year': [2015, 2015, 2015, 2015]
+})
+
+events = pd.concat([releases, estimates])
+
+
+class NextEstimateTestCase(WithAssetFinder,
+                           WithTradingSessions,
+                           ZiplineTestCase):
+    START_DATE = pd.Timestamp('2015-01-01')
+    END_DATE = pd.Timestamp('2015-04-30')
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return NextQuartersEstimatesLoader(events, columns)
+
+    @classmethod
+    def init_class_fixtures(cls):
+        cls.events = events
+        cls.columns = {
+            Estimates.estimate: 'estimate',
+            Estimates.event_date: 'event_date',
+            Estimates.fiscal_quarter: 'fiscal_quarter',
+            Estimates.fiscal_year: 'fiscal_year',
+            Estimates.value: 'value',
+        }
+        cls.loader = cls.make_loader(
+            events=cls.events,
+            columns=cls.columns
+        )
+        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
+        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
+            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
+        ]
+        super(NextEstimateTestCase, cls).init_class_fixtures()
+
+    def test_regular(self):
+        dataset = QuartersEstimates(1)
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+
+        results = engine.run_pipeline(
+            Pipeline({c.name: c.latest for c in dataset.columns}),
+            start_date=self.trading_days[0],
+            end_date=self.trading_days[-1],
+        )
+        sid_events = results.xs(1, level=1)
+        ed_sorted_events = self.events.sort(['event_date', 'timestamp'])
+        for i, date in enumerate(sid_events.index):
+            # Get all upcoming events that we know about on 'date'
+            eligible_timestamps = ed_sorted_events[ed_sorted_events['timestamp']
+                                                <= date]
+            eligible_events = eligible_timestamps[eligible_timestamps['event_date'] >= date]
+            if not eligible_events.empty:
+                smallest_event_date = eligible_events.iloc[0]['event_date']
+                expected_event = eligible_events[eligible_events['event_date'] == smallest_event_date].iloc[-1]
+                for colname in sid_events.columns:
+                    expected_value = expected_event[colname]
+                    computed_value = sid_events.iloc[i][colname]
+                    assert_equal(expected_value, computed_value)
             else:
+                assert sid_events.iloc[i].isnull().all()
+
+
+class PreviousEstimateTestCase(WithAssetFinder,
+                               WithTradingSessions,
+                               ZiplineTestCase):
+    START_DATE = pd.Timestamp('2015-01-01')
+    END_DATE = pd.Timestamp('2015-04-30')
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return PreviousQuartersEstimatesLoader(events, columns)
+
+    @classmethod
+    def init_class_fixtures(cls):
+        cls.events = events
+        cls.columns = {
+            Estimates.estimate: 'estimate',
+            Estimates.event_date: 'event_date',
+            Estimates.fiscal_quarter: 'fiscal_quarter',
+            Estimates.fiscal_year: 'fiscal_year',
+            Estimates.value: 'value',
+        }
+        cls.loader = cls.make_loader(
+            events=cls.events,
+            columns=cls.columns
+        )
+        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
+        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
+            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
+        ]
+        super(PreviousEstimateTestCase, cls).init_class_fixtures()
+
+    def test_regular(self):
+        dataset = QuartersEstimates(1)
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+
+        results = engine.run_pipeline(
+            Pipeline({c.name: c.latest for c in dataset.columns}),
+            start_date=self.trading_days[0],
+            end_date=self.trading_days[-1],
+        )
+        sid_events = results.xs(1, level=1)
+        ed_sorted_events = self.events.sort(['event_date', 'timestamp'])
+        for i, date in enumerate(sid_events.index):
+            # Filter for events that happened on or before the simulation
+            # date and that we knew about on or before the simulation date.
+            ed_eligible_events = ed_sorted_events[ed_sorted_events['event_date'] <= date]
+            ts_eligible_events = ed_eligible_events[ed_eligible_events['timestamp'] <= date]
+            if not ts_eligible_events.empty:
+                # The expected event is the one we knew about last.
+                expected_event = ts_eligible_events.iloc[-1]
+                for colname in sid_events.columns:
+                    expected_value = expected_event[colname]
+                    computed_value = sid_events.iloc[i][colname]
+                    assert_equal(expected_value, computed_value)
+            else:
+                assert sid_events.iloc[i].isnull().all()
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 01e1e659..fc3252e4 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -1219,6 +1219,36 @@ def bind_expression_to_resources(expr, resources):
     })
 
 
+def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
+                  odo_kwargs):
+    """Query ``expr`` for the rows needed to serve ``assets`` over ``dates``.
+
+    The query bounds are derived from the first and last entries of
+    ``dates`` together with the data query cutoff, and the query itself is
+    run through ``ffill_query_in_range``.  Rows whose sid is not in
+    ``assets`` are dropped in place.  When ``data_query_time`` is given, the
+    ``TS_FIELD_NAME`` column is normalized in place via
+    ``normalize_timestamp_to_query_time``.
+
+    Returns the queried rows.
+    """
+    first_dt, last_dt = dates[0], dates[-1]
+    lower_dt, upper_dt = normalize_data_query_bounds(
+        first_dt, last_dt, data_query_time, data_query_tz,
+    )
+    raw = ffill_query_in_range(expr, lower_dt, upper_dt, odo_kwargs)
+    # Discard rows belonging to sids that were not requested.
+    sids = raw.loc[:, SID_FIELD_NAME]
+    unwanted = sids[~sids.isin(assets)].index
+    raw.drop(unwanted, inplace=True)
+    if data_query_time is None:
+        return raw
+    normalize_timestamp_to_query_time(
+        raw, data_query_time, data_query_tz, inplace=True,
+        ts_field=TS_FIELD_NAME)
+    return raw
+
+
 def ffill_query_in_range(expr,
                          lower,
                          upper,
@@ -1273,4 +1303,4 @@ def ffill_query_in_range(expr,
         **odo_kwargs
     )
     raw.loc[:, ts_field] = raw.loc[:, ts_field].astype('datetime64[ns]')
-    return raw
+    return raw
\ No newline at end of file
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index 6a89fa7b..c2341164 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -2,24 +2,17 @@ from datashape import istabular
 
 from .core import (
     bind_expression_to_resources,
-    ffill_query_in_range,
+    load_raw_data,
 )
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.events import (
-    EventsLoader,
     required_event_fields,
 )
-from zipline.pipeline.common import (
-    SID_FIELD_NAME,
-    TS_FIELD_NAME,
-)
 from zipline.pipeline.loaders.quarter_estimates import \
     NextQuartersEstimatesLoader, PreviousQuartersEstimatesLoader
 from zipline.pipeline.loaders.utils import (
     check_data_query_args,
-    normalize_data_query_bounds,
-    normalize_timestamp_to_query_time,
-    load_raw_data)
+)
 from zipline.utils.input_validation import ensure_timezone, optionally
 from zipline.utils.preprocess import preprocess
 
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index 4165166b..d5ac8e37 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -2,22 +2,14 @@ from datashape import istabular
 
 from .core import (
     bind_expression_to_resources,
-    ffill_query_in_range,
+    load_raw_data,
 )
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.events import (
     EventsLoader,
     required_event_fields,
 )
-from zipline.pipeline.common import (
-    SID_FIELD_NAME,
-    TS_FIELD_NAME,
-)
-from zipline.pipeline.loaders.utils import (
-    check_data_query_args,
-    normalize_data_query_bounds,
-    normalize_timestamp_to_query_time,
-    load_raw_data)
+from zipline.pipeline.loaders.utils import check_data_query_args
 from zipline.utils.input_validation import ensure_timezone, optionally
 from zipline.utils.preprocess import preprocess
 
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 495bc075..6fb2d5d8 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,11 +1,17 @@
-from itertools import groupby
-import numpy as np
 import pandas as pd
 from six import viewvalues
-from zipline.pipeline.common import AD_FIELD_NAME, SID_FIELD_NAME, \
-    EVENT_DATE_FIELD_NAME, FISCAL_QUARTER_FIELD_NAME, FISCAL_YEAR_FIELD_NAME
+from toolz import groupby
+from zipline.pipeline.common import (
+    EVENT_DATE_FIELD_NAME,
+    FISCAL_QUARTER_FIELD_NAME,
+    FISCAL_YEAR_FIELD_NAME,
+    SID_FIELD_NAME,
+    TS_FIELD_NAME,
+)
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.frame import DataFrameLoader
+from zipline.pipeline.loaders.utils import calc_backward_shift, \
+    calc_forward_shift
 
 
 def required_event_fields(columns):
@@ -15,7 +21,7 @@ def required_event_fields(columns):
     """
     # These metadata columns are used to align event indexers.
     return {
-        AD_FIELD_NAME,
+        TS_FIELD_NAME,
         SID_FIELD_NAME,
         EVENT_DATE_FIELD_NAME,
         FISCAL_QUARTER_FIELD_NAME,
@@ -48,23 +54,6 @@ def validate_column_specs(events, columns):
         )
 
 
-def calc_forward_shift(qtr, num_shifts):
-    yrs_to_shift, new_qtr = divmod(qtr + num_shifts, 4)
-    if yrs_to_shift == 1 and new_qtr == 0:
-        yrs_to_shift = 0
-        new_qtr = 4
-    return yrs_to_shift, new_qtr
-
-
-def calc_backward_shift(qtr, num_shifts):
-    yrs_to_shift, new_qtr = divmod(abs(num_shifts - qtr), 4)
-    if yrs_to_shift == 0 and new_qtr == 0:
-        yrs_to_shift = 1
-        new_qtr = 4
-    yrs_to_shift = -yrs_to_shift
-    return yrs_to_shift, new_qtr
-
-
 class QuarterEstimatesLoader(PipelineLoader):
     def __init__(self,
                  events,
@@ -75,60 +64,45 @@ class QuarterEstimatesLoader(PipelineLoader):
         )
 
         self.events = events[
-            events[EVENT_DATE_FIELD_NAME].notnull() and
-            events[FISCAL_QUARTER_FIELD_NAME].notnull() and
+            events[EVENT_DATE_FIELD_NAME].notnull() &
+            events[FISCAL_QUARTER_FIELD_NAME].notnull() &
             events[FISCAL_YEAR_FIELD_NAME].notnull()
         ]
 
         self.columns = columns
 
-    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
+    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
         pass
 
     def load_adjusted_array(self, columns, dates, assets, mask):
         groups = groupby(lambda x: x.dataset.num_quarters, columns)
         out = {}
-        date_values = pd.DataFrame(dates, columns=['dates'])
+        date_values = pd.DataFrame({'dates': dates})
         date_values['key'] = 1
         self.events['key'] = 1
         merged = pd.merge(date_values, self.events, on='key')
-        asset_df = pd.DataFrame(assets, columns=['sid'])
+        asset_df = pd.DataFrame({'sid': assets})
         asset_df['key'] = 1
         dates_sids = pd.merge(date_values, asset_df, on='key')
         for num_quarters in groups:
             columns = groups[num_quarters]
             # First, group by sid, fiscal year, and fiscal quarter and only
             # keep the last estimate made.
-            final_releases_per_qtr = merged[merged.asof_date <=
+            final_releases_per_qtr = merged[merged[TS_FIELD_NAME] <=
                                             merged.dates].sort(
-                ['dates', 'asof_date']
+                ['dates', TS_FIELD_NAME]
             ).groupby(
                 ['dates', 'sid', 'fiscal_year', 'fiscal_quarter']
             ).last()
-            gb = final_releases_per_qtr.reset_index().groupby(['dates', 'sid'])
-            # Split the date-sid combinations into ones with a next release
-            # and ones without
-            eligible_next_releases = pd.concat([group[1] for group in gb if (
-                group[1][EVENT_DATE_FIELD_NAME] >= group[1]['dates']
-            ).any()])
+            final_releases_per_qtr = final_releases_per_qtr.reset_index()
 
-            eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
-            # For each sid, get the next release/year/quarter that we care
-            # about.
-            next_releases = eligible_next_releases.groupby(
-                ['dates', 'sid']
-            ).min()
-            next_releases = next_releases.rename(
-                columns={'fiscal_year': 'next_fiscal_year',
-                         'fiscal_quarter': 'next_fiscal_quarter'}
-            )
-
-            result = self.load_quarters(next_releases,
-                                        num_quarters,
-                                        dates_sids)
+            result = self.load_quarters(num_quarters,
+                                        dates_sids,
+                                        final_releases_per_qtr)
 
             for c in columns:
-                column_name = self.columns[c.name]
+                super_col = getattr(c.dataset.__base__, c.name)
+                column_name = self.columns[super_col]
                 # Need to pass a DataFrame that has dates as the index and
                 # all sids as columns with column values being the value in
                 # 'result' for column c
@@ -147,9 +121,24 @@ class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
     def __init__(self,
                  events,
                  columns):
-        super(NextQuartersEstimatesLoader).__init__(events, columns)
+        super(NextQuartersEstimatesLoader, self).__init__(events, columns)
 
-    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
+    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
+        # Filter for releases that are after each simulation date.
+        eligible_next_releases = final_releases_per_qtr[
+            final_releases_per_qtr[EVENT_DATE_FIELD_NAME] >=
+            final_releases_per_qtr['dates']
+        ]
+
+        eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
+        # For each sid, get the upcoming release/year/quarter.
+        next_releases = eligible_next_releases.groupby(
+            ['dates', 'sid']
+        ).min()
+        next_releases = next_releases.rename(
+            columns={'fiscal_year': 'next_fiscal_year',
+                     'fiscal_quarter': 'next_fiscal_quarter'}
+        )
         # `next_qtr` is already the next quarter over,
         # so we should offest `num_shifts` by 1.
         next_releases['fiscal_quarter'] = next_releases.apply(
@@ -175,47 +164,39 @@ class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
     def __init__(self,
                  events,
                  columns):
-        super(PreviousQuartersEstimatesLoader).__init__(events, columns)
+        super(PreviousQuartersEstimatesLoader, self).__init__(events, columns)
 
-    def load_quarters(self, next_releases, num_quarters, dates_sids, gb):
-        next_releases['fiscal_quarter'] = next_releases.apply(
-            lambda x: calc_backward_shift(x['next_fiscal_quarter'],
-                                          num_quarters)[1],
-            axis=1
-        )
-        next_releases['fiscal_year'] = next_releases.apply(
-            lambda x:
-            x['next_fiscal_year'] +
-            calc_backward_shift(x['next_fiscal_quarter'],
-                                num_quarters)[0],
-            axis=1
-        )
-        only_previous_releases = pd.concat([group[1] for group in gb if (
-                group[1][EVENT_DATE_FIELD_NAME] < group[1]['dates']
-            ).all()])
-        only_previous_releases.sort(EVENT_DATE_FIELD_NAME)
+    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
+        # Filter for releases that are before each simulation date.
+        eligible_previous_releases = final_releases_per_qtr[
+            final_releases_per_qtr[EVENT_DATE_FIELD_NAME] <=
+            final_releases_per_qtr['dates']
+        ]
+
+        eligible_previous_releases.sort(EVENT_DATE_FIELD_NAME)
         # For each sid, get the latest release we knew about prior to
         # each simulation date.
-        previous_releases = only_previous_releases.groupby(['dates',
-                                                            'sid']).max()
+        previous_releases = eligible_previous_releases.groupby(
+            ['dates', 'sid']
+        ).max()
+
         previous_releases = previous_releases.rename(columns={
             'fiscal_year': 'previous_fiscal_year',
             'fiscal_quarter': 'previous_fiscal_quarter'
         })
         previous_releases['fiscal_quarter'] = previous_releases.apply(
             lambda x: calc_backward_shift(x['previous_fiscal_quarter'],
-                                          num_quarters)[1],
+                                          (num_quarters - 1))[1],
             axis=1
         )
         previous_releases['fiscal_year'] = previous_releases.apply(
             lambda x:
-            x['previous_fiscal_year'] +
+            x['previous_fiscal_year'] -
             calc_backward_shift(x['previous_fiscal_quarter'],
-                                num_quarters)[0],
+                                (num_quarters - 1))[0],
             axis=1
         )
-        all_releases = pd.concat([next_releases, previous_releases])
         # Merge to get the rows we care about for each date
-        result = dates_sids.merge(all_releases.reset_index(),
+        result = dates_sids.merge(previous_releases.reset_index(),
                                   on=(['dates', 'sid']), how='left')
         return result
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 2f388810..4e999b07 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -2,8 +2,6 @@ import datetime
 
 import numpy as np
 import pandas as pd
-from zipline.pipeline.common import TS_FIELD_NAME, SID_FIELD_NAME
-from zipline.pipeline.loaders.blaze.core import ffill_query_in_range
 from zipline.utils.pandas_utils import mask_between_time
 
 
@@ -276,31 +274,59 @@ def check_data_query_args(data_query_time, data_query_tz):
         )
 
 
-def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
-                  odo_kwargs):
-    lower_dt, upper_dt = normalize_data_query_bounds(
-        dates[0],
-        dates[-1],
-        data_query_time,
-        data_query_tz,
-    )
-    raw = ffill_query_in_range(
-        expr,
-        lower_dt,
-        upper_dt,
-        odo_kwargs,
-    )
-    sids = raw.loc[:, SID_FIELD_NAME]
-    raw.drop(
-        sids[~sids.isin(assets)].index,
-        inplace=True
-    )
-    if data_query_time is not None:
-        normalize_timestamp_to_query_time(
-            raw,
-            data_query_time,
-            data_query_tz,
-            inplace=True,
-            ts_field=TS_FIELD_NAME,
-        )
-    return raw
+def calc_forward_shift(qtr, num_qtrs_shift):
+    """
+    Calculate the number of years to shift forward and the new quarter in the
+    shifted year.
+
+    Parameters
+    ----------
+    qtr : int
+        The starting quarter.
+    num_qtr_shift : int
+        The number of quarters to shift forward.
+
+    Returns
+    -------
+    yrs_to_shift : int
+        The number of years to shift forward.
+    new_qtr : int
+        The quarter number of the new quarter after shifting num_qtrs_shift
+        forward from qtr.
+    """
+    yrs_to_shift, new_qtr = divmod(qtr + num_qtrs_shift, 4)
+    if new_qtr == 0:
+        yrs_to_shift -= 1
+        new_qtr = 4
+    return yrs_to_shift, new_qtr
+
+
+def calc_backward_shift(qtr, num_qtrs_shift):
+    """
+    Calculate the number of years to shift backward and the new quarter in the
+    shifted year.
+
+    Parameters
+    ----------
+    qtr : int
+        The starting quarter.
+    num_qtr_shift : int
+        The number of quarters to shift backward.
+
+    Returns
+    -------
+    yrs_to_shift : int
+        The number of years to shift backward.
+    new_qtr : int
+        The quarter number of the new quarter after shifting num_qtrs_shift
+        backward from qtr.
+    """
+    if qtr > num_qtrs_shift:
+        return 0, qtr - num_qtrs_shift
+    # num_qtrs_shift >= qtr; subtract to offset qtr, then calculate how many
+    # years/quarters to subtract.
+    yrs_to_shift, subtract_qtr = divmod(abs(num_qtrs_shift - qtr), 4)
+    # Must add 1 year since we go backwards at least `qtr` number of quarters
+    yrs_to_shift += 1
+    new_qtr = 4 - subtract_qtr
+    return yrs_to_shift, new_qtr
\ No newline at end of file

From 863da5932c8e455c1caf7f277e9ee22e7c07491b Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 9 Aug 2016 16:24:03 -0400
Subject: [PATCH 03/16] TST: add tests for quarter rotation logic

---
 tests/pipeline/test_quarters_estimates.py     | 281 +++++++++++-------
 zipline/pipeline/loaders/events.py            |  23 +-
 zipline/pipeline/loaders/quarter_estimates.py | 186 ++++++++----
 zipline/pipeline/loaders/utils.py             |  68 +----
 4 files changed, 321 insertions(+), 237 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 02666964..2328f965 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,15 +1,24 @@
-from itertools import product
+import itertools
 import numpy as np
 import pandas as pd
+from pandas.util.testing import assert_series_equal
 from zipline.pipeline import SimplePipelineEngine, Pipeline
 
 from zipline.pipeline.data import DataSet, Column
-from zipline.pipeline.loaders.quarter_estimates import \
-    NextQuartersEstimatesLoader, PreviousQuartersEstimatesLoader
+from zipline.pipeline.loaders.quarter_estimates import (
+    NextQuartersEstimatesLoader,
+    PreviousQuartersEstimatesLoader
+)
+from zipline.pipeline.loaders.quarter_estimates import (
+    calc_forward_shift,
+    calc_backward_shift
+)
 from zipline.testing import ZiplineTestCase
 from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
 from zipline.testing.predicates import assert_equal
 from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
+import line_profiler
+prof = line_profiler.LineProfiler()
 
 
 class Estimates(DataSet):
@@ -23,70 +32,106 @@ class Estimates(DataSet):
 def QuartersEstimates(num_qtr):
     class QtrEstimates(Estimates):
         num_quarters = num_qtr
-        name=Estimates
+        name = Estimates
     return QtrEstimates
 
-# Final release dates never change
+# Final release dates never change. The quarters have very tight date ranges
+# in order to reduce the number of dates we need to iterate through when
+# testing.
 releases = pd.DataFrame({
-    'sid': [1, 1],
-    'timestamp': [pd.Timestamp('2015-01-20'), pd.Timestamp('2015-4-20')],
-    'event_date': [pd.Timestamp('2015-01-20'), pd.Timestamp('2015-04-20')],
+    'timestamp': [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
+    'event_date': [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
     'estimate': [0.5, 0.8],
     'value': [0.6, 0.9],
-    'fiscal_quarter': [1, 2],
-    'fiscal_year': [2015, 2015]
+    'fiscal_quarter': [1.0, 2.0],
+    'fiscal_year': [2015.0, 2015.0]
 })
 
+q1_knowledge_dates = [pd.Timestamp('2015-01-01'), pd.Timestamp('2015-01-04'),
+                      pd.Timestamp('2015-01-08'), pd.Timestamp('2015-01-12')]
+q2_knowledge_dates = [pd.Timestamp('2015-01-16'), pd.Timestamp('2015-01-20'),
+                      pd.Timestamp('2015-01-24'), pd.Timestamp('2015-01-28')]
+# We want to model the possibility of an estimate predicting a release date
+# that gets shifted forward/backward.
+q1_release_dates = [pd.Timestamp('2015-01-13'), pd.Timestamp('2015-01-15')]
+q2_release_dates = [pd.Timestamp('2015-01-28'), pd.Timestamp('2015-01-30')]
 estimates = pd.DataFrame({
-    'sid': [1, 1, 1, 1],
-    'timestamp': [pd.Timestamp('2015-01-02'),
-                  pd.Timestamp('2015-01-10'),
-                  pd.Timestamp('2015-04-02'),
-                  pd.Timestamp('2015-4-10')],
-    'event_date': [pd.Timestamp('2015-01-20'),
-                   pd.Timestamp('2015-01-20'),
-                   pd.Timestamp('2015-04-20'),
-                   pd.Timestamp('2015-04-20')],
     'estimate': [.1, .2, .3, .4],
     'value': [np.NaN, np.NaN, np.NaN, np.NaN],
-    'fiscal_quarter': [1, 1, 2, 2],
-    'fiscal_year': [2015, 2015, 2015, 2015]
+    'fiscal_quarter': [1.0, 1.0, 2.0, 2.0],
+    'fiscal_year': [2015.0, 2015.0, 2015.0, 2015.0]
 })
 
-events = pd.concat([releases, estimates])
+
+def gen_estimates():
+    sid_estimates = []
+    sid_releases = []
+    release_dates = list(itertools.product(q1_release_dates, q2_release_dates))
+    knowledge_permutations = list(itertools.permutations(q1_knowledge_dates +
+                                                         q2_knowledge_dates,
+                                                         4))
+    all_permutations = itertools.product(knowledge_permutations,
+                                         release_dates)
+    for sid, ((q1e1, q1e2, q2e1, q2e2), (rd1, rd2)) in enumerate(
+            all_permutations):
+        # We're assuming that estimates must come before the relevant release.
+        if q1e1 < q1e2 and q2e1 < q2e2 and q1e1 < rd1 and q1e2 < \
+                rd2:
+            sid_estimate = estimates.copy(True)
+            sid_estimate['timestamp'] = [q1e1, q1e2, q2e1, q2e2]
+            sid_estimate['event_date'] = [rd1]*2 + [rd2] * 2
+            sid_estimate['sid'] = sid
+            sid_estimates += [sid_estimate]
+            sid_release = releases.copy(True)
+            sid_release['sid'] = sid_estimate['sid']
+            sid_releases += [sid_release]
+
+    return pd.concat(sid_estimates + sid_releases).reset_index(drop=True)
 
 
-class NextEstimateTestCase(WithAssetFinder,
-                           WithTradingSessions,
-                           ZiplineTestCase):
-    START_DATE = pd.Timestamp('2015-01-01')
-    END_DATE = pd.Timestamp('2015-04-30')
+class EstimateTestCase(WithAssetFinder,
+                       WithTradingSessions,
+                       ZiplineTestCase):
+    START_DATE = pd.Timestamp('2014-12-28')
+    END_DATE = pd.Timestamp('2015-02-03')
 
+    @classmethod
+    def make_loader(cls, events, columns):
+        pass
+
+    @classmethod
+    def init_class_fixtures(cls):
+        cls.events = gen_estimates()
+        cls.sids = cls.events['sid'].unique()
+        cls.columns = {
+            Estimates.estimate: 'estimate',
+            Estimates.event_date: 'event_date',
+            Estimates.fiscal_quarter: 'fiscal_quarter',
+            Estimates.fiscal_year: 'fiscal_year',
+            Estimates.value: 'value',
+        }
+        cls.loader = cls.make_loader(
+            events=cls.events,
+            columns=cls.columns
+        )
+        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
+        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
+            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
+        ]
+        super(EstimateTestCase, cls).init_class_fixtures()
+
+
+class NextEstimateTestCase(EstimateTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
 
-    @classmethod
-    def init_class_fixtures(cls):
-        cls.events = events
-        cls.columns = {
-            Estimates.estimate: 'estimate',
-            Estimates.event_date: 'event_date',
-            Estimates.fiscal_quarter: 'fiscal_quarter',
-            Estimates.fiscal_year: 'fiscal_year',
-            Estimates.value: 'value',
-        }
-        cls.loader = cls.make_loader(
-            events=cls.events,
-            columns=cls.columns
-        )
-        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
-        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
-            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
-        ]
-        super(NextEstimateTestCase, cls).init_class_fixtures()
-
-    def test_regular(self):
+    #@profile
+    def test_next_estimates(self):
+        """
+        The goal of this test is to make sure that we select the right
+        datapoint as our 'next' w.r.t each date.
+        """
         dataset = QuartersEstimates(1)
         engine = SimplePipelineEngine(
             lambda x: self.loader,
@@ -99,55 +144,43 @@ class NextEstimateTestCase(WithAssetFinder,
             start_date=self.trading_days[0],
             end_date=self.trading_days[-1],
         )
-        sid_events = results.xs(1, level=1)
-        ed_sorted_events = self.events.sort(['event_date', 'timestamp'])
-        for i, date in enumerate(sid_events.index):
-            # Get all upcoming events that we know about on 'date'
-            eligible_timestamps = ed_sorted_events[ed_sorted_events['timestamp']
-                                                <= date]
-            eligible_events = eligible_timestamps[eligible_timestamps['event_date'] >= date]
-            if not eligible_events.empty:
-                smallest_event_date = eligible_events.iloc[0]['event_date']
-                expected_event = eligible_events[eligible_events['event_date'] == smallest_event_date].iloc[-1]
-                for colname in sid_events.columns:
-                    expected_value = expected_event[colname]
-                    computed_value = sid_events.iloc[i][colname]
-                    assert_equal(expected_value, computed_value)
-            else:
-                assert sid_events.iloc[i].isnull().all()
+        for sid in self.sids:
+            sid_events = results.xs(sid, level=1)
+            ed_sorted_events = self.events[
+                self.events['sid'] == sid
+            ]
+            ed_sorted_events['key'] = 1
+            all_dates = pd.DataFrame({'all_dates': sid_events.index})
+            all_dates['key'] = 1
+            crossproduct = pd.merge(all_dates, ed_sorted_events, on='key')
+            crossproduct = crossproduct[crossproduct['timestamp'] <=
+                                        crossproduct['all_dates']]
+            crossproduct = crossproduct[crossproduct['event_date'] >=
+                                        crossproduct['all_dates']]
+            final = crossproduct.sort_values(by=['all_dates',
+                                                 'event_date',
+                                                 'timestamp'],
+                                             ascending=[True, True,
+                                                        False]).groupby([
+                'all_dates', 'sid']).first().reset_index()
+            final = pd.merge(final, all_dates,
+                             how='right').sort_values(by='all_dates').set_index(
+                'all_dates')
+            final.index.name = None
+            for colname in sid_events.columns:
+                assert_series_equal(final[colname], sid_events[colname])
 
 
-class PreviousEstimateTestCase(WithAssetFinder,
-                               WithTradingSessions,
-                               ZiplineTestCase):
-    START_DATE = pd.Timestamp('2015-01-01')
-    END_DATE = pd.Timestamp('2015-04-30')
-
+class PreviousEstimateTestCase(EstimateTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return PreviousQuartersEstimatesLoader(events, columns)
 
-    @classmethod
-    def init_class_fixtures(cls):
-        cls.events = events
-        cls.columns = {
-            Estimates.estimate: 'estimate',
-            Estimates.event_date: 'event_date',
-            Estimates.fiscal_quarter: 'fiscal_quarter',
-            Estimates.fiscal_year: 'fiscal_year',
-            Estimates.value: 'value',
-        }
-        cls.loader = cls.make_loader(
-            events=cls.events,
-            columns=cls.columns
-        )
-        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
-        cls.ASSET_FINDER_EQUITY_SYMBOLS = [
-            's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
-        ]
-        super(PreviousEstimateTestCase, cls).init_class_fixtures()
-
-    def test_regular(self):
+    def test_previous_estimates(self):
+        """
+        The goal of this test is to make sure that we select the right
+        datapoint as our 'previous' w.r.t each date.
+        """
         dataset = QuartersEstimates(1)
         engine = SimplePipelineEngine(
             lambda x: self.loader,
@@ -160,19 +193,53 @@ class PreviousEstimateTestCase(WithAssetFinder,
             start_date=self.trading_days[0],
             end_date=self.trading_days[-1],
         )
-        sid_events = results.xs(1, level=1)
-        ed_sorted_events = self.events.sort(['event_date', 'timestamp'])
-        for i, date in enumerate(sid_events.index):
-            # Filter for events that happened on or before the simulation
-            # date and that we knew about on or before the simulation date.
-            ed_eligible_events = ed_sorted_events[ed_sorted_events['event_date'] <= date]
-            ts_eligible_events = ed_eligible_events[ed_eligible_events['timestamp'] <= date]
-            if not ts_eligible_events.empty:
-                # The expected event is the one we knew about last.
-                expected_event = ts_eligible_events.iloc[-1]
-                for colname in sid_events.columns:
-                    expected_value = expected_event[colname]
-                    computed_value = sid_events.iloc[i][colname]
-                    assert_equal(expected_value, computed_value)
-            else:
-                assert sid_events.iloc[i].isnull().all()
+        for sid in self.sids:
+            sid_events = results.xs(sid, level=1)
+            ed_sorted_events = self.events[
+                self.events['sid'] == sid
+            ].sort_values(by=['event_date', 'timestamp'])
+            for i, date in enumerate(sid_events.index):
+                # Filter for events that happened on or before the simulation
+                # date and that we knew about on or before the simulation date.
+                ed_eligible_events = ed_sorted_events[ed_sorted_events['event_date'] <= date]
+                ts_eligible_events = ed_eligible_events[ed_eligible_events['timestamp'] <= date]
+                if not ts_eligible_events.empty:
+                    # The expected event is the one we knew about last.
+                    expected_event = ts_eligible_events.iloc[-1]
+                    for colname in sid_events.columns:
+                        expected_value = expected_event[colname]
+                        computed_value = sid_events.iloc[i][colname]
+                        assert_equal(expected_value, computed_value)
+                else:
+                    assert sid_events.iloc[i].isnull().all()
+
+
+class QuarterShiftTestCase(ZiplineTestCase):
+    """
+    This tests, in isolation, quarter calculation logic for shifting quarters
+    backwards/forwards from a starting point.
+    """
+    def test_calc_forward_shift(self):
+        input_yrs = pd.Series([0] * 4)
+        input_qtrs = pd.Series(range(1, 5))
+        expected = pd.DataFrame(([yr, qtr] for yr in range(0, 4) for qtr
+                                 in range(1, 5)))
+        for i in range(0, 8):
+            years, quarters = calc_forward_shift(input_yrs, input_qtrs, i)
+            # Can't use assert_series_equal here with check_names=False
+            # because that still fails due to name differences.
+            assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
+            assert quarters.equals(expected[i:i+4].reset_index(drop=True)[1])
+
+
+    def test_calc_backward_shift(self):
+        input_yrs = pd.Series([0] * 4)
+        input_qtrs = pd.Series(range(4, 0, -1))
+        expected = pd.DataFrame(([yr, qtr] for yr in range(0, -4, -1) for qtr
+                                 in range(4, 0, -1)))
+        for i in range(0, 8):
+            years, quarters = calc_backward_shift(input_yrs, input_qtrs, i)
+            # Can't use assert_series_equal here with check_names=False
+            # because that still fails due to name differences.
+            assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
+            assert quarters.equals(expected[i:i+4].reset_index(drop=True)[1])
diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py
index 1c49779c..af11499e 100644
--- a/zipline/pipeline/loaders/events.py
+++ b/zipline/pipeline/loaders/events.py
@@ -12,6 +12,7 @@ from zipline.pipeline.common import (
     TS_FIELD_NAME,
 )
 from zipline.pipeline.loaders.utils import (
+    choose_rows_by_indexer,
     next_event_indexer,
     previous_event_indexer,
 )
@@ -166,7 +167,8 @@ class EventsLoader(PipelineLoader):
         if not columns:
             return {}
 
-        return self._load_events(
+        return choose_rows_by_indexer(
+            rows=self.events,
             name_map=self.next_value_columns,
             indexer=self.next_event_indexer(dates, sids),
             columns=columns,
@@ -179,7 +181,8 @@ class EventsLoader(PipelineLoader):
         if not columns:
             return {}
 
-        return self._load_events(
+        return choose_rows_by_indexer(
+            rows=self.events,
             name_map=self.previous_value_columns,
             indexer=self.previous_event_indexer(dates, sids),
             columns=columns,
@@ -188,22 +191,6 @@ class EventsLoader(PipelineLoader):
             mask=mask,
         )
 
-    def _load_events(self, name_map, indexer, columns, dates, sids, mask):
-        def to_frame(array):
-            return pd.DataFrame(array, index=dates, columns=sids)
-
-        out = {}
-        for c in columns:
-            raw = self.events[name_map[c]][indexer]
-            # indexer will be -1 for locations where we don't have a known
-            # value.
-            raw[indexer < 0] = c.missing_value
-
-            # Delegate the actual array formatting logic to a DataFrameLoader.
-            loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
-            out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
-        return out
-
     def load_adjusted_array(self, columns, dates, sids, mask):
         n, p = self.split_next_and_previous_event_columns(columns)
         return merge(
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 6fb2d5d8..fa480b4d 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 from six import viewvalues
 from toolz import groupby
@@ -10,8 +11,84 @@ from zipline.pipeline.common import (
 )
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.frame import DataFrameLoader
-from zipline.pipeline.loaders.utils import calc_backward_shift, \
-    calc_forward_shift
+
+import line_profiler
+from zipline.pipeline.loaders.utils import choose_rows_by_indexer
+
+PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
+
+PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
+
+NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
+
+NEXT_FISCAL_YEAR = 'next_fiscal_year'
+
+FISCAL_QUARTER = 'fiscal_quarter'
+
+FISCAL_YEAR = 'fiscal_year'
+
+ALL_DATES = 'dates'
+
+prof = line_profiler.LineProfiler()
+
+
+#@profile
+def calc_forward_shift(yrs, qtrs, num_qtrs_shift):
+    """
+    Calculate the number of years to shift forward and the new quarter in the
+    shifted year.
+
+    Parameters
+    ----------
+    qtr : int
+        The starting quarter.
+    num_qtr_shift : int
+        The number of quarters to shift forward.
+    yr : int
+        The starting year.
+
+    Returns
+    -------
+    s : pd.Series
+        A series containins the new year and quarter.
+    """
+
+    result_qtrs = (qtrs + num_qtrs_shift) % 4
+    result_years = yrs + (qtrs + num_qtrs_shift) // 4
+    to_adjust = result_qtrs[result_qtrs == 0].index
+    result_years.iloc[to_adjust] -= 1
+    result_qtrs.iloc[to_adjust] = 4
+    return result_years, result_qtrs
+
+
+#@profile
+def calc_backward_shift(yrs, qtrs, num_qtrs_shift):
+    """
+    Calculate the number of years to shift backward and the new quarter in the
+    shifted year.
+
+    Parameters
+    ----------
+    qtr : int
+        The starting quarter.
+    num_qtr_shift : int
+        The number of quarters to shift backward.
+    yr : int
+        The starting year.
+
+    Returns
+    -------
+    s : pd.Series
+        A series containins the new year and quarter.
+    """
+    result_qtrs = 4 - (num_qtrs_shift - qtrs) % 4
+    # Must subtract 1 year since we go backwards at least `qtr` number of
+    # quarters
+    result_years = yrs - (num_qtrs_shift - qtrs) // 4 - 1
+    no_yr_boundary_crossed = qtrs[qtrs > num_qtrs_shift].index
+    result_years.iloc[no_yr_boundary_crossed] = yrs.iloc[no_yr_boundary_crossed]
+    result_qtrs.iloc[no_yr_boundary_crossed] = qtrs.iloc[no_yr_boundary_crossed] - num_qtrs_shift
+    return result_years, result_qtrs
 
 
 def required_event_fields(columns):
@@ -56,35 +133,40 @@ def validate_column_specs(events, columns):
 
 class QuarterEstimatesLoader(PipelineLoader):
     def __init__(self,
-                 events,
-                 columns):
+                 estimates,
+                 base_column_name_map):
         validate_column_specs(
-            events,
-            columns
+            estimates,
+            base_column_name_map
         )
 
-        self.events = events[
-            events[EVENT_DATE_FIELD_NAME].notnull() &
-            events[FISCAL_QUARTER_FIELD_NAME].notnull() &
-            events[FISCAL_YEAR_FIELD_NAME].notnull()
+        self.estimates = estimates[
+            estimates[EVENT_DATE_FIELD_NAME].notnull() &
+            estimates[FISCAL_QUARTER_FIELD_NAME].notnull() &
+            estimates[FISCAL_YEAR_FIELD_NAME].notnull()
         ]
 
-        self.columns = columns
+        self.base_column_name_map = base_column_name_map
 
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
         pass
 
+    #@profile
     def load_adjusted_array(self, columns, dates, assets, mask):
         groups = groupby(lambda x: x.dataset.num_quarters, columns)
         out = {}
         date_values = pd.DataFrame({'dates': dates})
         date_values['key'] = 1
-        self.events['key'] = 1
-        merged = pd.merge(date_values, self.events, on='key')
-        asset_df = pd.DataFrame({'sid': assets})
+        self.estimates['key'] = 1
+        merged = pd.merge(date_values, self.estimates, on='key')
+        asset_df = pd.DataFrame({SID_FIELD_NAME: assets})
         asset_df['key'] = 1
         dates_sids = pd.merge(date_values, asset_df, on='key')
+        merged.drop('key', axis=1, inplace=True)
+        dates_sids.drop('key', axis=1, inplace=True)
         for num_quarters in groups:
+            name_map = {c: self.base_column_name_map[getattr(c.dataset.__base__, c.name)] for c in columns}
+
             columns = groups[num_quarters]
             # First, group by sid, fiscal year, and fiscal quarter and only
             # keep the last estimate made.
@@ -92,7 +174,7 @@ class QuarterEstimatesLoader(PipelineLoader):
                                             merged.dates].sort(
                 ['dates', TS_FIELD_NAME]
             ).groupby(
-                ['dates', 'sid', 'fiscal_year', 'fiscal_quarter']
+                ['dates', SID_FIELD_NAME, FISCAL_YEAR, FISCAL_QUARTER]
             ).last()
             final_releases_per_qtr = final_releases_per_qtr.reset_index()
 
@@ -101,15 +183,14 @@ class QuarterEstimatesLoader(PipelineLoader):
                                         final_releases_per_qtr)
 
             for c in columns:
-                super_col = getattr(c.dataset.__base__, c.name)
-                column_name = self.columns[super_col]
+                column_name = name_map[c]
                 # Need to pass a DataFrame that has dates as the index and
                 # all sids as columns with column values being the value in
                 # 'result' for column c
                 loader = DataFrameLoader(
                     c,
                     result.pivot(index='dates',
-                                 columns='sid',
+                                 columns=SID_FIELD_NAME,
                                  values=column_name),
                     adjustments=None
                 )
@@ -118,11 +199,8 @@ class QuarterEstimatesLoader(PipelineLoader):
 
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
-    def __init__(self,
-                 events,
-                 columns):
-        super(NextQuartersEstimatesLoader, self).__init__(events, columns)
 
+    #@profile
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
         # Filter for releases that are after each simulation date.
         eligible_next_releases = final_releases_per_qtr[
@@ -133,39 +211,34 @@ class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
         eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
         # For each sid, get the upcoming release/year/quarter.
         next_releases = eligible_next_releases.groupby(
-            ['dates', 'sid']
-        ).min()
+            ['dates', SID_FIELD_NAME]
+        ).nth(0).reset_index()  # We use nth here to avoid forward filling
+        # NaNs, which `first()` will do.
         next_releases = next_releases.rename(
-            columns={'fiscal_year': 'next_fiscal_year',
-                     'fiscal_quarter': 'next_fiscal_quarter'}
+            columns={FISCAL_YEAR: NEXT_FISCAL_YEAR,
+                     FISCAL_QUARTER: NEXT_FISCAL_QUARTER}
         )
         # `next_qtr` is already the next quarter over,
         # so we should offest `num_shifts` by 1.
-        next_releases['fiscal_quarter'] = next_releases.apply(
-            lambda x: calc_forward_shift(x['next_fiscal_quarter'],
-                                         num_quarters - 1)[1],
-            axis=1
-        )
-        next_releases['fiscal_year'] = next_releases.apply(
-            lambda x:
-            x['next_fiscal_year'] +
-            calc_forward_shift(x['next_fiscal_quarter'],
-                               num_quarters - 1)[0],
-            axis=1
+        (next_releases[FISCAL_YEAR],
+         next_releases[FISCAL_QUARTER]) = calc_forward_shift(
+            next_releases[NEXT_FISCAL_YEAR],
+            next_releases[NEXT_FISCAL_QUARTER], (num_quarters - 1)
         )
         # Merge to get the rows we care about for each date
-        result = dates_sids.merge(next_releases.reset_index(),
-                                  on=(['dates', 'sid']),
+        result = dates_sids.merge(next_releases,
+                                  on=(['dates', SID_FIELD_NAME]),
                                   how='left')
         return result
 
 
 class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
     def __init__(self,
-                 events,
+                 estimates,
                  columns):
-        super(PreviousQuartersEstimatesLoader, self).__init__(events, columns)
+        super(PreviousQuartersEstimatesLoader, self).__init__(estimates, columns)
 
+    #@profile
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
         # Filter for releases that are before each simulation date.
         eligible_previous_releases = final_releases_per_qtr[
@@ -177,26 +250,23 @@ class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
         # For each sid, get the latest release we knew about prior to
         # each simulation date.
         previous_releases = eligible_previous_releases.groupby(
-            ['dates', 'sid']
-        ).max()
+            ['dates', SID_FIELD_NAME]
+        ).nth(-1).reset_index()  # We use nth here to avoid forward filling
+        # NaNs, which `last()` will do.
 
         previous_releases = previous_releases.rename(columns={
-            'fiscal_year': 'previous_fiscal_year',
-            'fiscal_quarter': 'previous_fiscal_quarter'
+            FISCAL_YEAR: PREVIOUS_FISCAL_YEAR,
+            FISCAL_QUARTER: PREVIOUS_FISCAL_QUARTER
         })
-        previous_releases['fiscal_quarter'] = previous_releases.apply(
-            lambda x: calc_backward_shift(x['previous_fiscal_quarter'],
-                                          (num_quarters - 1))[1],
-            axis=1
-        )
-        previous_releases['fiscal_year'] = previous_releases.apply(
-            lambda x:
-            x['previous_fiscal_year'] -
-            calc_backward_shift(x['previous_fiscal_quarter'],
-                                (num_quarters - 1))[0],
-            axis=1
+
+        (previous_releases[FISCAL_YEAR],
+         previous_releases[FISCAL_QUARTER]) = \
+            calc_backward_shift(
+            previous_releases[PREVIOUS_FISCAL_YEAR], previous_releases[
+                    PREVIOUS_FISCAL_QUARTER], (num_quarters - 1)
         )
         # Merge to get the rows we care about for each date
-        result = dates_sids.merge(previous_releases.reset_index(),
-                                  on=(['dates', 'sid']), how='left')
+        result = dates_sids.merge(previous_releases,
+                                  on=(['dates', SID_FIELD_NAME]), how='left')
         return result
+
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 4e999b07..385022a8 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -2,6 +2,7 @@ import datetime
 
 import numpy as np
 import pandas as pd
+from zipline.pipeline.loaders.frame import DataFrameLoader
 from zipline.utils.pandas_utils import mask_between_time
 
 
@@ -274,59 +275,18 @@ def check_data_query_args(data_query_time, data_query_tz):
         )
 
 
-def calc_forward_shift(qtr, num_qtrs_shift):
-    """
-    Calculate the number of years to shift forward and the new quarter in the
-    shifted year.
+def choose_rows_by_indexer(rows, name_map, indexer, columns, dates, sids, mask):
+    def to_frame(array):
+        return pd.DataFrame(array, index=dates, columns=sids)
 
-    Parameters
-    ----------
-    qtr : int
-        The starting quarter.
-    num_qtr_shift : int
-        The number of quarters to shift forward.
+    out = {}
+    for c in columns:
+        raw = rows[name_map[c]][indexer]
+        # indexer will be -1 for locations where we don't have a known
+        # value.
+        raw[indexer < 0] = c.missing_value
 
-    Returns
-    -------
-    yrs_to_shift : int
-        The number of years to shift forward.
-    new_qtr : int
-        The quarter number of the new quarter after shifting num_qtrs_shift
-        forward from qtr.
-    """
-    yrs_to_shift, new_qtr = divmod(qtr + num_qtrs_shift, 4)
-    if new_qtr == 0:
-        yrs_to_shift -= 1
-        new_qtr = 4
-    return yrs_to_shift, new_qtr
-
-
-def calc_backward_shift(qtr, num_qtrs_shift):
-    """
-    Calculate the number of years to shift backward and the new quarter in the
-    shifted year.
-
-    Parameters
-    ----------
-    qtr : int
-        The starting quarter.
-    num_qtr_shift : int
-        The number of quarters to shift backward.
-
-    Returns
-    -------
-    yrs_to_shift : int
-        The number of years to shift backward.
-    new_qtr : int
-        The quarter number of the new quarter after shifting num_qtrs_shift
-        backward from qtr.
-    """
-    if qtr > num_qtrs_shift:
-        return 0, qtr - num_qtrs_shift
-    # num_qtrs_shift >= qtr; subtract to offset qtr, then calculate how many
-    # years/quarters to subtract.
-    yrs_to_shift, subtract_qtr = divmod(abs(num_qtrs_shift - qtr), 4)
-    # Must add 1 year since we go backwards at least `qtr` number of quarters
-    yrs_to_shift += 1
-    new_qtr = 4 - subtract_qtr
-    return yrs_to_shift, new_qtr
\ No newline at end of file
+        # Delegate the actual array formatting logic to a DataFrameLoader.
+        loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
+        out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
+    return out
\ No newline at end of file

From d578463dcf698ce6f3481a39629a004f2ed1d701 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 16 Aug 2016 08:28:50 -0400
Subject: [PATCH 04/16] TST: add cases for shifting release dates

BUG: fix bugs in blaze loader

BUG: call correct method

MAINT: explicitly cast dates column

MAINT: modify code to comply with pandas 0.16.1
---
 tests/pipeline/test_quarters_estimates.py     | 257 ++++++++++++------
 zipline/pipeline/loaders/blaze/core.py        |  32 +--
 zipline/pipeline/loaders/blaze/estimates.py   |  62 ++---
 zipline/pipeline/loaders/blaze/events.py      |   8 +-
 zipline/pipeline/loaders/blaze/utils.py       |  61 +++++
 zipline/pipeline/loaders/events.py            |  23 +-
 zipline/pipeline/loaders/quarter_estimates.py | 234 +++++++---------
 zipline/pipeline/loaders/utils.py             |  18 --
 8 files changed, 372 insertions(+), 323 deletions(-)
 create mode 100644 zipline/pipeline/loaders/blaze/utils.py

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 2328f965..f343bfb8 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,24 +1,30 @@
+import blaze as bz
 import itertools
 import numpy as np
 import pandas as pd
-from pandas.util.testing import assert_series_equal
-from zipline.pipeline import SimplePipelineEngine, Pipeline
 
+from zipline.pipeline import SimplePipelineEngine, Pipeline
+from zipline.pipeline.common import (
+    EVENT_DATE_FIELD_NAME,
+    FISCAL_QUARTER_FIELD_NAME,
+    FISCAL_YEAR_FIELD_NAME,
+    SID_FIELD_NAME,
+    TS_FIELD_NAME,
+)
 from zipline.pipeline.data import DataSet, Column
+from zipline.pipeline.loaders.blaze.estimates import (
+    BlazeNextEstimatesLoader,
+    BlazePreviousEstimatesLoader
+)
 from zipline.pipeline.loaders.quarter_estimates import (
     NextQuartersEstimatesLoader,
     PreviousQuartersEstimatesLoader
 )
-from zipline.pipeline.loaders.quarter_estimates import (
-    calc_forward_shift,
-    calc_backward_shift
-)
+from zipline.pipeline.loaders.quarter_estimates import shift_quarters
 from zipline.testing import ZiplineTestCase
 from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
 from zipline.testing.predicates import assert_equal
 from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
-import line_profiler
-prof = line_profiler.LineProfiler()
 
 
 class Estimates(DataSet):
@@ -39,12 +45,13 @@ def QuartersEstimates(num_qtr):
 # in order to reduce the number of dates we need to iterate through when
 # testing.
 releases = pd.DataFrame({
-    'timestamp': [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
-    'event_date': [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
+    TS_FIELD_NAME: [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
+    EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-15'),
+                            pd.Timestamp('2015-01-31')],
     'estimate': [0.5, 0.8],
     'value': [0.6, 0.9],
-    'fiscal_quarter': [1.0, 2.0],
-    'fiscal_year': [2015.0, 2015.0]
+    FISCAL_QUARTER_FIELD_NAME: [1.0, 2.0],
+    FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0]
 })
 
 q1_knowledge_dates = [pd.Timestamp('2015-01-01'), pd.Timestamp('2015-01-04'),
@@ -52,40 +59,43 @@ q1_knowledge_dates = [pd.Timestamp('2015-01-01'), pd.Timestamp('2015-01-04'),
 q2_knowledge_dates = [pd.Timestamp('2015-01-16'), pd.Timestamp('2015-01-20'),
                       pd.Timestamp('2015-01-24'), pd.Timestamp('2015-01-28')]
 # We want to model the possibility of an estimate predicting a release date
-# that gets shifted forward/backward.
-q1_release_dates = [pd.Timestamp('2015-01-13'), pd.Timestamp('2015-01-15')]
-q2_release_dates = [pd.Timestamp('2015-01-28'), pd.Timestamp('2015-01-30')]
+# that doesn't match the actual release. This could be done by dynamically
+# generating more combinations with different release dates, but that
+# significantly increases the amount of time it takes to run the tests. These
+# hard-coded cases are sufficient to know that we can update our beliefs when
+# we get new information.
+q1_release_dates = [pd.Timestamp('2015-01-15'),
+                    pd.Timestamp('2015-01-16')]  # One day late
+q2_release_dates = [pd.Timestamp('2015-01-30'),  # One day early
+                    pd.Timestamp('2015-01-31')]
 estimates = pd.DataFrame({
+    EVENT_DATE_FIELD_NAME: q1_release_dates + q2_release_dates,
     'estimate': [.1, .2, .3, .4],
     'value': [np.NaN, np.NaN, np.NaN, np.NaN],
-    'fiscal_quarter': [1.0, 1.0, 2.0, 2.0],
-    'fiscal_year': [2015.0, 2015.0, 2015.0, 2015.0]
+    FISCAL_QUARTER_FIELD_NAME: [1.0, 1.0, 2.0, 2.0],
+    FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0, 2015.0, 2015.0]
 })
 
 
 def gen_estimates():
     sid_estimates = []
     sid_releases = []
-    release_dates = list(itertools.product(q1_release_dates, q2_release_dates))
-    knowledge_permutations = list(itertools.permutations(q1_knowledge_dates +
-                                                         q2_knowledge_dates,
-                                                         4))
-    all_permutations = itertools.product(knowledge_permutations,
-                                         release_dates)
-    for sid, ((q1e1, q1e2, q2e1, q2e2), (rd1, rd2)) in enumerate(
-            all_permutations):
+    for sid, (q1e1, q1e2, q2e1, q2e2) in enumerate(
+            itertools.permutations(q1_knowledge_dates + q2_knowledge_dates,
+                                   4)
+    ):
         # We're assuming that estimates must come before the relevant release.
-        if q1e1 < q1e2 and q2e1 < q2e2 and q1e1 < rd1 and q1e2 < \
-                rd2:
+        if (q1e1 < q1e2 and
+                q2e1 < q2e2 and
+                q1e1 < q1_release_dates[0] and
+                q1e2 < q1_release_dates[1]):
             sid_estimate = estimates.copy(True)
-            sid_estimate['timestamp'] = [q1e1, q1e2, q2e1, q2e2]
-            sid_estimate['event_date'] = [rd1]*2 + [rd2] * 2
-            sid_estimate['sid'] = sid
+            sid_estimate[TS_FIELD_NAME] = [q1e1, q1e2, q2e1, q2e2]
+            sid_estimate[SID_FIELD_NAME] = sid
             sid_estimates += [sid_estimate]
             sid_release = releases.copy(True)
-            sid_release['sid'] = sid_estimate['sid']
+            sid_release[SID_FIELD_NAME] = sid_estimate[SID_FIELD_NAME]
             sid_releases += [sid_release]
-
     return pd.concat(sid_estimates + sid_releases).reset_index(drop=True)
 
 
@@ -105,28 +115,44 @@ class EstimateTestCase(WithAssetFinder,
         cls.sids = cls.events['sid'].unique()
         cls.columns = {
             Estimates.estimate: 'estimate',
-            Estimates.event_date: 'event_date',
-            Estimates.fiscal_quarter: 'fiscal_quarter',
-            Estimates.fiscal_year: 'fiscal_year',
+            Estimates.event_date: EVENT_DATE_FIELD_NAME,
+            Estimates.fiscal_quarter: FISCAL_QUARTER_FIELD_NAME,
+            Estimates.fiscal_year: FISCAL_YEAR_FIELD_NAME,
             Estimates.value: 'value',
         }
         cls.loader = cls.make_loader(
             events=cls.events,
             columns=cls.columns
         )
-        cls.ASSET_FINDER_EQUITY_SIDS = list(cls.events['sid'].unique())
+        cls.ASSET_FINDER_EQUITY_SIDS = list(
+            cls.events[SID_FIELD_NAME].unique()
+        )
         cls.ASSET_FINDER_EQUITY_SYMBOLS = [
             's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
         ]
         super(EstimateTestCase, cls).init_class_fixtures()
 
+    def _test_wrong_num_quarters_passed(self):
+        with self.assertRaises(ValueError):
+            dataset = QuartersEstimates(-1)
+            engine = SimplePipelineEngine(
+                lambda x: self.loader,
+                self.trading_days,
+                self.asset_finder,
+            )
+
+            engine.run_pipeline(
+                Pipeline({c.name: c.latest for c in dataset.columns}),
+                start_date=self.trading_days[0],
+                end_date=self.trading_days[-1],
+            )
+
 
 class NextEstimateTestCase(EstimateTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
 
-    #@profile
     def test_next_estimates(self):
         """
         The goal of this test is to make sure that we select the right
@@ -145,30 +171,62 @@ class NextEstimateTestCase(EstimateTestCase):
             end_date=self.trading_days[-1],
         )
         for sid in self.sids:
-            sid_events = results.xs(sid, level=1)
-            ed_sorted_events = self.events[
-                self.events['sid'] == sid
-            ]
-            ed_sorted_events['key'] = 1
-            all_dates = pd.DataFrame({'all_dates': sid_events.index})
-            all_dates['key'] = 1
-            crossproduct = pd.merge(all_dates, ed_sorted_events, on='key')
-            crossproduct = crossproduct[crossproduct['timestamp'] <=
-                                        crossproduct['all_dates']]
-            crossproduct = crossproduct[crossproduct['event_date'] >=
-                                        crossproduct['all_dates']]
-            final = crossproduct.sort_values(by=['all_dates',
-                                                 'event_date',
-                                                 'timestamp'],
-                                             ascending=[True, True,
-                                                        False]).groupby([
-                'all_dates', 'sid']).first().reset_index()
-            final = pd.merge(final, all_dates,
-                             how='right').sort_values(by='all_dates').set_index(
-                'all_dates')
-            final.index.name = None
-            for colname in sid_events.columns:
-                assert_series_equal(final[colname], sid_events[colname])
+            sid_estimates = results.xs(sid, level=1)
+            ts_sorted_estimates = self.events[
+                self.events[SID_FIELD_NAME] == sid
+            ].sort(TS_FIELD_NAME)
+            for i, date in enumerate(sid_estimates.index):
+                comparable_date = date.tz_localize(None)
+                # Filter out estimates we don't know about yet.
+                ts_eligible_estimates = ts_sorted_estimates[
+                    ts_sorted_estimates[TS_FIELD_NAME] <= comparable_date
+                ]
+                expected_estimate = pd.DataFrame()
+                if not ts_eligible_estimates.empty:
+                    q1_knowledge = ts_eligible_estimates[
+                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
+                    ]
+                    q2_knowledge = ts_eligible_estimates[
+                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
+                    ]
+
+                    # If our latest knowledge of q1 is that the release is
+                    # happening on this simulation date or later, then that's
+                    # the estimate we want to use.
+                    if (not q1_knowledge.empty and
+                        q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+                            comparable_date):
+                        expected_estimate = q1_knowledge.iloc[-1]
+                    # If q1 has already happened or we don't know about it
+                    # yet and our latest knowledge indicates that q2 hasn't
+                    # happened yet, then that's the estimate we want to use.
+                    elif (not q2_knowledge.empty and
+                          q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+                            comparable_date):
+                        expected_estimate = q2_knowledge.iloc[-1]
+                if not expected_estimate.empty:
+                    for colname in sid_estimates.columns:
+                        expected_value = expected_estimate[colname]
+                        computed_value = sid_estimates.iloc[i][colname]
+                        assert_equal(expected_value, computed_value)
+                else:
+                    assert sid_estimates.iloc[i].isnull().all()
+
+    def test_wrong_num_quarters_passed(self):
+        self._test_wrong_num_quarters_passed()
+
+
+class BlazeNextEstimateLoaderTestCase(NextEstimateTestCase):
+    """
+    Run the NextEstimateTestCase tests with a BlazeNextEstimatesLoader.
+    """
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return BlazeNextEstimatesLoader(
+            bz.data(events),
+            columns,
+        )
 
 
 class PreviousEstimateTestCase(EstimateTestCase):
@@ -194,24 +252,62 @@ class PreviousEstimateTestCase(EstimateTestCase):
             end_date=self.trading_days[-1],
         )
         for sid in self.sids:
-            sid_events = results.xs(sid, level=1)
-            ed_sorted_events = self.events[
-                self.events['sid'] == sid
-            ].sort_values(by=['event_date', 'timestamp'])
-            for i, date in enumerate(sid_events.index):
-                # Filter for events that happened on or before the simulation
-                # date and that we knew about on or before the simulation date.
-                ed_eligible_events = ed_sorted_events[ed_sorted_events['event_date'] <= date]
-                ts_eligible_events = ed_eligible_events[ed_eligible_events['timestamp'] <= date]
-                if not ts_eligible_events.empty:
-                    # The expected event is the one we knew about last.
-                    expected_event = ts_eligible_events.iloc[-1]
-                    for colname in sid_events.columns:
-                        expected_value = expected_event[colname]
-                        computed_value = sid_events.iloc[i][colname]
+            sid_estimates = results.xs(sid, level=1)
+            ts_sorted_estimates = self.events[
+                self.events[SID_FIELD_NAME] == sid
+            ].sort(TS_FIELD_NAME)
+            for i, date in enumerate(sid_estimates.index):
+                comparable_date = date.tz_localize(None)
+                # Filter out estimates we don't know about yet.
+                ts_eligible_estimates = ts_sorted_estimates[
+                    ts_sorted_estimates[TS_FIELD_NAME] <= comparable_date
+                ]
+                expected_estimate = pd.DataFrame()
+                if not ts_eligible_estimates.empty:
+                    # Determine the last piece of information we know about
+                    # for q1 and q2. This takes advantage of the fact that we
+                    # only have 2 quarters in the test data.
+                    q1_knowledge = ts_eligible_estimates[
+                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
+                    ]
+                    q2_knowledge = ts_eligible_estimates[
+                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
+                    ]
+                    # The expected estimate will be for q2 if the last thing
+                    # we've seen is that the release date already happened.
+                    # Otherwise, it'll be for q1, as long as the release date
+                    # for q1 has already happened.
+                    if (not q2_knowledge.empty and
+                        q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+                            comparable_date):
+                        expected_estimate = q2_knowledge.iloc[-1]
+                    elif (not q1_knowledge.empty and
+                          q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+                            comparable_date):
+                        expected_estimate = q1_knowledge.iloc[-1]
+                if not expected_estimate.empty:
+                    for colname in sid_estimates.columns:
+                        expected_value = expected_estimate[colname]
+                        computed_value = sid_estimates.iloc[i][colname]
                         assert_equal(expected_value, computed_value)
                 else:
-                    assert sid_events.iloc[i].isnull().all()
+                    assert sid_estimates.iloc[i].isnull().all()
+
+    def test_wrong_num_quarters_passed(self):
+        self._test_wrong_num_quarters_passed()
+
+
+class BlazePreviousEstimateLoaderTestCase(PreviousEstimateTestCase):
+    """
+    Run the PreviousEstimateTestCase tests with a BlazePreviousEstimatesLoader.
+    """
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return BlazePreviousEstimatesLoader(
+            bz.data(events),
+            columns,
+        )
 
 
 class QuarterShiftTestCase(ZiplineTestCase):
@@ -225,20 +321,19 @@ class QuarterShiftTestCase(ZiplineTestCase):
         expected = pd.DataFrame(([yr, qtr] for yr in range(0, 4) for qtr
                                  in range(1, 5)))
         for i in range(0, 8):
-            years, quarters = calc_forward_shift(input_yrs, input_qtrs, i)
+            years, quarters = shift_quarters(i, input_yrs, input_qtrs)
             # Can't use assert_series_equal here with check_names=False
             # because that still fails due to name differences.
             assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
             assert quarters.equals(expected[i:i+4].reset_index(drop=True)[1])
 
-
     def test_calc_backward_shift(self):
         input_yrs = pd.Series([0] * 4)
         input_qtrs = pd.Series(range(4, 0, -1))
         expected = pd.DataFrame(([yr, qtr] for yr in range(0, -4, -1) for qtr
                                  in range(4, 0, -1)))
-        for i in range(0, 8):
-            years, quarters = calc_backward_shift(input_yrs, input_qtrs, i)
+        for i in range(0, 8, 1):
+            years, quarters = shift_quarters(-i, input_yrs, input_qtrs)
             # Can't use assert_series_equal here with check_names=False
             # because that still fails due to name differences.
             assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index fc3252e4..01e1e659 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -1219,36 +1219,6 @@ def bind_expression_to_resources(expr, resources):
     })
 
 
-def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
-                  odo_kwargs):
-    lower_dt, upper_dt = normalize_data_query_bounds(
-        dates[0],
-        dates[-1],
-        data_query_time,
-        data_query_tz,
-    )
-    raw = ffill_query_in_range(
-        expr,
-        lower_dt,
-        upper_dt,
-        odo_kwargs,
-    )
-    sids = raw.loc[:, SID_FIELD_NAME]
-    raw.drop(
-        sids[~sids.isin(assets)].index,
-        inplace=True
-    )
-    if data_query_time is not None:
-        normalize_timestamp_to_query_time(
-            raw,
-            data_query_time,
-            data_query_tz,
-            inplace=True,
-            ts_field=TS_FIELD_NAME,
-        )
-    return raw
-
-
 def ffill_query_in_range(expr,
                          lower,
                          upper,
@@ -1303,4 +1273,4 @@ def ffill_query_in_range(expr,
         **odo_kwargs
     )
     raw.loc[:, ts_field] = raw.loc[:, ts_field].astype('datetime64[ns]')
-    return raw
\ No newline at end of file
+    return raw
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index c2341164..81554b0d 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -2,14 +2,14 @@ from datashape import istabular
 
 from .core import (
     bind_expression_to_resources,
-    load_raw_data,
 )
 from zipline.pipeline.loaders.base import PipelineLoader
-from zipline.pipeline.loaders.events import (
-    required_event_fields,
+from zipline.pipeline.loaders.blaze.utils import load_raw_data
+from zipline.pipeline.loaders.quarter_estimates import (
+    NextQuartersEstimatesLoader,
+    PreviousQuartersEstimatesLoader,
+    required_estimates_fields,
 )
-from zipline.pipeline.loaders.quarter_estimates import \
-    NextQuartersEstimatesLoader, PreviousQuartersEstimatesLoader
 from zipline.pipeline.loaders.utils import (
     check_data_query_args,
 )
@@ -47,7 +47,7 @@ class BlazeEstimatesLoader(PipelineLoader):
 
     And other dataset-specific fields, where each row of the table is a
     record including the sid to identify the company, the timestamp where we
-    learned about the announcement, and the date when the earnings will be z
+    learned about the announcement, and the date when the earnings will be
     announced.
 
     If the '{TS_FIELD_NAME}' field is not included it is assumed that we
@@ -61,8 +61,7 @@ class BlazeEstimatesLoader(PipelineLoader):
                  resources=None,
                  odo_kwargs=None,
                  data_query_time=None,
-                 data_query_tz=None,
-                 loader=None):
+                 data_query_tz=None):
 
         dshape = expr.dshape
         if not istabular(dshape):
@@ -71,7 +70,7 @@ class BlazeEstimatesLoader(PipelineLoader):
             )
 
         required_cols = list(
-            required_event_fields(columns)
+            required_estimates_fields(columns)
         )
         self._expr = bind_expression_to_resources(
             expr[required_cols],
@@ -82,15 +81,18 @@ class BlazeEstimatesLoader(PipelineLoader):
         check_data_query_args(data_query_time, data_query_tz)
         self._data_query_time = data_query_time
         self._data_query_tz = data_query_tz
-        self.loader = loader
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        raw = load_raw_data(assets, dates, self._data_query_time,
-                            self._data_query_tz, self._exp, self._odo_kwargs)
+        raw = load_raw_data(assets,
+                            dates,
+                            self._data_query_time,
+                            self._data_query_tz,
+                            self._expr,
+                            self._odo_kwargs)
 
         return self.loader(
-            events=raw,
-            next_value_columns=self._columns,
+            raw,
+            self._columns,
         ).load_adjusted_array(
             columns,
             dates,
@@ -102,38 +104,6 @@ class BlazeEstimatesLoader(PipelineLoader):
 class BlazeNextEstimatesLoader(BlazeEstimatesLoader):
     loader = NextQuartersEstimatesLoader
 
-    def __init__(self,
-                 expr,
-                 columns,
-                 resources=None,
-                 odo_kwargs=None,
-                 data_query_time=None,
-                 data_query_tz=None,
-                 loader=None):
-        super(BlazeNextEstimatesLoader).__init__(expr,
-                                                 columns,
-                                                 resources,
-                                                 odo_kwargs,
-                                                 data_query_time,
-                                                 data_query_tz,
-                                                 loader)
-
 
 class BlazePreviousEstimatesLoader(BlazeEstimatesLoader):
     loader = PreviousQuartersEstimatesLoader
-
-    def __init__(self,
-                 expr,
-                 columns,
-                 resources=None,
-                 odo_kwargs=None,
-                 data_query_time=None,
-                 data_query_tz=None,
-                 loader=None):
-        super(BlazeNextEstimatesLoader).__init__(expr,
-                                                 columns,
-                                                 resources,
-                                                 odo_kwargs,
-                                                 data_query_time,
-                                                 data_query_tz,
-                                                 loader)
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index d5ac8e37..39e5dac6 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -2,14 +2,16 @@ from datashape import istabular
 
 from .core import (
     bind_expression_to_resources,
-    load_raw_data,
 )
 from zipline.pipeline.loaders.base import PipelineLoader
+from zipline.pipeline.loaders.blaze.utils import load_raw_data
 from zipline.pipeline.loaders.events import (
     EventsLoader,
     required_event_fields,
 )
-from zipline.pipeline.loaders.utils import check_data_query_args
+from zipline.pipeline.loaders.utils import (
+    check_data_query_args,
+)
 from zipline.utils.input_validation import ensure_timezone, optionally
 from zipline.utils.preprocess import preprocess
 
@@ -29,7 +31,7 @@ class BlazeEventsLoader(PipelineLoader):
     data_query_time : time, optional
         The time to use for the data query cutoff.
     data_query_tz : tzinfo or str
-        The timezeone to use for the data query cutoff.
+        The timezone to use for the data query cutoff.
     dataset : DataSet
         The DataSet object for which this loader loads data.
 
diff --git a/zipline/pipeline/loaders/blaze/utils.py b/zipline/pipeline/loaders/blaze/utils.py
new file mode 100644
index 00000000..6455f76c
--- /dev/null
+++ b/zipline/pipeline/loaders/blaze/utils.py
@@ -0,0 +1,61 @@
+from zipline.pipeline.common import SID_FIELD_NAME, TS_FIELD_NAME
+from zipline.pipeline.loaders.blaze.core import ffill_query_in_range
+from zipline.pipeline.loaders.utils import (
+    normalize_data_query_bounds,
+    normalize_timestamp_to_query_time,
+)
+
+
+def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
+                  odo_kwargs):
+    """
+    Given an expression representing data to load, perform normalization and
+    forward-filling and return the data, materialized.
+
+    Parameters
+    ----------
+    assets : pd.Int64Index
+        The assets to load data for.
+    dates : pd.DatetimeIndex
+        The simulation dates to load data for.
+    data_query_time : datetime.time
+        The time used as cutoff for new information.
+    data_query_tz : tzinfo
+        The timezone to normalize your dates to before comparing against
+        `data_query_time`.
+    expr : Expr
+        The expression representing the data to load.
+    odo_kwargs : dict, optional
+        Extra keyword arguments to pass to odo when executing the expression.
+
+    Returns
+    -------
+    raw : pd.DataFrame
+        The data symbolized by `expr` materialized in a DataFrame.
+    """
+    lower_dt, upper_dt = normalize_data_query_bounds(
+        dates[0],
+        dates[-1],
+        data_query_time,
+        data_query_tz,
+    )
+    raw = ffill_query_in_range(
+        expr,
+        lower_dt,
+        upper_dt,
+        odo_kwargs,
+    )
+    sids = raw.loc[:, SID_FIELD_NAME]
+    raw.drop(
+        sids[~sids.isin(assets)].index,
+        inplace=True
+    )
+    if data_query_time is not None:
+        normalize_timestamp_to_query_time(
+            raw,
+            data_query_time,
+            data_query_tz,
+            inplace=True,
+            ts_field=TS_FIELD_NAME,
+        )
+    return raw
diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py
index af11499e..cb33b3b2 100644
--- a/zipline/pipeline/loaders/events.py
+++ b/zipline/pipeline/loaders/events.py
@@ -5,14 +5,13 @@ from six import viewvalues
 from toolz import groupby, merge
 
 from .base import PipelineLoader
-from .frame import DataFrameLoader
 from zipline.pipeline.common import (
     EVENT_DATE_FIELD_NAME,
     SID_FIELD_NAME,
     TS_FIELD_NAME,
 )
+from zipline.pipeline.loaders.frame import DataFrameLoader
 from zipline.pipeline.loaders.utils import (
-    choose_rows_by_indexer,
     next_event_indexer,
     previous_event_indexer,
 )
@@ -167,7 +166,7 @@ class EventsLoader(PipelineLoader):
         if not columns:
             return {}
 
-        return choose_rows_by_indexer(
+        return self._load_events(
             rows=self.events,
             name_map=self.next_value_columns,
             indexer=self.next_event_indexer(dates, sids),
@@ -181,7 +180,7 @@ class EventsLoader(PipelineLoader):
         if not columns:
             return {}
 
-        return choose_rows_by_indexer(
+        return self._load_events(
             rows=self.events,
             name_map=self.previous_value_columns,
             indexer=self.previous_event_indexer(dates, sids),
@@ -191,6 +190,22 @@ class EventsLoader(PipelineLoader):
             mask=mask,
         )
 
+    def _load_events(self, name_map, indexer, columns, dates, sids, mask):
+        def to_frame(array):
+            return pd.DataFrame(array, index=dates, columns=sids)
+
+        out = {}
+        for c in columns:
+            raw = self.events[name_map[c]][indexer]
+            # indexer will be -1 for locations where we don't have a known
+            # value.
+            raw[indexer < 0] = c.missing_value
+
+            # Delegate the actual array formatting logic to a DataFrameLoader.
+            loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
+            out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
+        return out
+
     def load_adjusted_array(self, columns, dates, sids, mask):
         n, p = self.split_next_and_previous_event_columns(columns)
         return merge(
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index fa480b4d..7837e57d 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,7 +1,8 @@
-import numpy as np
+from abc import abstractmethod
 import pandas as pd
 from six import viewvalues
 from toolz import groupby
+
 from zipline.pipeline.common import (
     EVENT_DATE_FIELD_NAME,
     FISCAL_QUARTER_FIELD_NAME,
@@ -11,90 +12,33 @@ from zipline.pipeline.common import (
 )
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.frame import DataFrameLoader
-
-import line_profiler
-from zipline.pipeline.loaders.utils import choose_rows_by_indexer
-
-PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
-
-PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
+from zipline.utils.pandas_utils import cross_product
 
 NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
-
 NEXT_FISCAL_YEAR = 'next_fiscal_year'
-
-FISCAL_QUARTER = 'fiscal_quarter'
-
-FISCAL_YEAR = 'fiscal_year'
-
-ALL_DATES = 'dates'
-
-prof = line_profiler.LineProfiler()
+PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
+PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
+SIMULTATION_DATES = 'dates'
 
 
-#@profile
-def calc_forward_shift(yrs, qtrs, num_qtrs_shift):
-    """
-    Calculate the number of years to shift forward and the new quarter in the
-    shifted year.
-
-    Parameters
-    ----------
-    qtr : int
-        The starting quarter.
-    num_qtr_shift : int
-        The number of quarters to shift forward.
-    yr : int
-        The starting year.
-
-    Returns
-    -------
-    s : pd.Series
-        A series containins the new year and quarter.
-    """
-
-    result_qtrs = (qtrs + num_qtrs_shift) % 4
-    result_years = yrs + (qtrs + num_qtrs_shift) // 4
-    to_adjust = result_qtrs[result_qtrs == 0].index
-    result_years.iloc[to_adjust] -= 1
-    result_qtrs.iloc[to_adjust] = 4
-    return result_years, result_qtrs
+def normalize_quarters(years, quarters):
+    return years * 4 + quarters - 1
 
 
-#@profile
-def calc_backward_shift(yrs, qtrs, num_qtrs_shift):
-    """
-    Calculate the number of years to shift backward and the new quarter in the
-    shifted year.
-
-    Parameters
-    ----------
-    qtr : int
-        The starting quarter.
-    num_qtr_shift : int
-        The number of quarters to shift backward.
-    yr : int
-        The starting year.
-
-    Returns
-    -------
-    s : pd.Series
-        A series containins the new year and quarter.
-    """
-    result_qtrs = 4 - (num_qtrs_shift - qtrs) % 4
-    # Must subtract 1 year since we go backwards at least `qtr` number of
-    # quarters
-    result_years = yrs - (num_qtrs_shift - qtrs) // 4 - 1
-    no_yr_boundary_crossed = qtrs[qtrs > num_qtrs_shift].index
-    result_years.iloc[no_yr_boundary_crossed] = yrs.iloc[no_yr_boundary_crossed]
-    result_qtrs.iloc[no_yr_boundary_crossed] = qtrs.iloc[no_yr_boundary_crossed] - num_qtrs_shift
-    return result_years, result_qtrs
+def split_normalized_quarters(normalized_quarters):
+    years = normalized_quarters // 4
+    quarters = normalized_quarters % 4
+    return years, quarters + 1
 
 
-def required_event_fields(columns):
+def shift_quarters(by, years, quarters):
+    return split_normalized_quarters(normalize_quarters(years, quarters) + by)
+
+
+def required_estimates_fields(columns):
     """
     Compute the set of resource columns required to serve
-    ``next_value_columns`` and ``previous_value_columns``.
+    `columns`.
     """
     # These metadata columns are used to align event indexers.
     return {
@@ -112,16 +56,16 @@ def required_event_fields(columns):
 
 def validate_column_specs(events, columns):
     """
-    Verify that the columns of ``events`` can be used by an EventsLoader to
-    serve the BoundColumns described by ``next_value_columns`` and
-    ``previous_value_columns``.
+    Verify that the columns of ``events`` can be used by a
+    QuarterEstimatesLoader to serve the BoundColumns described by
+    `columns`.
     """
-    required = required_event_fields(columns)
+    required = required_estimates_fields(columns)
     received = set(events.columns)
     missing = required - received
     if missing:
         raise ValueError(
-            "EventsLoader missing required columns {missing}.\n"
+            "QuarterEstimatesLoader missing required columns {missing}.\n"
             "Got Columns: {received}\n"
             "Expected Columns: {required}".format(
                 missing=sorted(missing),
@@ -148,35 +92,45 @@ class QuarterEstimatesLoader(PipelineLoader):
 
         self.base_column_name_map = base_column_name_map
 
+    @abstractmethod
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
         pass
 
-    #@profile
     def load_adjusted_array(self, columns, dates, assets, mask):
+        # TODO: how can we enforce that datasets have the num_quarters
+        # attribute, given that they're created dynamically?
         groups = groupby(lambda x: x.dataset.num_quarters, columns)
+        groups_columns = dict(groups)
+        if (pd.Series(groups_columns.keys()) < 0).any():
+            raise ValueError("Must pass a number of quarters >= 0")
         out = {}
-        date_values = pd.DataFrame({'dates': dates})
-        date_values['key'] = 1
-        self.estimates['key'] = 1
-        merged = pd.merge(date_values, self.estimates, on='key')
+        date_values = pd.DataFrame({SIMULTATION_DATES: dates})
+        # dates column must be of type datetime64[ns] in order for subsequent
+        # comparisons to work correctly.
+        date_values[SIMULTATION_DATES] = date_values[
+            SIMULTATION_DATES
+        ].astype('datetime64[ns]')
+        estimates_all_dates = cross_product(date_values, self.estimates)
         asset_df = pd.DataFrame({SID_FIELD_NAME: assets})
-        asset_df['key'] = 1
-        dates_sids = pd.merge(date_values, asset_df, on='key')
-        merged.drop('key', axis=1, inplace=True)
-        dates_sids.drop('key', axis=1, inplace=True)
-        for num_quarters in groups:
-            name_map = {c: self.base_column_name_map[getattr(c.dataset.__base__, c.name)] for c in columns}
+        dates_sids = cross_product(date_values, asset_df)
+        for num_quarters, columns in groups_columns.iteritems():
+            name_map = {c:
+                        self.base_column_name_map[
+                            getattr(c.dataset.__base__, c.name)
+                        ] for c in columns}
 
-            columns = groups[num_quarters]
-            # First, group by sid, fiscal year, and fiscal quarter and only
-            # keep the last estimate made.
-            final_releases_per_qtr = merged[merged[TS_FIELD_NAME] <=
-                                            merged.dates].sort(
-                ['dates', TS_FIELD_NAME]
-            ).groupby(
-                ['dates', SID_FIELD_NAME, FISCAL_YEAR, FISCAL_QUARTER]
-            ).last()
-            final_releases_per_qtr = final_releases_per_qtr.reset_index()
+            # First, determine which estimates we would have known about on
+            # each date. Then, sort by timestamp and group to find the latest
+            # estimate for each quarter.
+            final_releases_per_qtr = estimates_all_dates[
+                estimates_all_dates[TS_FIELD_NAME] <=
+                estimates_all_dates.dates
+            ].sort([TS_FIELD_NAME]).groupby(
+                [SIMULTATION_DATES,
+                 SID_FIELD_NAME,
+                 FISCAL_YEAR_FIELD_NAME,
+                 FISCAL_QUARTER_FIELD_NAME]
+            ).nth(-1).reset_index()
 
             result = self.load_quarters(num_quarters,
                                         dates_sids,
@@ -184,50 +138,51 @@ class QuarterEstimatesLoader(PipelineLoader):
 
             for c in columns:
                 column_name = name_map[c]
-                # Need to pass a DataFrame that has dates as the index and
-                # all sids as columns with column values being the value in
-                # 'result' for column c
+                # Pivot to get a DataFrame with dates as the index and
+                # sids as the columns.
                 loader = DataFrameLoader(
                     c,
-                    result.pivot(index='dates',
+                    result.pivot(index=SIMULTATION_DATES,
                                  columns=SID_FIELD_NAME,
                                  values=column_name),
                     adjustments=None
                 )
-                out[c] = loader.load_adjusted_array([c], dates, assets, mask)[c]
+                out[c] = loader.load_adjusted_array([c],
+                                                    dates,
+                                                    assets,
+                                                    mask)[c]
         return out
 
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
 
-    #@profile
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
-        # Filter for releases that are after each simulation date.
+        # Filter for releases that are on or after each simulation date.
         eligible_next_releases = final_releases_per_qtr[
             final_releases_per_qtr[EVENT_DATE_FIELD_NAME] >=
-            final_releases_per_qtr['dates']
+            final_releases_per_qtr[SIMULTATION_DATES]
         ]
-
+        # For each sid, get the upcoming release.
         eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
-        # For each sid, get the upcoming release/year/quarter.
         next_releases = eligible_next_releases.groupby(
-            ['dates', SID_FIELD_NAME]
+            [SIMULTATION_DATES, SID_FIELD_NAME]
         ).nth(0).reset_index()  # We use nth here to avoid forward filling
         # NaNs, which `first()` will do.
         next_releases = next_releases.rename(
-            columns={FISCAL_YEAR: NEXT_FISCAL_YEAR,
-                     FISCAL_QUARTER: NEXT_FISCAL_QUARTER}
+            columns={FISCAL_YEAR_FIELD_NAME: NEXT_FISCAL_YEAR,
+                     FISCAL_QUARTER_FIELD_NAME: NEXT_FISCAL_QUARTER}
         )
-        # `next_qtr` is already the next quarter over,
-        # so we should offest `num_shifts` by 1.
-        (next_releases[FISCAL_YEAR],
-         next_releases[FISCAL_QUARTER]) = calc_forward_shift(
+        # The next fiscal quarter is already our starting point,
+        # so we should offset `num_quarters` by 1.
+        (next_releases[FISCAL_YEAR_FIELD_NAME],
+         next_releases[FISCAL_QUARTER_FIELD_NAME]) = shift_quarters(
+            (num_quarters - 1),
             next_releases[NEXT_FISCAL_YEAR],
-            next_releases[NEXT_FISCAL_QUARTER], (num_quarters - 1)
+            next_releases[NEXT_FISCAL_QUARTER],
         )
-        # Merge to get the rows we care about for each date
+        # Do a left merge to get values for each date.
         result = dates_sids.merge(next_releases,
-                                  on=(['dates', SID_FIELD_NAME]),
+                                  on=([SIMULTATION_DATES, SID_FIELD_NAME]),
                                   how='left')
         return result
 
@@ -236,37 +191,36 @@ class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
     def __init__(self,
                  estimates,
                  columns):
-        super(PreviousQuartersEstimatesLoader, self).__init__(estimates, columns)
+        super(PreviousQuartersEstimatesLoader, self).__init__(estimates,
+                                                              columns)
 
-    #@profile
     def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
-        # Filter for releases that are before each simulation date.
+        # Filter for releases that are on or before each simulation date.
         eligible_previous_releases = final_releases_per_qtr[
             final_releases_per_qtr[EVENT_DATE_FIELD_NAME] <=
-            final_releases_per_qtr['dates']
+            final_releases_per_qtr[SIMULTATION_DATES]
         ]
-
+        # For each sid, get the latest release.
         eligible_previous_releases.sort(EVENT_DATE_FIELD_NAME)
-        # For each sid, get the latest release we knew about prior to
-        # each simulation date.
         previous_releases = eligible_previous_releases.groupby(
-            ['dates', SID_FIELD_NAME]
+            [SIMULTATION_DATES, SID_FIELD_NAME]
         ).nth(-1).reset_index()  # We use nth here to avoid forward filling
         # NaNs, which `last()` will do.
-
         previous_releases = previous_releases.rename(columns={
-            FISCAL_YEAR: PREVIOUS_FISCAL_YEAR,
-            FISCAL_QUARTER: PREVIOUS_FISCAL_QUARTER
+            FISCAL_YEAR_FIELD_NAME: PREVIOUS_FISCAL_YEAR,
+            FISCAL_QUARTER_FIELD_NAME: PREVIOUS_FISCAL_QUARTER
         })
-
-        (previous_releases[FISCAL_YEAR],
-         previous_releases[FISCAL_QUARTER]) = \
-            calc_backward_shift(
-            previous_releases[PREVIOUS_FISCAL_YEAR], previous_releases[
-                    PREVIOUS_FISCAL_QUARTER], (num_quarters - 1)
+        # The previous fiscal quarter is already our starting point,
+        # so we should offset `num_quarters` by 1.
+        (previous_releases[FISCAL_YEAR_FIELD_NAME],
+         previous_releases[FISCAL_QUARTER_FIELD_NAME]) = shift_quarters(
+            -(num_quarters - 1),
+            previous_releases[PREVIOUS_FISCAL_YEAR],
+            previous_releases[PREVIOUS_FISCAL_QUARTER],
         )
-        # Merge to get the rows we care about for each date
+        # Do a left merge to get values for each date.
         result = dates_sids.merge(previous_releases,
-                                  on=(['dates', SID_FIELD_NAME]), how='left')
+                                  on=([SIMULTATION_DATES,
+                                       SID_FIELD_NAME]),
+                                  how='left')
         return result
-
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 385022a8..77a9f447 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -2,7 +2,6 @@ import datetime
 
 import numpy as np
 import pandas as pd
-from zipline.pipeline.loaders.frame import DataFrameLoader
 from zipline.utils.pandas_utils import mask_between_time
 
 
@@ -273,20 +272,3 @@ def check_data_query_args(data_query_time, data_query_tz):
                 data_query_tz,
             ),
         )
-
-
-def choose_rows_by_indexer(rows, name_map, indexer, columns, dates, sids, mask):
-    def to_frame(array):
-        return pd.DataFrame(array, index=dates, columns=sids)
-
-    out = {}
-    for c in columns:
-        raw = rows[name_map[c]][indexer]
-        # indexer will be -1 for locations where we don't have a known
-        # value.
-        raw[indexer < 0] = c.missing_value
-
-        # Delegate the actual array formatting logic to a DataFrameLoader.
-        loader = DataFrameLoader(c, to_frame(raw), adjustments=None)
-        out[c] = loader.load_adjusted_array([c], dates, sids, mask)[c]
-    return out
\ No newline at end of file

From e53d7fc9b2437c1234af14064b57aacca3ee03d3 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Wed, 17 Aug 2016 16:05:52 -0400
Subject: [PATCH 05/16] ENH: add an adjustment for float64 2d arrays.

BUG: fix syntax error

MAINT: optimize code for cython
---
 zipline/lib/adjustment.pyx | 79 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/zipline/lib/adjustment.pyx b/zipline/lib/adjustment.pyx
index b0ba1873..fc133d79 100644
--- a/zipline/lib/adjustment.pyx
+++ b/zipline/lib/adjustment.pyx
@@ -3,7 +3,7 @@ from cpython cimport Py_EQ
 
 from pandas import isnull, Timestamp
 from numpy cimport float64_t, uint8_t, int64_t
-from numpy import datetime64, float64
+from numpy import asarray, datetime64, float64
 # Purely for readability. There aren't C-level declarations for these types.
 ctypedef object Int64Index_t
 ctypedef object DatetimeIndex_t
@@ -364,6 +364,83 @@ cdef class Float64Overwrite(Float64Adjustment):
                 data[row, col] = value
 
 
+cdef class Float641DArrayOverwrite:
+    """
+    An adjustment that overwrites subarrays with a value for each subarray.
+
+    Example
+    -------
+
+    >>> import numpy as np
+    >>> arr = np.arange(25, dtype=float).reshape(5, 5)
+    >>> arr
+    array([[  0.,   1.,   2.,   3.,   4.],
+           [  5.,   6.,   7.,   8.,   9.],
+           [ 10.,  11.,  12.,  13.,  14.],
+           [ 15.,  16.,  17.,  18.,  19.],
+           [ 20.,  21.,  22.,  23.,  24.]])
+    >>> adj = Float641DArrayOverwrite(
+    ...     row_starts=np.array([0, 3]),
+    ...     row_ends=np.array([2, 4]),
+    ...     column_starts=np.array([0, 2]),
+    ...     column_ends=np.array([1, 4]),
+    ...     values=np.array([10., 20.]),
+    ... )
+    >>> adj.mutate(arr)
+    >>> arr
+    array([[ 10.,  10.,   2.,   3.,   4.],
+           [ 10.,  10.,   7.,   8.,   9.],
+           [ 10.,  10.,  12.,  13.,  14.],
+           [ 15.,  16.,  20.,  20.,  20.],
+           [ 20.,  21.,  20.,  20.,  20.]])
+    """
+    cdef:
+        readonly int64_t[:] row_starts, row_ends, column_starts, column_ends
+        readonly float64_t[:] values
+
+    def __init__(self,
+                 int64_t[:] row_starts,
+                 int64_t[:] row_ends,
+                 int64_t[:] column_starts,
+                 int64_t[:] column_ends,
+                 float64_t[:] values):
+        assert (len(row_starts) ==
+                len(row_ends) ==
+                len(column_starts) ==
+                len(column_ends))
+        for (row_start, row_end) in zip(row_starts, row_ends):
+            assert row_start <= row_end
+        for (column_start, column_end) in zip(column_starts, column_ends):
+            assert column_start <= column_end
+
+        self.row_starts = row_starts
+        self.row_ends = row_ends
+        self.column_starts = column_starts
+        self.column_ends = column_ends
+        self.values = values
+
+    cpdef mutate(self, float64_t[:, :] data):
+        cdef Py_ssize_t fill_range, row, col
+        for fill_range in range(len(self.row_starts)):
+            for row in range(self.row_starts[fill_range],
+                             self.row_ends[fill_range] + 1):
+                for col in range(self.column_starts[fill_range],
+                                 self.column_ends[fill_range] + 1):
+                    data[row, col] = self.values[fill_range]
+
+    def __repr__(self):
+            return (
+                "%s(row_starts=%s, row_ends=%s,"
+                " column_starts=%s, column_ends=%s, values=%s)" % (
+                    type(self).__name__,
+                    asarray(self.row_starts),
+                    asarray(self.row_ends),
+                    asarray(self.column_starts),
+                    asarray(self.column_ends),
+                    asarray(self.values),
+                )
+            )
+
 cdef class Float64Add(Float64Adjustment):
     """
     An adjustment that adds a float.

From 2975f9b2fd8df3c53598b7245e8c81ccc6cfec71 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Thu, 18 Aug 2016 14:19:48 -0400
Subject: [PATCH 06/16] TST: add test for 1d array overwrite

---
 tests/pipeline/test_adjusted_array.py         | 101 ++++++++++++++
 tests/pipeline/test_quarters_estimates.py     |   1 -
 zipline/pipeline/loaders/blaze/core.py        |  81 ++---------
 zipline/pipeline/loaders/quarter_estimates.py | 127 +++++++++++-------
 zipline/pipeline/loaders/utils.py             |  71 ++++++++++
 5 files changed, 261 insertions(+), 120 deletions(-)

diff --git a/tests/pipeline/test_adjusted_array.py b/tests/pipeline/test_adjusted_array.py
index cea6f090..80a1dd97 100644
--- a/tests/pipeline/test_adjusted_array.py
+++ b/tests/pipeline/test_adjusted_array.py
@@ -22,6 +22,7 @@ from zipline.lib.adjustment import (
     Datetime64Overwrite,
     Float64Multiply,
     Float64Overwrite,
+    Float641DArrayOverwrite,
     ObjectOverwrite,
 )
 from zipline.lib.adjusted_array import AdjustedArray, NOMASK
@@ -304,6 +305,105 @@ def _gen_overwrite_adjustment_cases(name,
     )
 
 
+def _gen_overwrite_1d_array_adjustment_case():
+    """
+    Generate a test case for 1-D array overwrite adjustments.
+
+    The algorithm used here is the same as the one used above for
+    multiplicative adjustments.  The only difference is the semantics of how
+    the adjustments are expected to modify the arrays.
+
+    Unlike the generic overwrite cases, this takes no parameters: the baseline
+    is a fixed 6x3 float64 array, and each adjustment is built from arrays of
+    inclusive row/column start and end indices plus one value per range, so a
+    single adjustment can overwrite several rectangular regions at once.
+    """
+
+    adjustments = {}
+    buffer_as_of = [None] * 6
+    baseline = as_dtype(float64_dtype, [[2, 2, 2],
+                                        [2, 2, 2],
+                                        [2, 2, 2],
+                                        [2, 2, 2],
+                                        [2, 2, 2],
+                                        [2, 2, 2]])
+
+    buffer_as_of[0] = as_dtype(float64_dtype, [[2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2]])
+
+    # Note that row indices are inclusive!
+    adjustments[1] = [
+        Float641DArrayOverwrite(array([0]),
+                                array([0]),
+                                array([0]),
+                                array([0]),
+                                as_dtype(float64_dtype, array([1])))
+    ]
+    buffer_as_of[1] = as_dtype(float64_dtype, [[1, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2]])
+
+    # No adjustment at index 2.
+    buffer_as_of[2] = buffer_as_of[1]
+
+    adjustments[3] = [
+        Float641DArrayOverwrite(array([0, 2, 1]),
+                                array([1, 2, 2]),
+                                array([0, 0, 1]),
+                                array([0, 0, 1]),
+                                as_dtype(float64_dtype, array([4, 1, 3])))
+    ]
+    buffer_as_of[3] = as_dtype(float64_dtype, [[4, 2, 2],
+                                               [4, 3, 2],
+                                               [1, 3, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2],
+                                               [2, 2, 2]])
+
+    adjustments[4] = [
+        Float641DArrayOverwrite(array([0]),
+                                array([3]),
+                                array([2]),
+                                array([2]),
+                                as_dtype(float64_dtype, array([5])))
+    ]
+    buffer_as_of[4] = as_dtype(float64_dtype, [[4, 2, 5],
+                                               [4, 3, 5],
+                                               [1, 3, 5],
+                                               [2, 2, 5],
+                                               [2, 2, 2],
+                                               [2, 2, 2]])
+
+    adjustments[5] = [
+        Float641DArrayOverwrite(array([0, 2]),
+                                array([4, 2]),
+                                array([1, 2]),
+                                array([1, 2]),
+                                as_dtype(float64_dtype, array([6, 7]))),
+    ]
+    buffer_as_of[5] = as_dtype(float64_dtype, [[4, 6, 5],
+                                               [4, 6, 5],
+                                               [1, 6, 7],
+                                               [2, 6, 5],
+                                               [2, 6, 2],
+                                               [2, 2, 2]])
+
+    return _gen_expectations(
+        baseline,
+        default_missing_value_for_dtype(float64_dtype),
+        adjustments,
+        buffer_as_of,
+        nrows=6,
+    )
+
+
 def _gen_expectations(baseline,
                       missing_value,
                       adjustments,
@@ -442,6 +542,7 @@ class AdjustedArrayTestCase(TestCase):
                     datetime64ns_dtype,
                 ),
             ),
+            _gen_overwrite_1d_array_adjustment_case(),
             # There are six cases here:
             # Using np.bytes/np.unicode/object arrays as inputs.
             # Passing np.bytes/np.unicode/object arrays to LabelArray,
diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index f343bfb8..1f82d161 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -20,7 +20,6 @@ from zipline.pipeline.loaders.quarter_estimates import (
     NextQuartersEstimatesLoader,
     PreviousQuartersEstimatesLoader
 )
-from zipline.pipeline.loaders.quarter_estimates import shift_quarters
 from zipline.testing import ZiplineTestCase
 from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
 from zipline.testing.predicates import assert_equal
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 01e1e659..c076029d 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -175,9 +175,10 @@ from zipline.pipeline.common import (
 from zipline.pipeline.data.dataset import DataSet, Column
 from zipline.pipeline.loaders.utils import (
     check_data_query_args,
+    last_in_date_group,
     normalize_data_query_bounds,
     normalize_timestamp_to_query_time,
-)
+    ffill_across_cols)
 from zipline.pipeline.sentinels import NotSpecified
 from zipline.lib.adjusted_array import AdjustedArray, can_represent_dtype
 from zipline.lib.adjustment import Float64Overwrite
@@ -869,9 +870,9 @@ def adjustments_from_deltas_with_sids(dense_dates,
 
     Parameters
     ----------
-    dates : pd.DatetimeIndex
-        The dates requested by the loader.
     dense_dates : pd.DatetimeIndex
+        The dates requested by the loader.
+    sparse_dates : pd.DatetimeIndex
         The dates that were in the raw data.
     column_idx : int
         The index of the column in the dataset.
@@ -1091,71 +1092,15 @@ class BlazeLoader(dict):
         )
         sparse_output.drop(AD_FIELD_NAME, axis=1, inplace=True)
 
-        def last_in_date_group(df, reindex, have_sids=have_sids):
-            idx = dates[dates.searchsorted(
-                df[TS_FIELD_NAME].values.astype('datetime64[D]')
-            )]
-            if have_sids:
-                idx = [idx, SID_FIELD_NAME]
-
-            last_in_group = df.drop(TS_FIELD_NAME, axis=1).groupby(
-                idx,
-                sort=False,
-            ).last()
-
-            if have_sids:
-                last_in_group = last_in_group.unstack()
-
-            if reindex:
-                if have_sids:
-                    cols = last_in_group.columns
-                    last_in_group = last_in_group.reindex(
-                        index=dates,
-                        columns=pd.MultiIndex.from_product(
-                            (cols.levels[0], assets),
-                            names=cols.names,
-                        ),
-                    )
-                else:
-                    last_in_group = last_in_group.reindex(dates)
-
-            return last_in_group
-
-        sparse_deltas = last_in_date_group(non_novel_deltas, reindex=False)
-        dense_output = last_in_date_group(sparse_output, reindex=True)
-        dense_output.ffill(inplace=True)
-
-        # Fill in missing values specified by each column. This is made
-        # significantly more complex by the fact that we need to work around
-        # two pandas issues:
-
-        # 1) When we have sids, if there are no records for a given sid for any
-        #    dates, pandas will generate a column full of NaNs for that sid.
-        #    This means that some of the columns in `dense_output` are now
-        #    float instead of the intended dtype, so we have to coerce back to
-        #    our expected type and convert NaNs into the desired missing value.
-
-        # 2) DataFrame.ffill assumes that receiving None as a fill-value means
-        #    that no value was passed.  Consequently, there's no way to tell
-        #    pandas to replace NaNs in an object column with None using fillna,
-        #    so we have to roll our own instead using df.where.
-        for column in columns:
-            # Special logic for strings since `fillna` doesn't work if the
-            # missing value is `None`.
-            if column.dtype == categorical_dtype:
-                dense_output[column.name] = dense_output[
-                    column.name
-                ].where(pd.notnull(dense_output[column.name]),
-                        column.missing_value)
-            else:
-                # We need to execute `fillna` before `astype` in case the
-                # column contains NaNs and needs to be cast to bool or int.
-                # This is so that the NaNs are replaced first, since pandas
-                # can't convert NaNs for those types.
-                dense_output[column.name] = dense_output[
-                    column.name
-                ].fillna(column.missing_value).astype(column.dtype)
-
+        sparse_deltas = last_in_date_group(non_novel_deltas,
+                                           dates,
+                                           assets,
+                                           reindex=False)
+        dense_output = last_in_date_group(sparse_output,
+                                          dates,
+                                          assets,
+                                          reindex=True)
+        ffill_across_cols(dense_output, columns)
         if have_sids:
             adjustments_from_deltas = adjustments_from_deltas_with_sids
             column_view = identity
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 7837e57d..325e9186 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,7 +1,11 @@
 from abc import abstractmethod
+from collections import defaultdict
+import numpy as np
 import pandas as pd
 from six import viewvalues
 from toolz import groupby
+from zipline.lib.adjusted_array import AdjustedArray
+from zipline.lib.adjustment import Float641DArrayOverwrite
 
 from zipline.pipeline.common import (
     EVENT_DATE_FIELD_NAME,
@@ -13,6 +17,7 @@ from zipline.pipeline.common import (
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.frame import DataFrameLoader
 from zipline.utils.pandas_utils import cross_product
+from zipline.pipeline.loaders.utils import last_in_date_group, ffill_across_cols
 
 NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
 NEXT_FISCAL_YEAR = 'next_fiscal_year'
@@ -31,10 +36,6 @@ def split_normalized_quarters(normalized_quarters):
     return years, quarters + 1
 
 
-def shift_quarters(by, years, quarters):
-    return split_normalized_quarters(normalize_quarters(years, quarters) + by)
-
-
 def required_estimates_fields(columns):
     """
     Compute the set of resource columns required to serve
@@ -93,15 +94,54 @@ class QuarterEstimatesLoader(PipelineLoader):
         self.base_column_name_map = base_column_name_map
 
     @abstractmethod
-    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
+    def load_quarters(self, num_quarters, last, dates):
         pass
 
+    def get_adjustments(self, df, column, mask, assets,
+                        final_releases_per_qtr, dates, raw_events):
+        adjustments = defaultdict(list)
+        for idx, sid in enumerate(assets):
+            # Get the releases for a particular sid
+            sid_data = final_releases_per_qtr[final_releases_per_qtr[
+                SID_FIELD_NAME] == sid
+            ]
+            # Get the release dates for this sid - these are the quarter
+            # boundaries
+            qtr_boundaries, years, qtrs = sid_data[[
+                EVENT_DATE_FIELD_NAME,
+                FISCAL_YEAR_FIELD_NAME,
+                FISCAL_QUARTER_FIELD_NAME
+            ]].unique()
+            next_qtr_starts = dates.searchsorted(qtr_boundaries, sid='right')
+            for idx, start in enumerate(next_qtr_starts):
+                # Here we need to take the new quarter and, for all dates in
+                # previous quarters, apply adjustments that use this
+                # quarter's values for those previous dates.
+                adjustments[start].extend(Float641DArrayOverwrite(first_row,
+                                                             last_row,
+                                                             idx,
+                                                             idx,
+                                                             value))
+        return AdjustedArray(
+                df[column.name].values.astype(column.dtype),
+                mask,
+                adjustments_from_deltas(
+                    dates,
+                    sparse_output[TS_FIELD_NAME].values,
+                    column_idx,
+                    column.name,
+                    asset_idx,
+                    sparse_deltas,
+                ),
+                column.missing_value,
+            )
+
     def load_adjusted_array(self, columns, dates, assets, mask):
         # TODO: how can we enforce that datasets have the num_quarters
         # attribute, given that they're created dynamically?
         groups = groupby(lambda x: x.dataset.num_quarters, columns)
         groups_columns = dict(groups)
-        if (pd.Series(groups_columns.keys()) < 0).any():
+        if (pd.Series(groups_columns) < 0).any():
             raise ValueError("Must pass a number of quarters >= 0")
         out = {}
         date_values = pd.DataFrame({SIMULTATION_DATES: dates})
@@ -110,34 +150,36 @@ class QuarterEstimatesLoader(PipelineLoader):
         date_values[SIMULTATION_DATES] = date_values[
             SIMULTATION_DATES
         ].astype('datetime64[ns]')
-        estimates_all_dates = cross_product(date_values, self.estimates)
         asset_df = pd.DataFrame({SID_FIELD_NAME: assets})
         dates_sids = cross_product(date_values, asset_df)
+        self.estimates['normalized_quarters'] = normalize_quarters(
+            self.estimates[FISCAL_YEAR_FIELD_NAME],
+            self.estimates[FISCAL_QUARTER_FIELD_NAME],
+        ).astype(float)
         for num_quarters, columns in groups_columns.iteritems():
             name_map = {c:
                         self.base_column_name_map[
                             getattr(c.dataset.__base__, c.name)
                         ] for c in columns}
-
-            # First, determine which estimates we would have known about on
-            # each date. Then, Sort by timestamp and group to find the latest
-            # estimate for each quarter.
-            final_releases_per_qtr = estimates_all_dates[
-                estimates_all_dates[TS_FIELD_NAME] <=
-                estimates_all_dates.dates
-            ].sort([TS_FIELD_NAME]).groupby(
-                [SIMULTATION_DATES,
-                 SID_FIELD_NAME,
-                 FISCAL_YEAR_FIELD_NAME,
-                 FISCAL_QUARTER_FIELD_NAME]
-            ).nth(-1).reset_index()
+            # Determine the last piece of information we know for each column
+            # on each date in the index.
+            last = last_in_date_group(self.estimates, True, dates,
+                                      assets,
+                                      extra_groupers=[
+                                          'normalized_quarters']).reset_index()
+            # Forward fill values for each quarter.
+            ffill_across_cols(last, columns)
+            stacked = last.stack(1).stack(1).reset_index()
 
             result = self.load_quarters(num_quarters,
-                                        dates_sids,
-                                        final_releases_per_qtr)
+                                        stacked, dates)
 
             for c in columns:
                 column_name = name_map[c]
+                pivoted = result.pivot(index=SIMULTATION_DATES,
+                                       columns=SID_FIELD_NAME,
+                                       values=column_name)
+                adjusted_array = self.get_adjustments(pivoted, c, mask, assets)
                 # Pivot to get a DataFrame with dates as the index and
                 # sids as the columns.
                 loader = DataFrameLoader(
@@ -145,7 +187,7 @@ class QuarterEstimatesLoader(PipelineLoader):
                     result.pivot(index=SIMULTATION_DATES,
                                  columns=SID_FIELD_NAME,
                                  values=column_name),
-                    adjustments=None
+                    adjustments=adjusted_array
                 )
                 out[c] = loader.load_adjusted_array([c],
                                                     dates,
@@ -156,34 +198,17 @@ class QuarterEstimatesLoader(PipelineLoader):
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
 
-    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
-        # Filter for releases that are on or after each simulation date.
-        eligible_next_releases = final_releases_per_qtr[
-            final_releases_per_qtr[EVENT_DATE_FIELD_NAME] >=
-            final_releases_per_qtr[SIMULTATION_DATES]
-        ]
-        # For each sid, get the upcoming release.
-        eligible_next_releases.sort(EVENT_DATE_FIELD_NAME)
-        next_releases = eligible_next_releases.groupby(
-            [SIMULTATION_DATES, SID_FIELD_NAME]
-        ).nth(0).reset_index()  # We use nth here to avoid forward filling
-        # NaNs, which `first()` will do.
-        next_releases = next_releases.rename(
-            columns={FISCAL_YEAR_FIELD_NAME: NEXT_FISCAL_YEAR,
-                     FISCAL_QUARTER_FIELD_NAME: NEXT_FISCAL_QUARTER}
-        )
-        # The next fiscal quarter is already our starting point,
-        # so we should offset `num_quarters` by 1.
-        (next_releases[FISCAL_YEAR_FIELD_NAME],
-         next_releases[FISCAL_QUARTER_FIELD_NAME]) = shift_quarters(
-            (num_quarters - 1),
-            next_releases[NEXT_FISCAL_YEAR],
-            next_releases[NEXT_FISCAL_QUARTER],
-        )
-        # Do a left merge to get values for each date.
-        result = dates_sids.merge(next_releases,
-                                  on=([SIMULTATION_DATES, SID_FIELD_NAME]),
-                                  how='left')
+    def load_quarters(self, num_quarters, stacked, dates):
+        # Filter for releases that are on or after each simulation date and
+        # determine the next quarter by picking out the upcoming release for
+        # each date in the index.
+        event_date_idxs = dates.searchsorted(pd.to_datetime(stacked[EVENT_DATE_FIELD_NAME]).values)
+        next_releases = stacked.loc[event_date_idxs >= stacked['level_0']].groupby(['level_0', 'sid']).nth(0)
+
+
+        next_releases['shifted_normalized_quarters'] = next_releases[
+            'normalized_quarters'].convert_objects(convert_numeric=True) + (num_quarters - 1)
+
         return result
 
 
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 77a9f447..28e0c90d 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -2,6 +2,8 @@ import datetime
 
 import numpy as np
 import pandas as pd
+from zipline.pipeline.common import TS_FIELD_NAME, SID_FIELD_NAME
+from zipline.utils.numpy_utils import categorical_dtype
 from zipline.utils.pandas_utils import mask_between_time
 
 
@@ -272,3 +274,72 @@ def check_data_query_args(data_query_time, data_query_tz):
                 data_query_tz,
             ),
         )
+
+
+def last_in_date_group(df, reindex, dates, assets, have_sids=True,
+                       extra_groupers=[]):
+    idx = dates[dates.searchsorted(
+        df[TS_FIELD_NAME].values.astype('datetime64[D]')
+    )]
+    if have_sids:
+        idx = [idx, SID_FIELD_NAME] + extra_groupers
+
+    last_in_group = df.drop(TS_FIELD_NAME, axis=1).groupby(
+        idx,
+        sort=False,
+    ).last()
+
+    # For the number of things that we're grouping by (except TS), unstack
+    # the df
+    for _ in range(len(idx) - 1):
+        last_in_group = last_in_group.unstack()
+
+    if reindex:
+        if have_sids:
+            cols = last_in_group.columns
+            last_in_group = last_in_group.reindex(
+                index=dates,
+                columns=pd.MultiIndex.from_product(
+                    tuple(cols.levels[0:len(extra_groupers) + 1]) + (assets,),
+                    names=cols.names,
+                ),
+            )
+        else:
+            last_in_group = last_in_group.reindex(dates)
+
+    return last_in_group
+
+
+def ffill_across_cols(df, columns):
+    df.ffill(inplace=True)
+
+    # Fill in missing values specified by each column. This is made
+    # significantly more complex by the fact that we need to work around
+    # two pandas issues:
+
+    # 1) When we have sids, if there are no records for a given sid for any
+    #    dates, pandas will generate a column full of NaNs for that sid.
+    #    This means that some of the columns in `dense_output` are now
+    #    float instead of the intended dtype, so we have to coerce back to
+    #    our expected type and convert NaNs into the desired missing value.
+
+    # 2) DataFrame.ffill assumes that receiving None as a fill-value means
+    #    that no value was passed.  Consequently, there's no way to tell
+    #    pandas to replace NaNs in an object column with None using fillna,
+    #    so we have to roll our own instead using df.where.
+    for column in columns:
+        # Special logic for strings since `fillna` doesn't work if the
+        # missing value is `None`.
+        if column.dtype == categorical_dtype:
+            df[column.name] = df[
+                column.name
+            ].where(pd.notnull(df[column.name]),
+                    column.missing_value)
+        else:
+            # We need to execute `fillna` before `astype` in case the
+            # column contains NaNs and needs to be cast to bool or int.
+            # This is so that the NaNs are replaced first, since pandas
+            # can't convert NaNs for those types.
+            df[column.name] = df[
+                column.name
+            ].fillna(column.missing_value).astype(column.dtype)

From 199c775fbf6b9a0a6761860a17591d259328bfef Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Mon, 22 Aug 2016 19:52:42 -0400
Subject: [PATCH 07/16] ENH: add adjustment for datetime64 arrays

BUG: fix adjustment start index
---
 zipline/lib/adjustment.pyx                    | 161 +++++++++++++-----
 zipline/pipeline/loaders/quarter_estimates.py | 142 ++++++++-------
 2 files changed, 190 insertions(+), 113 deletions(-)

diff --git a/zipline/lib/adjustment.pyx b/zipline/lib/adjustment.pyx
index fc133d79..036c5026 100644
--- a/zipline/lib/adjustment.pyx
+++ b/zipline/lib/adjustment.pyx
@@ -364,7 +364,39 @@ cdef class Float64Overwrite(Float64Adjustment):
                 data[row, col] = value
 
 
-cdef class Float641DArrayOverwrite:
+cdef class ArrayAdjustment(Adjustment):
+    """
+    Base class for ArrayAdjustments.
+
+    Subclasses should inherit and provide a `values` attribute and a `mutate`
+    method.
+    """
+    def __init__(self,
+                 int64_t first_row,
+                 int64_t last_row,
+                 int64_t first_col,
+                 int64_t last_col):
+        super(ArrayAdjustment, self).__init__(
+            first_row=first_row,
+            last_row=last_row,
+            first_col=first_col,
+            last_col=last_col,
+        )
+
+    def __repr__(self):
+            return (
+                "%s(first_row=%d, last_row=%d,"
+                " first_col=%d, last_col=%d, values=%s)" % (
+                    type(self).__name__,
+                    self.first_row,
+                    self.last_row,
+                    self.first_col,
+                    self.last_col,
+                    asarray(self.values),
+                )
+            )
+
+cdef class Float641DArrayOverwrite(ArrayAdjustment):
     """
     An adjustment that overwrites subarrays with a value for each subarray.
 
@@ -380,66 +412,101 @@ cdef class Float641DArrayOverwrite:
            [ 15.,  16.,  17.,  18.,  19.],
            [ 20.,  21.,  22.,  23.,  24.]])
     >>> adj = Float641DArrayOverwrite(
-    ...     row_starts=np.array([0, 3]),
-    ...     row_ends=np.array([2, 4]),
-    ...     column_starts=np.array([0, 2]),
-    ...     column_ends=np.array([1, 4]),
-    ...     values=np.array([10., 20.]),
+    ...     first_row=0,
+    ...     last_row=3,
+    ...     first_col=0,
+    ...     last_col=0,
+    ...     values=np.array([1., 2., 3., 4.]),
     )
     >>> adj.mutate(arr)
     >>> arr
-    array([[ 10.,  10.,   2.,   3.,   4.],
-           [ 10.,  10.,   7.,   8.,   9.],
-           [ 10.,  10.,  12.,  13.,  14.],
-           [ 15.,  16.,  20.,  20.,  20.],
-           [ 20.,  21.,  20.,  20.,  20.]])
+    array([[  1.,   1.,   2.,   3.,   4.],
+           [  2.,   6.,   7.,   8.,   9.],
+           [  3.,  11.,  12.,  13.,  14.],
+           [  4.,  16.,  17.,  18.,  19.],
+           [ 20.,  21.,  22.,  23.,  24.]])
     """
     cdef:
-        readonly int64_t[:] row_starts, row_ends, column_starts, column_ends
         readonly float64_t[:] values
 
     def __init__(self,
-                 int64_t[:] row_starts,
-                 int64_t[:] row_ends,
-                 int64_t[:] column_starts,
-                 int64_t[:] column_ends,
+                 int64_t first_row,
+                 int64_t last_row,
+                 int64_t first_col,
+                 int64_t last_col,
                  float64_t[:] values):
-        assert (len(row_starts) ==
-                len(row_ends) ==
-                len(column_starts) ==
-                len(column_ends))
-        for (row_start, row_end) in zip(row_starts, row_ends):
-            assert row_start <= row_end
-        for (column_start, column_end) in zip(column_starts, column_ends):
-            assert column_start <= column_end
-
-        self.row_starts = row_starts
-        self.row_ends = row_ends
-        self.column_starts = column_starts
-        self.column_ends = column_ends
+        super(Float641DArrayOverwrite, self).__init__(
+            first_row=first_row,
+            last_row=last_row,
+            first_col=first_col,
+            last_col=last_col,
+        )
+        assert (last_row + 1 - first_row) == len(values)
         self.values = values
 
     cpdef mutate(self, float64_t[:, :] data):
         cdef Py_ssize_t fill_range, row, col
-        for fill_range in range(len(self.row_starts)):
-            for row in range(self.row_starts[fill_range],
-                             self.row_ends[fill_range] + 1):
-                for col in range(self.column_starts[fill_range],
-                                 self.column_ends[fill_range] + 1):
-                    data[row, col] = self.values[fill_range]
+        cdef float64_t[:] values = self.values
+        for col in range(self.first_col, self.last_col + 1):
+            for i, row in enumerate(range(self.first_row, self.last_row + 1)):
+                data[row, col] = values[i]
+
+
+cdef class Datetime641DArrayOverwrite(ArrayAdjustment):
+    """
+    An adjustment that overwrites subarrays with a value for each subarray.
+
+    Example
+    -------
+
+    >>> import numpy as np
+    >>> arr = np.arange(25, dtype=float).reshape(5, 5)
+    >>> arr
+    array([[  0.,   1.,   2.,   3.,   4.],
+           [  5.,   6.,   7.,   8.,   9.],
+           [ 10.,  11.,  12.,  13.,  14.],
+           [ 15.,  16.,  17.,  18.,  19.],
+           [ 20.,  21.,  22.,  23.,  24.]])
+    >>> adj = Datetime641DArrayOverwrite(
+    ...     first_row=0,
+    ...     last_row=3,
+    ...     first_col=0,
+    ...     last_col=0,
+    ...     values=np.array([1, 2, 3, 4]),
+    )
+    >>> adj.mutate(arr)
+    >>> arr
+    array([[  1.,   1.,   2.,   3.,   4.],
+           [  2.,   6.,   7.,   8.,   9.],
+           [ 3.,  11.,  12.,  13.,  14.],
+           [ 4.,  16.,  17.,  18.,  19.],
+           [ 20.,  21.,  22.,  23.,  24.]])
+    """
+    cdef:
+        readonly int64_t[:] values
+
+    def __init__(self,
+                 int64_t first_row,
+                 int64_t last_row,
+                 int64_t first_col,
+                 int64_t last_col,
+                 object values):
+        super(Datetime641DArrayOverwrite, self).__init__(
+            first_row=first_row,
+            last_row=last_row,
+            first_col=first_col,
+            last_col=last_col,
+        )
+        assert (last_row + 1 - first_row) == len(values)
+        self.values = asarray([datetime_to_int(value) for value in values])
+
+    cpdef mutate(self, int64_t[:, :] data):
+        cdef Py_ssize_t row, col
+        cdef int64_t[:] values = self.values
+        for col in range(self.first_col, self.last_col + 1):
+            for i, row in enumerate(range(self.first_row, self.last_row + 1)):
+                data[row, col] = values[i]
 
-    def __repr__(self):
-            return (
-                "%s(row_starts=%s, row_ends=%s,"
-                " column_starts=%s, column_ends=%s, values=%s)" % (
-                    type(self).__name__,
-                    asarray(self.row_starts),
-                    asarray(self.row_ends),
-                    asarray(self.column_starts),
-                    asarray(self.column_ends),
-                    asarray(self.values),
-                )
-            )
 
 cdef class Float64Add(Float64Adjustment):
     """
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 325e9186..7d9ce0c4 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,11 +1,14 @@
 from abc import abstractmethod
 from collections import defaultdict
+from functools import partial
 import numpy as np
+from numpy.ma import asarray
 import pandas as pd
 from six import viewvalues
-from toolz import groupby
+from toolz import groupby, curry
 from zipline.lib.adjusted_array import AdjustedArray
-from zipline.lib.adjustment import Float641DArrayOverwrite
+from zipline.lib.adjustment import (Datetime641DArrayOverwrite,
+                                    Float641DArrayOverwrite)
 
 from zipline.pipeline.common import (
     EVENT_DATE_FIELD_NAME,
@@ -16,6 +19,7 @@ from zipline.pipeline.common import (
 )
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.frame import DataFrameLoader
+from zipline.utils.numpy_utils import datetime64ns_dtype
 from zipline.utils.pandas_utils import cross_product
 from zipline.pipeline.loaders.utils import last_in_date_group, ffill_across_cols
 
@@ -97,42 +101,49 @@ class QuarterEstimatesLoader(PipelineLoader):
     def load_quarters(self, num_quarters, last, dates):
         pass
 
-    def get_adjustments(self, df, column, mask, assets,
-                        final_releases_per_qtr, dates, raw_events):
+    def get_adjustments(self, result, col_result, last,
+                        column_name,
+                        column, mask,
+                        assets):
         adjustments = defaultdict(list)
-        for idx, sid in enumerate(assets):
-            # Get the releases for a particular sid
-            sid_data = final_releases_per_qtr[final_releases_per_qtr[
-                SID_FIELD_NAME] == sid
+        if column.dtype == datetime64ns_dtype:
+            overwrite = Datetime641DArrayOverwrite
+        else:
+            overwrite = Float641DArrayOverwrite
+        for sid_idx, sid in enumerate(assets):
+            sid_result = result[result.index.get_level_values(
+                SID_FIELD_NAME
+            ) == sid]
+            sid_result = sid_result.reset_index(
+                level='shifted_normalized_quarters'
+            )  # Remove qtrs from index to find shifts
+            # Figure out where we think quarters are changing.
+            qtr_shifts = sid_result[
+                sid_result['shifted_normalized_quarters'] !=
+                sid_result['shifted_normalized_quarters'].shift(1)
             ]
-            # Get the release dates for this sid - these are the quarter
-            # boundaries
-            qtr_boundaries, years, qtrs = sid_data[[
-                EVENT_DATE_FIELD_NAME,
-                FISCAL_YEAR_FIELD_NAME,
-                FISCAL_QUARTER_FIELD_NAME
-            ]].unique()
-            next_qtr_starts = dates.searchsorted(qtr_boundaries, sid='right')
-            for idx, start in enumerate(next_qtr_starts):
-                # Here we need to take the new quarter and, for all dates in
-                # previous quarters, apply adjustments that use this
-                # quarter's values for those previous dates.
-                adjustments[start].extend(Float641DArrayOverwrite(first_row,
-                                                             last_row,
-                                                             idx,
-                                                             idx,
-                                                             value))
+            # Iterate backwards. No adjustment for 1st quarter.
+            for row_indexer in list(reversed(qtr_shifts.index))[:-1]:
+                # We want to write the values for this row's quarter over
+                # everything that comes before this quarter when we are at
+                # the date before this quarter starts.
+                qtr_start_idx = last.index.get_loc(row_indexer[0])
+                quarter = qtr_shifts.loc[row_indexer][
+                    'shifted_normalized_quarters'
+                ]
+                adjustments[qtr_start_idx] = \
+                    [overwrite(0,
+                               qtr_start_idx - 1,  # get index date
+                               sid_idx,
+                               sid_idx,
+                               last[column_name, quarter,
+                                    sid][:qtr_start_idx].values)
+                     ]
+
         return AdjustedArray(
-                df[column.name].values.astype(column.dtype),
+                col_result.values.astype(column.dtype),
                 mask,
-                adjustments_from_deltas(
-                    dates,
-                    sparse_output[TS_FIELD_NAME].values,
-                    column_idx,
-                    column.name,
-                    asset_idx,
-                    sparse_deltas,
-                ),
+                dict(adjustments),
                 column.missing_value,
             )
 
@@ -150,8 +161,6 @@ class QuarterEstimatesLoader(PipelineLoader):
         date_values[SIMULTATION_DATES] = date_values[
             SIMULTATION_DATES
         ].astype('datetime64[ns]')
-        asset_df = pd.DataFrame({SID_FIELD_NAME: assets})
-        dates_sids = cross_product(date_values, asset_df)
         self.estimates['normalized_quarters'] = normalize_quarters(
             self.estimates[FISCAL_YEAR_FIELD_NAME],
             self.estimates[FISCAL_QUARTER_FIELD_NAME],
@@ -166,50 +175,51 @@ class QuarterEstimatesLoader(PipelineLoader):
             last = last_in_date_group(self.estimates, True, dates,
                                       assets,
                                       extra_groupers=[
-                                          'normalized_quarters']).reset_index()
+                                          'normalized_quarters'])
             # Forward fill values for each quarter.
             ffill_across_cols(last, columns)
-            stacked = last.stack(1).stack(1).reset_index()
+            stacked = last.stack(1).stack(1)
 
-            result = self.load_quarters(num_quarters,
-                                        stacked, dates)
+            result = self.load_quarters(num_quarters, stacked)
 
             for c in columns:
                 column_name = name_map[c]
-                pivoted = result.pivot(index=SIMULTATION_DATES,
-                                       columns=SID_FIELD_NAME,
-                                       values=column_name)
-                adjusted_array = self.get_adjustments(pivoted, c, mask, assets)
-                # Pivot to get a DataFrame with dates as the index and
-                # sids as the columns.
-                loader = DataFrameLoader(
-                    c,
-                    result.pivot(index=SIMULTATION_DATES,
-                                 columns=SID_FIELD_NAME,
-                                 values=column_name),
-                    adjustments=adjusted_array
-                )
-                out[c] = loader.load_adjusted_array([c],
-                                                    dates,
-                                                    assets,
-                                                    mask)[c]
+                col_result = result[
+                    column_name
+                ].reset_index(1, drop=True).unstack(1).reindex(dates)
+                adjusted_array = self.get_adjustments(result,
+                                                      col_result,
+                                                      last,
+                                                      column_name,
+                                                      c,
+                                                      mask,
+                                                      assets)
+                out[c] = adjusted_array
         return out
 
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
 
-    def load_quarters(self, num_quarters, stacked, dates):
+    def load_quarters(self, num_quarters, stacked):
         # Filter for releases that are on or after each simulation date and
         # determine the next quarter by picking out the upcoming release for
         # each date in the index.
-        event_date_idxs = dates.searchsorted(pd.to_datetime(stacked[EVENT_DATE_FIELD_NAME]).values)
-        next_releases = stacked.loc[event_date_idxs >= stacked['level_0']].groupby(['level_0', 'sid']).nth(0)
-
-
-        next_releases['shifted_normalized_quarters'] = next_releases[
-            'normalized_quarters'].convert_objects(convert_numeric=True) + (num_quarters - 1)
-
-        return result
+        stacked = stacked.sort(EVENT_DATE_FIELD_NAME)
+        next_releases = stacked.loc[
+            stacked[EVENT_DATE_FIELD_NAME] >= stacked.index.get_level_values(
+                0
+            )].groupby(level=[0, 2]).nth(0)
+        next_releases[
+            'shifted_normalized_quarters'
+        ] = next_releases.index.get_level_values(
+            'normalized_quarters'
+        ) + (num_quarters - 1)
+        next_releases = next_releases.set_index([
+            next_releases.index.get_level_values(0),  # dates
+            'shifted_normalized_quarters',
+            next_releases.index.get_level_values(2)  # sids
+        ])
+        return stacked.loc[next_releases.index]
 
 
 class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):

From ebbe85b79a62d8531e8551c07df0c458d8dc4ea5 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 23 Aug 2016 11:06:07 -0400
Subject: [PATCH 08/16] TST: add test for datetime array and update test

TST: fix quarter normalization test

TST: change test name

BUG: remove arg

BUG: look at dict keys

TST: add test for windowing

MAINT: raise ValueError instead of asserting

TST: add assertion to check windowing

TST: parametrize test over number of quarters forward/back.

BUG: fix adjustment calculation logic for quarter crossovers.

TST: add test for previous quarter windows

BUG: fix bugs in calculating previous windows

BUG: fix missing value for datetime

TST: add test case for missing quarter
---
 tests/pipeline/test_adjusted_array.py         | 143 ++++---
 tests/pipeline/test_quarters_estimates.py     | 196 ++++++++--
 zipline/lib/adjustment.pyx                    |  24 +-
 zipline/pipeline/loaders/blaze/core.py        |   3 +-
 zipline/pipeline/loaders/events.py            |   2 -
 zipline/pipeline/loaders/quarter_estimates.py | 363 ++++++++++++------
 zipline/pipeline/loaders/utils.py             |  47 ++-
 7 files changed, 552 insertions(+), 226 deletions(-)

diff --git a/tests/pipeline/test_adjusted_array.py b/tests/pipeline/test_adjusted_array.py
index 80a1dd97..a5bf3ed0 100644
--- a/tests/pipeline/test_adjusted_array.py
+++ b/tests/pipeline/test_adjusted_array.py
@@ -20,6 +20,7 @@ from toolz import curry
 from zipline.errors import WindowLengthNotPositive, WindowLengthTooLong
 from zipline.lib.adjustment import (
     Datetime64Overwrite,
+    Datetime641DArrayOverwrite,
     Float64Multiply,
     Float64Overwrite,
     Float641DArrayOverwrite,
@@ -305,7 +306,11 @@ def _gen_overwrite_adjustment_cases(name,
     )
 
 
-def _gen_overwrite_1d_array_adjustment_case():
+def _gen_overwrite_1d_array_adjustment_case(name,
+                                            make_input,
+                                            make_expected_output,
+                                            dtype,
+                                            missing_value):
     """
     Generate test cases for overwrite adjustments.
 
@@ -314,90 +319,91 @@ def _gen_overwrite_1d_array_adjustment_case():
     the adjustments are expected to modify the arrays.
 
     This is parameterized on `make_input` and `make_expected_output` functions,
-    which take 2-D lists of values and transform them into desired input/output
+    which take 2-D lists of values and transform them into desired input/output
     arrays. We do this so that we can easily test both vanilla numpy ndarrays
     and our own LabelArray class for strings.
     """
-
+    adjustment_type = {
+        float64_dtype: Float641DArrayOverwrite,
+        datetime64ns_dtype: Datetime641DArrayOverwrite,
+    }[dtype]
     adjustments = {}
     buffer_as_of = [None] * 6
-    baseline = as_dtype(float64_dtype, [[2, 2, 2],
-                                        [2, 2, 2],
-                                        [2, 2, 2],
-                                        [2, 2, 2],
-                                        [2, 2, 2],
-                                        [2, 2, 2]])
+    baseline = make_input([[2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2],
+                           [2, 2, 2]])
 
-    buffer_as_of[0] = as_dtype(float64_dtype, [[2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2]])
+    buffer_as_of[0] = make_expected_output([[2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2],
+                                            [2, 2, 2]])
 
+    vals1 = [1]
     # Note that row indices are inclusive!
     adjustments[1] = [
-        Float641DArrayOverwrite(array([0]),
-                                array([0]),
-                                array([0]),
-                                array([0]),
-                                as_dtype(float64_dtype, array([1])))
+        adjustment_type(
+            0, 0, 0, 0,
+            array([coerce_to_dtype(dtype, val) for val in vals1])
+        )
     ]
-    buffer_as_of[1] = as_dtype(float64_dtype, [[1, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2]])
+    buffer_as_of[1] = make_input([[1, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2]])
 
     # No adjustment at index 2.
     buffer_as_of[2] = buffer_as_of[1]
 
+    vals3 = [4, 4, 1]
     adjustments[3] = [
-        Float641DArrayOverwrite(array([0, 2, 1]),
-                                array([1, 2, 2]),
-                                array([0, 0, 1]),
-                                array([0, 0, 1]),
-                                as_dtype(float64_dtype, array([4, 1, 3])))
+        adjustment_type(
+            0, 2, 0, 0,
+            array([coerce_to_dtype(dtype, val) for val in vals3])
+        )
     ]
-    buffer_as_of[3] = as_dtype(float64_dtype, [[4, 2, 2],
-                                               [4, 3, 2],
-                                               [1, 3, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2],
-                                               [2, 2, 2]])
+    buffer_as_of[3] = make_input([[4, 2, 2],
+                                  [4, 2, 2],
+                                  [1, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2],
+                                  [2, 2, 2]])
 
+    vals4 = [5] * 4
     adjustments[4] = [
-        Float641DArrayOverwrite(array([0]),
-                                array([3]),
-                                array([2]),
-                                array([2]),
-                                as_dtype(float64_dtype, array([5])))
+        adjustment_type(
+            0, 3, 2, 2,
+            array([coerce_to_dtype(dtype, val) for val in vals4]))
     ]
-    buffer_as_of[4] = as_dtype(float64_dtype, [[4, 2, 5],
-                                               [4, 3, 5],
-                                               [1, 3, 5],
-                                               [2, 2, 5],
-                                               [2, 2, 2],
-                                               [2, 2, 2]])
+    buffer_as_of[4] = make_input([[4, 2, 5],
+                                  [4, 2, 5],
+                                  [1, 2, 5],
+                                  [2, 2, 5],
+                                  [2, 2, 2],
+                                  [2, 2, 2]])
 
+    vals5 = range(1, 6)
     adjustments[5] = [
-        Float641DArrayOverwrite(array([0, 2]),
-                                array([4, 2]),
-                                array([1, 2]),
-                                array([1, 2]),
-                                as_dtype(float64_dtype, array([6, 7]))),
+        adjustment_type(
+            0, 4, 1, 1,
+            array([coerce_to_dtype(dtype, val) for val in vals5])),
     ]
-    buffer_as_of[5] = as_dtype(float64_dtype, [[4, 6, 5],
-                                               [4, 6, 5],
-                                               [1, 6, 7],
-                                               [2, 6, 5],
-                                               [2, 6, 2],
-                                               [2, 2, 2]])
+    buffer_as_of[5] = make_input([[4, 1, 5],
+                                  [4, 2, 5],
+                                  [1, 3, 5],
+                                  [2, 4, 5],
+                                  [2, 5, 2],
+                                  [2, 2, 2]])
 
     return _gen_expectations(
         baseline,
-        default_missing_value_for_dtype(float64_dtype),
+        missing_value,
         adjustments,
         buffer_as_of,
         nrows=6,
@@ -542,7 +548,22 @@ class AdjustedArrayTestCase(TestCase):
                     datetime64ns_dtype,
                 ),
             ),
-            _gen_overwrite_1d_array_adjustment_case(),
+            _gen_overwrite_1d_array_adjustment_case(
+                'float',
+                make_input=as_dtype(float64_dtype),
+                make_expected_output=as_dtype(float64_dtype),
+                dtype=float64_dtype,
+                missing_value=default_missing_value_for_dtype(float64_dtype),
+            ),
+            _gen_overwrite_1d_array_adjustment_case(
+                'datetime',
+                make_input=as_dtype(datetime64ns_dtype),
+                make_expected_output=as_dtype(datetime64ns_dtype),
+                dtype=datetime64ns_dtype,
+                missing_value=default_missing_value_for_dtype(
+                    datetime64ns_dtype,
+                ),
+            ),
             # There are six cases here:
             # Using np.bytes/np.unicode/object arrays as inputs.
             # Passing np.bytes/np.unicode/object arrays to LabelArray,
diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 1f82d161..5129bb5a 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,9 +1,10 @@
 import blaze as bz
 import itertools
+from nose_parameterized import parameterized
 import numpy as np
 import pandas as pd
 
-from zipline.pipeline import SimplePipelineEngine, Pipeline
+from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
 from zipline.pipeline.common import (
     EVENT_DATE_FIELD_NAME,
     FISCAL_QUARTER_FIELD_NAME,
@@ -18,8 +19,8 @@ from zipline.pipeline.loaders.blaze.estimates import (
 )
 from zipline.pipeline.loaders.quarter_estimates import (
     NextQuartersEstimatesLoader,
-    PreviousQuartersEstimatesLoader
-)
+    PreviousQuartersEstimatesLoader,
+    split_normalized_quarters, normalize_quarters)
 from zipline.testing import ZiplineTestCase
 from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
 from zipline.testing.predicates import assert_equal
@@ -31,7 +32,6 @@ class Estimates(DataSet):
     fiscal_quarter = Column(dtype=float64_dtype)
     fiscal_year = Column(dtype=float64_dtype)
     estimate = Column(dtype=float64_dtype)
-    value = Column(dtype=float64_dtype)
 
 
 def QuartersEstimates(num_qtr):
@@ -40,6 +40,28 @@ def QuartersEstimates(num_qtr):
         name = Estimates
     return QtrEstimates
 
+
+# 0Q1: 2015-01-05.Q1.e1.2015-01-06, 2015-01-10.Q1.e1.2015-01-11,
+# 0Q2: 2015-01-15.Q2.e1.2015-01-16, 2015-01-20.Q2.e1.2015-01-21,
+# 0Q4: 2015-02-05.Q4.e1.2015-02-06, 2015-02-10.Q4.e1.2015-02-11,
+# Skip Q3 to make sure we handle skipped quarter data correctly.
+estimates_timeline = pd.DataFrame({
+    TS_FIELD_NAME: [pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-07'),
+                    pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-17'),
+                    pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-17'),
+                    pd.Timestamp('2015-01-22'), pd.Timestamp('2015-02-02')],
+    EVENT_DATE_FIELD_NAME:
+        [pd.Timestamp('2015-01-10'), pd.Timestamp('2015-01-10'),
+         pd.Timestamp('2015-01-20'), pd.Timestamp('2015-01-20'),
+         pd.Timestamp('2015-02-10'), pd.Timestamp('2015-02-10'),
+         pd.Timestamp('2015-02-10'), pd.Timestamp('2015-02-10')],
+    'estimate': [1.]*2 + [2.] * 2 + [4.] * 4,
+    FISCAL_QUARTER_FIELD_NAME: [1]*2 + [2] * 2 + [4] * 4,
+    FISCAL_YEAR_FIELD_NAME: [2015]*8,
+    SID_FIELD_NAME: [0]*8
+})
+
+
 # Final release dates never change. The quarters have very tight date ranges
 # in order to reduce the number of dates we need to iterate through when
 # testing.
@@ -48,7 +70,6 @@ releases = pd.DataFrame({
     EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-15'),
                             pd.Timestamp('2015-01-31')],
     'estimate': [0.5, 0.8],
-    'value': [0.6, 0.9],
     FISCAL_QUARTER_FIELD_NAME: [1.0, 2.0],
     FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0]
 })
@@ -70,7 +91,6 @@ q2_release_dates = [pd.Timestamp('2015-01-30'),  # One day early
 estimates = pd.DataFrame({
     EVENT_DATE_FIELD_NAME: q1_release_dates + q2_release_dates,
     'estimate': [.1, .2, .3, .4],
-    'value': [np.NaN, np.NaN, np.NaN, np.NaN],
     FISCAL_QUARTER_FIELD_NAME: [1.0, 1.0, 2.0, 2.0],
     FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0, 2015.0, 2015.0]
 })
@@ -110,14 +130,12 @@ class EstimateTestCase(WithAssetFinder,
 
     @classmethod
     def init_class_fixtures(cls):
-        cls.events = gen_estimates()
         cls.sids = cls.events['sid'].unique()
         cls.columns = {
             Estimates.estimate: 'estimate',
             Estimates.event_date: EVENT_DATE_FIELD_NAME,
             Estimates.fiscal_quarter: FISCAL_QUARTER_FIELD_NAME,
             Estimates.fiscal_year: FISCAL_YEAR_FIELD_NAME,
-            Estimates.value: 'value',
         }
         cls.loader = cls.make_loader(
             events=cls.events,
@@ -147,7 +165,138 @@ class EstimateTestCase(WithAssetFinder,
             )
 
 
+window_test_cases = [
+    (window_len, start_idx, num_quarters_out) for
+    (window_len, start_idx), num_quarters_out in
+    itertools.product(
+        [[5, pd.Timestamp('2015-01-09').tz_localize('utc')],
+         [6, pd.Timestamp('2015-01-12').tz_localize('utc')],
+         [11, pd.Timestamp('2015-01-20').tz_localize('utc')],
+         [19, pd.Timestamp('2015-01-30').tz_localize('utc')],
+         [26, pd.Timestamp('2015-02-10').tz_localize('utc')]],
+        [1, 2, 3, 4])
+]
+
+
+class NextEstimateWindowsTestCase(EstimateTestCase):
+    events = estimates_timeline
+    START_DATE = pd.Timestamp('2014-12-31')
+    END_DATE = pd.Timestamp('2015-02-15')
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return NextQuartersEstimatesLoader(events, columns)
+
+    @parameterized.expand(window_test_cases)
+    def test_next_estimate_windows_at_quarter_boundaries(self,
+                                                         window_len,
+                                                         start_idx,
+                                                         num_quarters_out):
+        """
+        Tests that we overwrite values with the correct quarter's estimate at
+        the correct dates.
+        """
+        dataset = QuartersEstimates(num_quarters_out)
+
+        class SomeFactor(CustomFactor):
+            inputs = [dataset.estimate]
+            window_length = window_len
+
+            def compute(self, today, assets, out, *inputs):
+                unique_inputs = np.unique(inputs).tolist()
+                requested_quarter = None
+                if (pd.Timestamp('2015-02-10').tz_localize('utc') >= today >=
+                        pd.Timestamp('2015-01-05').tz_localize('utc')):
+                    next_quarter = estimates_timeline[
+                            estimates_timeline[EVENT_DATE_FIELD_NAME] >= today
+                        ].min()[FISCAL_QUARTER_FIELD_NAME]
+                    requested_quarter = next_quarter + num_quarters_out - 1
+
+                # If we know something about the requested quarter, assert
+                # that all our estimates in the window are about that quarter.
+                if requested_quarter and requested_quarter <= 4 and \
+                        requested_quarter != 3:
+                    assert np.equal(unique_inputs, requested_quarter).all()
+                else:
+                    # We don't have any information yet about the next quarter
+                    # or about the requested quarter; in that case, all our
+                    # estimates in the window should be NaN across time.
+                    assert np.isnan(unique_inputs).all()
+
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+        engine.run_pipeline(
+            Pipeline({'est': SomeFactor()}),
+            start_date=start_idx,
+            end_date=self.trading_days[-1],
+        )
+
+
+class PreviousEstimateWindowsTestCase(EstimateTestCase):
+    events = estimates_timeline
+    START_DATE = pd.Timestamp('2014-12-31')
+    END_DATE = pd.Timestamp('2015-02-15')
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return PreviousQuartersEstimatesLoader(events, columns)
+
+    @parameterized.expand(window_test_cases)
+    def test_previous_estimate_windows_at_quarter_boundaries(self,
+                                                             window_len,
+                                                             start_idx,
+                                                             num_quarters_out):
+        """
+        Tests that we overwrite values with the correct quarter's estimate at
+        the correct dates.
+        """
+        dataset = QuartersEstimates(num_quarters_out)
+
+        class SomeFactor(CustomFactor):
+            inputs = [dataset.estimate]
+            window_length = window_len
+
+            def compute(self, today, assets, out, *inputs):
+                unique_inputs = np.unique(inputs).tolist()
+                requested_quarter = None
+                if today >= pd.Timestamp('2015-01-12').tz_localize('utc'):
+                    previous_quarter = estimates_timeline[
+                            estimates_timeline[EVENT_DATE_FIELD_NAME] <= today
+                        ].max()[FISCAL_QUARTER_FIELD_NAME]
+                    requested_quarter = (
+                        previous_quarter - (num_quarters_out - 1)
+                    )
+
+                # If we know something about the requested quarter, assert
+                # that all our estimates in the window are about that quarter.
+                if requested_quarter and requested_quarter >= 0 and \
+                        requested_quarter != 3:
+                    assert np.equal(unique_inputs, requested_quarter).all()
+                else:
+                    # We don't have any information yet about the previous
+                    # quarter or about the requested quarter; in that case,
+                    # all our estimates in the window should be NaN across
+                    # time.
+                    assert np.isnan(unique_inputs).all()
+
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+        engine.run_pipeline(
+            Pipeline({'est': SomeFactor()}),
+            start_date=start_idx,
+            end_date=self.trading_days[-1],
+        )
+
+
 class NextEstimateTestCase(EstimateTestCase):
+    events = gen_estimates()
+
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
@@ -229,6 +378,8 @@ class BlazeNextEstimateLoaderTestCase(NextEstimateTestCase):
 
 
 class PreviousEstimateTestCase(EstimateTestCase):
+    events = gen_estimates()
+
     @classmethod
     def make_loader(cls, events, columns):
         return PreviousQuartersEstimatesLoader(events, columns)
@@ -314,26 +465,13 @@ class QuarterShiftTestCase(ZiplineTestCase):
     This tests, in isolation, quarter calculation logic for shifting quarters
     backwards/forwards from a starting point.
     """
-    def test_calc_forward_shift(self):
+    def test_quarter_normalization(self):
         input_yrs = pd.Series([0] * 4)
         input_qtrs = pd.Series(range(1, 5))
-        expected = pd.DataFrame(([yr, qtr] for yr in range(0, 4) for qtr
-                                 in range(1, 5)))
-        for i in range(0, 8):
-            years, quarters = shift_quarters(i, input_yrs, input_qtrs)
-            # Can't use assert_series_equal here with check_names=False
-            # because that still fails due to name differences.
-            assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
-            assert quarters.equals(expected[i:i+4].reset_index(drop=True)[1])
-
-    def test_calc_backward_shift(self):
-        input_yrs = pd.Series([0] * 4)
-        input_qtrs = pd.Series(range(4, 0, -1))
-        expected = pd.DataFrame(([yr, qtr] for yr in range(0, -4, -1) for qtr
-                                 in range(4, 0, -1)))
-        for i in range(0, 8, 1):
-            years, quarters = shift_quarters(-i, input_yrs, input_qtrs)
-            # Can't use assert_series_equal here with check_names=False
-            # because that still fails due to name differences.
-            assert years.equals(expected[i:i+4].reset_index(drop=True)[0])
-            assert quarters.equals(expected[i:i+4].reset_index(drop=True)[1])
+        result_years, result_quarters = split_normalized_quarters(
+            normalize_quarters(input_yrs, input_qtrs)
+        )
+        # Can't use assert_series_equal here with check_names=False
+        # because that still fails due to name differences.
+        assert input_yrs.equals(result_years)
+        assert input_qtrs.equals(result_quarters)
diff --git a/zipline/lib/adjustment.pyx b/zipline/lib/adjustment.pyx
index 036c5026..8839ece9 100644
--- a/zipline/lib/adjustment.pyx
+++ b/zipline/lib/adjustment.pyx
@@ -371,18 +371,6 @@ cdef class ArrayAdjustment(Adjustment):
     Subclasses should inherit and provide a `values` attribute and a `mutate`
     method.
     """
-    def __init__(self,
-                 int64_t first_row,
-                 int64_t last_row,
-                 int64_t first_col,
-                 int64_t last_col):
-        super(ArrayAdjustment, self).__init__(
-            first_row=first_row,
-            last_row=last_row,
-            first_col=first_col,
-            last_col=last_col,
-        )
-
     def __repr__(self):
             return (
                 "%s(first_row=%d, last_row=%d,"
@@ -441,7 +429,11 @@ cdef class Float641DArrayOverwrite(ArrayAdjustment):
             first_col=first_col,
             last_col=last_col,
         )
-        assert (last_row + 1 - first_row) == len(values)
+        if last_row + 1 - first_row != len(values):
+            raise ValueError(
+                "Mismatch: got %d values for rows starting at index %d and "
+                "ending at index %d." % (len(values), first_row, last_row)
+            )
         self.values = values
 
     cpdef mutate(self, float64_t[:, :] data):
@@ -497,7 +489,11 @@ cdef class Datetime641DArrayOverwrite(ArrayAdjustment):
             first_col=first_col,
             last_col=last_col,
         )
-        assert (last_row + 1 - first_row) == len(values)
+        if last_row + 1 - first_row != len(values):
+            raise ValueError("Mismatch: got %d values for rows starting at"
+            " index %d and ending at index %d." % (
+                len(values), first_row, last_row)
+            )
         self.values = asarray([datetime_to_int(value) for value in values])
 
     cpdef mutate(self, int64_t[:, :] data):
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index c076029d..24ce3e26 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -178,7 +178,8 @@ from zipline.pipeline.loaders.utils import (
     last_in_date_group,
     normalize_data_query_bounds,
     normalize_timestamp_to_query_time,
-    ffill_across_cols)
+    ffill_across_cols
+)
 from zipline.pipeline.sentinels import NotSpecified
 from zipline.lib.adjusted_array import AdjustedArray, can_represent_dtype
 from zipline.lib.adjustment import Float64Overwrite
diff --git a/zipline/pipeline/loaders/events.py b/zipline/pipeline/loaders/events.py
index cb33b3b2..4249d145 100644
--- a/zipline/pipeline/loaders/events.py
+++ b/zipline/pipeline/loaders/events.py
@@ -167,7 +167,6 @@ class EventsLoader(PipelineLoader):
             return {}
 
         return self._load_events(
-            rows=self.events,
             name_map=self.next_value_columns,
             indexer=self.next_event_indexer(dates, sids),
             columns=columns,
@@ -181,7 +180,6 @@ class EventsLoader(PipelineLoader):
             return {}
 
         return self._load_events(
-            rows=self.events,
             name_map=self.previous_value_columns,
             indexer=self.previous_event_indexer(dates, sids),
             columns=columns,
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 7d9ce0c4..6ef8ae7c 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,11 +1,9 @@
 from abc import abstractmethod
 from collections import defaultdict
-from functools import partial
 import numpy as np
-from numpy.ma import asarray
 import pandas as pd
 from six import viewvalues
-from toolz import groupby, curry
+from toolz import groupby
 from zipline.lib.adjusted_array import AdjustedArray
 from zipline.lib.adjustment import (Datetime641DArrayOverwrite,
                                     Float641DArrayOverwrite)
@@ -18,10 +16,15 @@ from zipline.pipeline.common import (
     TS_FIELD_NAME,
 )
 from zipline.pipeline.loaders.base import PipelineLoader
-from zipline.pipeline.loaders.frame import DataFrameLoader
 from zipline.utils.numpy_utils import datetime64ns_dtype
-from zipline.utils.pandas_utils import cross_product
-from zipline.pipeline.loaders.utils import last_in_date_group, ffill_across_cols
+from zipline.pipeline.loaders.utils import (
+    ffill_across_cols,
+    last_in_date_group
+)
+
+NORMALIZED_QUARTERS = 'normalized_quarters'
+
+SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
 
 NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
 NEXT_FISCAL_YEAR = 'next_fiscal_year'
@@ -101,47 +104,184 @@ class QuarterEstimatesLoader(PipelineLoader):
     def load_quarters(self, num_quarters, last, dates):
         pass
 
-    def get_adjustments(self, result, col_result, last,
+    def get_requested_data_for_col(self, stacked_last_per_qtr, idx, dates):
+        """
+        Selects the requested data for each date.
+
+        Parameters
+        ----------
+        stacked_last_per_qtr : pd.DataFrame
+            The latest estimate known per sid per date per quarter with the
+            dates, normalized quarter, and sid as the index.
+        idx : pd.MultiIndex
+            The index of the row of the requested quarter from each date for
+            each sid.
+        dates : pd.DatetimeIndex
+            The calendar dates for which estimates data is requested.
+
+        Returns
+        -------
+        requested_qtr_data : pd.DataFrame
+            The DataFrame with final values for the requested quarter for all
+            columns; `dates` are the index and columns are a MultiIndex with
+            sids at the top level and the dataset columns on the bottom.
+        """
+        requested_qtr_data = stacked_last_per_qtr.loc[idx]
+        # We no longer need this in the index, but we do need it as a column
+        # to calculate adjustments.
+        requested_qtr_data = requested_qtr_data.reset_index(
+            SHIFTED_NORMALIZED_QTRS
+        )
+        (requested_qtr_data[FISCAL_YEAR_FIELD_NAME],
+         requested_qtr_data[FISCAL_QUARTER_FIELD_NAME]) = \
+            split_normalized_quarters(
+                requested_qtr_data[SHIFTED_NORMALIZED_QTRS]
+            )
+        # Move sids into the columns. Once we're left with just dates
+        # as the index, we can reindex by all dates so that we have a
+        # value for each calendar date.
+        requested_qtr_data = requested_qtr_data.unstack(
+            SID_FIELD_NAME
+        ).reindex(dates)
+        return requested_qtr_data
+
+    def get_adjustments(self,
+                        zero_qtr_idx,
+                        requested_qtr_idx,
+                        stacked_last_per_qtr,
+                        last_per_qtr,
+                        dates,
                         column_name,
-                        column, mask,
-                        assets):
+                        column,
+                        mask,
+                        assets,
+                        qtr_crossover_point):
+        """
+        Creates an AdjustedArray from the given estimates data for the given
+        dates.
+
+        Parameters
+        ----------
+        zero_qtr_idx : pd.MultiIndex
+            The index of the row of the zeroth (immediately next/previous)
+            quarter from each date for each sid.
+        requested_qtr_idx : pd.MultiIndex
+            The index of the row of the requested quarter from each date for
+            each sid.
+        stacked_last_per_qtr : pd.DataFrame
+            The latest estimate known per sid per date per quarter with the
+            dates, normalized quarter, and sid as the index.
+        last_per_qtr : pd.DataFrame
+            The latest estimate known per sid per date per quarter with
+            dates as the index and normalized quarter and sid in the columns
+            MultiIndex; allows easy access to the timeline of estimates
+            across all dates for a sid for a particular quarter.
+        dates : pd.DatetimeIndex
+            The calendar dates for which estimates data is requested.
+        column_name : string
+            The name of the column for which the AdjustedArray is being
+            computed.
+        column : BoundColumn
+            The column for which the AdjustedArray is being computed.
+        mask :
+        assets :
+        qtr_crossover_point : string
+            Whether we should use the 'right' or 'left' side when doing
+            searchsorted on the dates for quarter boundaries.
+
+        Returns
+        -------
+        adjusted_array : AdjustedArray
+            The array of data and overwrites for the given column.
+        """
         adjustments = defaultdict(list)
+        requested_qtr_data = self.get_requested_data_for_col(
+            stacked_last_per_qtr, requested_qtr_idx, dates
+        )
+        zero_qtr_data = stacked_last_per_qtr.loc[zero_qtr_idx]
+        # We no longer need this in the index, but we do need it as a column
+        # to calculate adjustments.
+        zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
         if column.dtype == datetime64ns_dtype:
             overwrite = Datetime641DArrayOverwrite
+            missing_value = np.datetime64('NaT', 'ns')
         else:
             overwrite = Float641DArrayOverwrite
+            missing_value = np.NaN
         for sid_idx, sid in enumerate(assets):
-            sid_result = result[result.index.get_level_values(
-                SID_FIELD_NAME
-            ) == sid]
-            sid_result = sid_result.reset_index(
-                level='shifted_normalized_quarters'
-            )  # Remove qtrs from index to find shifts
-            # Figure out where we think quarters are changing.
-            qtr_shifts = sid_result[
-                sid_result['shifted_normalized_quarters'] !=
-                sid_result['shifted_normalized_quarters'].shift(1)
+            zero_qtr_sid_data = zero_qtr_data[
+                zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
             ]
-            # Iterate backwards. No adjustment for 1st quarter.
-            for row_indexer in list(reversed(qtr_shifts.index))[:-1]:
-                # We want to write the values for this row's quarter over
-                # everything that comes before this quarter when we are at
-                # the date before this quarter starts.
-                qtr_start_idx = last.index.get_loc(row_indexer[0])
-                quarter = qtr_shifts.loc[row_indexer][
-                    'shifted_normalized_quarters'
-                ]
-                adjustments[qtr_start_idx] = \
-                    [overwrite(0,
-                               qtr_start_idx - 1,  # get index date
-                               sid_idx,
-                               sid_idx,
-                               last[column_name, quarter,
-                                    sid][:qtr_start_idx].values)
-                     ]
+            # Determine where quarters are changing for this sid.
+            qtr_shifts = zero_qtr_sid_data[
+                zero_qtr_sid_data[NORMALIZED_QUARTERS] !=
+                zero_qtr_sid_data[NORMALIZED_QUARTERS].shift(1)
+            ]
+            # On dates where we don't have any information about quarters,
+            # we will get nulls, and each of these will be interpreted as
+            # quarter shifts. We need to remove these here.
+            qtr_shifts = qtr_shifts[
+                qtr_shifts[NORMALIZED_QUARTERS].notnull()
+            ]
+            # For the given sid, determine which quarters we have estimates
+            # for.
+            quarters_with_estimates_for_sid = last_per_qtr.xs(
+                sid, axis=1, level=SID_FIELD_NAME
+            ).groupby(axis=1, level=1).first().columns.values
+            for row_indexer in list(qtr_shifts.index):
+                # Find the starting index of the quarter that comes right
+                # after this row. This isn't the starting index of the
+                # requested quarter, but simply the date we cross over into a
+                # new quarter.
+                qtr_start_idx = dates.searchsorted(
+                    zero_qtr_data.loc[
+                        row_indexer
+                    ][EVENT_DATE_FIELD_NAME],
+                    side=qtr_crossover_point
+                )
+
+                # Only add adjustments if the next quarter starts somewhere in
+                # our date index for this sid. Our 'next' quarter can never
+                # start at index 0; a starting index of 0 means that the next
+                # quarter's event date was NaT.
+                if 0 < qtr_start_idx < len(dates):
+                    # Find the quarter being requested in the quarter we're
+                    # crossing into.
+                    requested_quarter = requested_qtr_data[
+                        SHIFTED_NORMALIZED_QTRS
+                    ][sid].iloc[qtr_start_idx]
+
+                    # If there are estimates for the requested quarter,
+                    # overwrite all values going up to the starting index of
+                    # that quarter with estimates for that quarter.
+                    if requested_quarter in quarters_with_estimates_for_sid:
+                        adjustments[qtr_start_idx] = \
+                            [overwrite(
+                                0,
+                                qtr_start_idx - 1,  # overwrite thru last qtr
+                                sid_idx,
+                                sid_idx,
+                                last_per_qtr[column_name,
+                                             requested_quarter,
+                                             sid][:qtr_start_idx].values)]
+                    # There are no estimates for the quarter. Overwrite all
+                    # values going up to the starting index of that quarter
+                    # with the missing value for this column.
+                    else:
+                        adjustments[qtr_start_idx] = [
+                            overwrite(
+                                0,
+                                qtr_start_idx - 1,
+                                sid_idx,
+                                sid_idx,
+                                np.array(
+                                    [missing_value] *
+                                    len(last_per_qtr.index[:qtr_start_idx]))
+                            )
+                        ]
 
         return AdjustedArray(
-                col_result.values.astype(column.dtype),
+                requested_qtr_data[column_name].values.astype(column.dtype),
                 mask,
                 dict(adjustments),
                 column.missing_value,
@@ -152,110 +292,101 @@ class QuarterEstimatesLoader(PipelineLoader):
         # attribute, given that they're created dynamically?
         groups = groupby(lambda x: x.dataset.num_quarters, columns)
         groups_columns = dict(groups)
-        if (pd.Series(groups_columns) < 0).any():
+        if (pd.Series(groups_columns.keys()) < 0).any():
             raise ValueError("Must pass a number of quarters >= 0")
         out = {}
-        date_values = pd.DataFrame({SIMULTATION_DATES: dates})
-        # dates column must be of type datetime64[ns] in order for subsequent
-        # comparisons to work correctly.
-        date_values[SIMULTATION_DATES] = date_values[
-            SIMULTATION_DATES
-        ].astype('datetime64[ns]')
-        self.estimates['normalized_quarters'] = normalize_quarters(
+        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
             self.estimates[FISCAL_YEAR_FIELD_NAME],
             self.estimates[FISCAL_QUARTER_FIELD_NAME],
-        ).astype(float)
-        for num_quarters, columns in groups_columns.iteritems():
-            name_map = {c:
-                        self.base_column_name_map[
+        )
+        for num_quarters, columns in groups_columns.items():
+            # The column's dataset is itself dynamic and the mapping we
+            # actually want is to its dataset's parent's column name.
+            name_map = {c: self.base_column_name_map[
                             getattr(c.dataset.__base__, c.name)
                         ] for c in columns}
             # Determine the last piece of information we know for each column
-            # on each date in the index.
-            last = last_in_date_group(self.estimates, True, dates,
-                                      assets,
-                                      extra_groupers=[
-                                          'normalized_quarters'])
-            # Forward fill values for each quarter.
-            ffill_across_cols(last, columns)
-            stacked = last.stack(1).stack(1)
+            # on each date in the index for each sid and quarter.
+            last_per_qtr = last_in_date_group(
+                self.estimates, True, dates, assets,
+                extra_groupers=[NORMALIZED_QUARTERS]
+            )
 
-            result = self.load_quarters(num_quarters, stacked)
+            # Forward fill values for each quarter/sid/dataset column.
+            ffill_across_cols(last_per_qtr, columns)
+            # Stack quarter and sid into the index.
+            stacked_last_per_qtr = last_per_qtr.stack([NORMALIZED_QUARTERS,
+                                                       SID_FIELD_NAME])
+            # Set date index name for ease of reference
+            stacked_last_per_qtr.index.set_names(SIMULTATION_DATES, 0, True)
+            # Determine which quarter is next/previous for each date.
+            shifted_qtr_data = self.load_quarters(num_quarters,
+                                                  stacked_last_per_qtr)
+            zero_qtr_idx = shifted_qtr_data.index
+            requested_qtr_idx = shifted_qtr_data.set_index([
+                    shifted_qtr_data.index.get_level_values(
+                        SIMULTATION_DATES
+                    ),
+                    shifted_qtr_data[SHIFTED_NORMALIZED_QTRS],
+                    shifted_qtr_data.index.get_level_values(
+                        SID_FIELD_NAME
+                    )]
+            ).index
 
             for c in columns:
                 column_name = name_map[c]
-                col_result = result[
-                    column_name
-                ].reset_index(1, drop=True).unstack(1).reindex(dates)
-                adjusted_array = self.get_adjustments(result,
-                                                      col_result,
-                                                      last,
+                adjusted_array = self.get_adjustments(zero_qtr_idx,
+                                                      requested_qtr_idx,
+                                                      stacked_last_per_qtr,
+                                                      last_per_qtr,
+                                                      dates,
                                                       column_name,
                                                       c,
                                                       mask,
-                                                      assets)
+                                                      assets,
+                                                      self.qtr_crossover_point)
                 out[c] = adjusted_array
         return out
 
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
+    qtr_crossover_point = 'right'
 
-    def load_quarters(self, num_quarters, stacked):
+    def load_quarters(self, num_quarters, stacked_last_per_qtr):
         # Filter for releases that are on or after each simulation date and
         # determine the next quarter by picking out the upcoming release for
         # each date in the index.
-        stacked = stacked.sort(EVENT_DATE_FIELD_NAME)
-        next_releases = stacked.loc[
-            stacked[EVENT_DATE_FIELD_NAME] >= stacked.index.get_level_values(
-                0
-            )].groupby(level=[0, 2]).nth(0)
-        next_releases[
-            'shifted_normalized_quarters'
-        ] = next_releases.index.get_level_values(
-            'normalized_quarters'
+        stacked_last_per_qtr = stacked_last_per_qtr.sort(
+            EVENT_DATE_FIELD_NAME
+        )
+        next_releases_per_date = stacked_last_per_qtr.loc[
+            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
+            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+        ].groupby(level=[SIMULTATION_DATES, SID_FIELD_NAME]).nth(0)
+        next_releases_per_date[
+            SHIFTED_NORMALIZED_QTRS
+        ] = next_releases_per_date.index.get_level_values(
+            NORMALIZED_QUARTERS
         ) + (num_quarters - 1)
-        next_releases = next_releases.set_index([
-            next_releases.index.get_level_values(0),  # dates
-            'shifted_normalized_quarters',
-            next_releases.index.get_level_values(2)  # sids
-        ])
-        return stacked.loc[next_releases.index]
+        return next_releases_per_date
 
 
 class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
-    def __init__(self,
-                 estimates,
-                 columns):
-        super(PreviousQuartersEstimatesLoader, self).__init__(estimates,
-                                                              columns)
+    qtr_crossover_point = 'left'
 
-    def load_quarters(self, num_quarters, dates_sids, final_releases_per_qtr):
-        # Filter for releases that are on or before each simulation date.
-        eligible_previous_releases = final_releases_per_qtr[
-            final_releases_per_qtr[EVENT_DATE_FIELD_NAME] <=
-            final_releases_per_qtr[SIMULTATION_DATES]
-        ]
-        # For each sid, get the latest release.
-        eligible_previous_releases.sort(EVENT_DATE_FIELD_NAME)
-        previous_releases = eligible_previous_releases.groupby(
-            [SIMULTATION_DATES, SID_FIELD_NAME]
-        ).nth(-1).reset_index()  # We use nth here to avoid forward filling
-        # NaNs, which `last()` will do.
-        previous_releases = previous_releases.rename(columns={
-            FISCAL_YEAR_FIELD_NAME: PREVIOUS_FISCAL_YEAR,
-            FISCAL_QUARTER_FIELD_NAME: PREVIOUS_FISCAL_QUARTER
-        })
-        # The previous fiscal quarter is already our starting point,
-        # so we should offset `num_quarters` by 1.
-        (previous_releases[FISCAL_YEAR_FIELD_NAME],
-         previous_releases[FISCAL_QUARTER_FIELD_NAME]) = shift_quarters(
-            -(num_quarters - 1),
-            previous_releases[PREVIOUS_FISCAL_YEAR],
-            previous_releases[PREVIOUS_FISCAL_QUARTER],
-        )
-        # Do a left merge to get values for each date.
-        result = dates_sids.merge(previous_releases,
-                                  on=([SIMULTATION_DATES,
-                                       SID_FIELD_NAME]),
-                                  how='left')
-        return result
+    def load_quarters(self, num_quarters, stacked_last_per_qtr):
+        # Filter for releases that are on or before each simulation date and
+        # determine the previous quarter by picking out the latest release
+        # for each date in the index.
+        stacked_last_per_qtr = stacked_last_per_qtr.sort(EVENT_DATE_FIELD_NAME)
+        previous_releases_per_date = stacked_last_per_qtr.loc[
+            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
+            stacked_last_per_qtr.index.get_level_values(
+                SIMULTATION_DATES
+            )].groupby(level=[SIMULTATION_DATES, SID_FIELD_NAME]).nth(-1)
+        previous_releases_per_date[
+            SHIFTED_NORMALIZED_QTRS
+        ] = previous_releases_per_date.index.get_level_values(
+            NORMALIZED_QUARTERS
+        ) - (num_quarters - 1)
+        return previous_releases_per_date
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 28e0c90d..5bd91132 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -278,11 +278,41 @@ def check_data_query_args(data_query_time, data_query_tz):
 
 def last_in_date_group(df, reindex, dates, assets, have_sids=True,
                        extra_groupers=[]):
+    """
+    Determine the last piece of information known on each date in the date
+    index for each group.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The DataFrame containing the data to be grouped.
+    reindex : bool
+        Whether or not the DataFrame should be reindexed against the date
+        index. This will add back any dates to the index that were grouped
+        away.
+    dates : pd.DatetimeIndex
+        The dates to use for grouping and reindexing.
+    assets : pd.Int64Index
+        The assets that should be included in the column multiindex.
+    have_sids : bool
+        Whether or not the DataFrame has sids. If it does, they will be used
+        in the groupby.
+    extra_groupers : list of str
+        Any extra field names that should be included in the groupby.
+
+    Returns
+    -------
+    last_in_group : pd.DataFrame
+        A DataFrame with dates as the index and fields used in the groupby as
+        levels of a multiindex of columns.
+
+    """
     idx = dates[dates.searchsorted(
         df[TS_FIELD_NAME].values.astype('datetime64[D]')
     )]
     if have_sids:
-        idx = [idx, SID_FIELD_NAME] + extra_groupers
+        idx = [idx, SID_FIELD_NAME]
+    idx += extra_groupers
 
     last_in_group = df.drop(TS_FIELD_NAME, axis=1).groupby(
         idx,
@@ -291,8 +321,7 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
 
     # For the number of things that we're grouping by (except TS), unstack
     # the df
-    for _ in range(len(idx) - 1):
-        last_in_group = last_in_group.unstack()
+    last_in_group = last_in_group.unstack([-1, -2])
 
     if reindex:
         if have_sids:
@@ -311,6 +340,18 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
 
 
 def ffill_across_cols(df, columns):
+    """
+    Forward fill values in a DataFrame with special logic to handle cases
+    that pd.DataFrame.ffill cannot and cast columns to appropriate types.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The DataFrame to do forward-filling on.
+    columns : list of BoundColumn
+        The BoundColumns that correspond to columns in the DataFrame to which
+        special filling and/or casting logic should be applied.
+    """
     df.ffill(inplace=True)
 
     # Fill in missing values specified by each column. This is made

From cc07a00d1679cbd6dee9cbf30a72ad33f94a4771 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Mon, 29 Aug 2016 17:48:22 -0400
Subject: [PATCH 09/16] TST: add test for requesting multiple datasets with
 multiple quarters

---
 tests/pipeline/test_quarters_estimates.py | 156 +++++++++++++++++++++-
 1 file changed, 150 insertions(+), 6 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 5129bb5a..bb263f22 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -3,6 +3,7 @@ import itertools
 from nose_parameterized import parameterized
 import numpy as np
 import pandas as pd
+from toolz import merge
 
 from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
 from zipline.pipeline.common import (
@@ -137,10 +138,8 @@ class EstimateTestCase(WithAssetFinder,
             Estimates.fiscal_quarter: FISCAL_QUARTER_FIELD_NAME,
             Estimates.fiscal_year: FISCAL_YEAR_FIELD_NAME,
         }
-        cls.loader = cls.make_loader(
-            events=cls.events,
-            columns=cls.columns
-        )
+        cls.loader = cls.make_loader(cls.events, cls.columns)
+
         cls.ASSET_FINDER_EQUITY_SIDS = list(
             cls.events[SID_FIELD_NAME].unique()
         )
@@ -179,9 +178,9 @@ window_test_cases = [
 
 
 class NextEstimateWindowsTestCase(EstimateTestCase):
-    events = estimates_timeline
     START_DATE = pd.Timestamp('2014-12-31')
     END_DATE = pd.Timestamp('2015-02-15')
+    events = estimates_timeline
 
     @classmethod
     def make_loader(cls, events, columns):
@@ -236,9 +235,9 @@ class NextEstimateWindowsTestCase(EstimateTestCase):
 
 
 class PreviousEstimateWindowsTestCase(EstimateTestCase):
-    events = estimates_timeline
     START_DATE = pd.Timestamp('2014-12-31')
     END_DATE = pd.Timestamp('2015-02-15')
+    events = estimates_timeline
 
     @classmethod
     def make_loader(cls, events, columns):
@@ -364,6 +363,151 @@ class NextEstimateTestCase(EstimateTestCase):
         self._test_wrong_num_quarters_passed()
 
 
+class NextEstimateMultipleQuartersTestCase(EstimateTestCase):
+    events = pd.DataFrame({
+            SID_FIELD_NAME: [0] * 2,
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
+                            pd.Timestamp('2015-01-06')],
+            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
+                                    pd.Timestamp('2015-01-20')],
+            'estimate': [1., 2.],
+            FISCAL_QUARTER_FIELD_NAME: [1, 2],
+            FISCAL_YEAR_FIELD_NAME: [2015, 2015]
+        })
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return NextQuartersEstimatesLoader(events, columns)
+
+    def test_multiple_qtrs_requested(self):
+        """
+        This test asks for datasets that calculate which estimates to
+        return for multiple quarters out and checks that the returned columns
+        contain data for the correct number of quarters out.
+        """
+        dataset1 = QuartersEstimates(1)
+        dataset2 = QuartersEstimates(2)
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+
+        results = engine.run_pipeline(
+            Pipeline(
+                merge([{c.name + '1': c.latest for c in dataset1.columns},
+                       {c.name + '2': c.latest for c in dataset2.columns}])
+            ),
+            start_date=self.trading_days[0],
+            end_date=self.trading_days[-1],
+        )
+        q1_columns = [col.name + '1' for col in self.columns]
+        q2_columns = [col.name + '2' for col in self.columns]
+
+        # We now expect a column for 1 quarter out and a column for 2
+        # quarters out for each of the dataset columns.
+        assert np.array_equal(sorted(np.array(q1_columns + q2_columns)),
+                              sorted(results.columns.values))
+
+        def check_null_range(start_date, stop_date, col_name):
+            # Make sure that values in the given column/range are all null.
+            assert (
+                results.loc[
+                    start_date:stop_date
+                ][col_name].isnull()
+            ).all()
+
+        def check_values(start_date, end_date, col_name, qtr, event_idx):
+            # Make sure that values in the given column/range are all equal
+            # to the value at the given index from the raw data.
+            assert (
+                results.loc[
+                    start_date:end_date
+                ][col_name + qtr] ==
+                self.events[col_name][event_idx]
+            ).all()
+
+        # Although it's painful to check the ranges one by one for different
+        # columns, it's important to do this so that we have a clear
+        # understanding of how knowledge/event dates interact and give us
+        # values for 1Q out and 2Q out.
+        for col in self.columns:
+            # 1Q out cols
+            check_null_range(self.START_DATE,
+                             pd.Timestamp('2014-12-31'),
+                             col.name + '1')
+            check_values(pd.Timestamp('2015-01-02'),
+                         pd.Timestamp('2015-01-10'),
+                         col.name,
+                         '1',
+                         0)  # First event is our 1Q out
+            check_values(pd.Timestamp('2015-01-11'),
+                         pd.Timestamp('2015-01-20'),
+                         col.name,
+                         '1',
+                         1)  # Second event becomes our 1Q out
+            check_null_range(pd.Timestamp('2015-01-21'),
+                             self.END_DATE,
+                             col.name + '1')
+
+        # Fiscal year and quarter are different for 2Q out because even when we
+        # have no data for 2Q out, we still know which fiscal year/quarter we
+        # want data for as long as we have data for 1Q out.
+        for col in set(self.columns.keys()) - {Estimates.fiscal_year,
+                                               Estimates.fiscal_quarter}:
+            # 2Q out cols
+            check_null_range(self.START_DATE,
+                             pd.Timestamp('2015-01-05'),
+                             col.name + '2')
+            # We have data for 2Q out when our knowledge of
+            # the next quarter and the quarter after that
+            # overlaps and before the next quarter's event
+            # happens.
+            check_values(pd.Timestamp('2015-01-06'),
+                         pd.Timestamp('2015-01-10'),
+                         col.name,
+                         '2',
+                         1)
+            check_null_range(pd.Timestamp('2015-01-11'),
+                             self.END_DATE,
+                             col.name + '2')
+
+        # Check fiscal year/quarter for 2Q out.
+        check_null_range(self.START_DATE,
+                         pd.Timestamp('2015-01-01'),
+                         Estimates.fiscal_quarter.name + '2')
+        check_null_range(self.START_DATE,
+                         pd.Timestamp('2015-01-01'),
+                         Estimates.fiscal_year.name + '2')
+        # We have a different quarter number than the quarter numbers we have
+        # in our data for 2Q out, so assert manually.
+        assert (
+                results.loc[
+                    pd.Timestamp('2015-01-02'):pd.Timestamp('2015-01-10')
+                ][Estimates.fiscal_quarter.name + '2'] ==
+                2
+            ).all()
+        assert (
+                results.loc[
+                    pd.Timestamp('2015-01-10'):pd.Timestamp('2015-01-20')
+                ][Estimates.fiscal_quarter.name + '2'] ==
+                3
+            ).all()
+        # We have the same fiscal year, 2015, for 2Q out over the date range of
+        # interest.
+        check_values(pd.Timestamp('2015-01-02'),
+                     pd.Timestamp('2015-01-20'),
+                     Estimates.fiscal_year.name,
+                     '2',
+                     1)
+        check_null_range(pd.Timestamp('2015-01-21'),
+                         self.END_DATE,
+                         Estimates.fiscal_quarter.name + '2')
+        check_null_range(pd.Timestamp('2015-01-21'),
+                         self.END_DATE,
+                         Estimates.fiscal_year.name + '2')
+
+
 class BlazeNextEstimateLoaderTestCase(NextEstimateTestCase):
     """
     Run the same tests as EventsLoaderTestCase, but using a BlazeEventsLoader.

From 2a09160ca803760d1fa2c0e0ce702e5da0fd6d1b Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 30 Aug 2016 10:13:38 -0400
Subject: [PATCH 10/16] TST: add test to check previous columns w/ multiple
 qtrs

MAINT: pass column to name dict

MAINT: make check for invalid num columns py3-compatible
---
 tests/pipeline/test_adjusted_array.py         |  197 ++-
 tests/pipeline/test_quarters_estimates.py     | 1173 ++++++++++-------
 zipline/lib/adjustment.pyx                    |   50 +-
 zipline/pipeline/loaders/blaze/core.py        |    9 +-
 zipline/pipeline/loaders/blaze/estimates.py   |   10 +-
 zipline/pipeline/loaders/blaze/events.py      |   18 +-
 zipline/pipeline/loaders/blaze/utils.py       |   13 +-
 zipline/pipeline/loaders/quarter_estimates.py |  381 ++++--
 zipline/pipeline/loaders/utils.py             |   32 +-
 zipline/testing/core.py                       |    6 +
 zipline/testing/fixtures.py                   |    5 +-
 11 files changed, 1154 insertions(+), 740 deletions(-)

diff --git a/tests/pipeline/test_adjusted_array.py b/tests/pipeline/test_adjusted_array.py
index a5bf3ed0..730af952 100644
--- a/tests/pipeline/test_adjusted_array.py
+++ b/tests/pipeline/test_adjusted_array.py
@@ -202,11 +202,7 @@ def _gen_multiplicative_adjustment_cases(dtype):
     )
 
 
-def _gen_overwrite_adjustment_cases(name,
-                                    make_input,
-                                    make_expected_output,
-                                    dtype,
-                                    missing_value):
+def _gen_overwrite_adjustment_cases(dtype):
     """
     Generate test cases for overwrite adjustments.
 
@@ -226,6 +222,8 @@ def _gen_overwrite_adjustment_cases(name,
         unicode_dtype: ObjectOverwrite,
         object_dtype: ObjectOverwrite,
     }[dtype]
+    make_expected_dtype = as_dtype(dtype)
+    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)
 
     if dtype == object_dtype:
         # When we're testing object dtypes, we expect to have strings, but
@@ -237,30 +235,30 @@ def _gen_overwrite_adjustment_cases(name,
 
     adjustments = {}
     buffer_as_of = [None] * 6
-    baseline = make_input([[2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2]])
+    baseline = make_expected_dtype([[2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2]])
 
-    buffer_as_of[0] = make_expected_output([[2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[0] = make_expected_dtype([[2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     # Note that row indices are inclusive!
     adjustments[1] = [
         adjustment_type(0, 0, 0, 0, make_overwrite_value(dtype, 1)),
     ]
-    buffer_as_of[1] = make_expected_output([[1, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[1] = make_expected_dtype([[1, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     # No adjustment at index 2.
     buffer_as_of[2] = buffer_as_of[1]
@@ -269,33 +267,33 @@ def _gen_overwrite_adjustment_cases(name,
         adjustment_type(1, 2, 1, 1, make_overwrite_value(dtype, 3)),
         adjustment_type(0, 1, 0, 0, make_overwrite_value(dtype, 4)),
     ]
-    buffer_as_of[3] = make_expected_output([[4, 2, 2],
-                                            [4, 3, 2],
-                                            [2, 3, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[3] = make_expected_dtype([[4, 2, 2],
+                                           [4, 3, 2],
+                                           [2, 3, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     adjustments[4] = [
         adjustment_type(0, 3, 2, 2, make_overwrite_value(dtype, 5))
     ]
-    buffer_as_of[4] = make_expected_output([[4, 2, 5],
-                                            [4, 3, 5],
-                                            [2, 3, 5],
-                                            [2, 2, 5],
-                                            [2, 2, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[4] = make_expected_dtype([[4, 2, 5],
+                                           [4, 3, 5],
+                                           [2, 3, 5],
+                                           [2, 2, 5],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     adjustments[5] = [
         adjustment_type(0, 4, 1, 1, make_overwrite_value(dtype, 6)),
         adjustment_type(2, 2, 2, 2, make_overwrite_value(dtype, 7)),
     ]
-    buffer_as_of[5] = make_expected_output([[4, 6, 5],
-                                            [4, 6, 5],
-                                            [2, 6, 7],
-                                            [2, 6, 5],
-                                            [2, 6, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[5] = make_expected_dtype([[4, 6, 5],
+                                           [4, 6, 5],
+                                           [2, 6, 7],
+                                           [2, 6, 5],
+                                           [2, 6, 2],
+                                           [2, 2, 2]])
 
     return _gen_expectations(
         baseline,
@@ -306,11 +304,7 @@ def _gen_overwrite_adjustment_cases(name,
     )
 
 
-def _gen_overwrite_1d_array_adjustment_case(name,
-                                            make_input,
-                                            make_expected_output,
-                                            dtype,
-                                            missing_value):
+def _gen_overwrite_1d_array_adjustment_case(dtype):
     """
     Generate test cases for overwrite adjustments.
 
@@ -327,21 +321,24 @@ def _gen_overwrite_1d_array_adjustment_case(name,
         float64_dtype: Float641DArrayOverwrite,
         datetime64ns_dtype: Datetime641DArrayOverwrite,
     }[dtype]
+    make_expected_dtype = as_dtype(dtype)
+    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)
+
     adjustments = {}
     buffer_as_of = [None] * 6
-    baseline = make_input([[2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2],
-                           [2, 2, 2]])
+    baseline = make_expected_dtype([[2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2],
+                                    [2, 2, 2]])
 
-    buffer_as_of[0] = make_expected_output([[2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2],
-                                            [2, 2, 2]])
+    buffer_as_of[0] = make_expected_dtype([[2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     vals1 = [1]
     # Note that row indices are inclusive!
@@ -351,12 +348,12 @@ def _gen_overwrite_1d_array_adjustment_case(name,
             array([coerce_to_dtype(dtype, val) for val in vals1])
         )
     ]
-    buffer_as_of[1] = make_input([[1, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2]])
+    buffer_as_of[1] = make_expected_dtype([[1, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     # No adjustment at index 2.
     buffer_as_of[2] = buffer_as_of[1]
@@ -368,12 +365,12 @@ def _gen_overwrite_1d_array_adjustment_case(name,
             array([coerce_to_dtype(dtype, val) for val in vals3])
         )
     ]
-    buffer_as_of[3] = make_input([[4, 2, 2],
-                                  [4, 2, 2],
-                                  [1, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2],
-                                  [2, 2, 2]])
+    buffer_as_of[3] = make_expected_dtype([[4, 2, 2],
+                                           [4, 2, 2],
+                                           [1, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     vals4 = [5] * 4
     adjustments[4] = [
@@ -381,12 +378,12 @@ def _gen_overwrite_1d_array_adjustment_case(name,
             0, 3, 2, 2,
             array([coerce_to_dtype(dtype, val) for val in vals4]))
     ]
-    buffer_as_of[4] = make_input([[4, 2, 5],
-                                  [4, 2, 5],
-                                  [1, 2, 5],
-                                  [2, 2, 5],
-                                  [2, 2, 2],
-                                  [2, 2, 2]])
+    buffer_as_of[4] = make_expected_dtype([[4, 2, 5],
+                                           [4, 2, 5],
+                                           [1, 2, 5],
+                                           [2, 2, 5],
+                                           [2, 2, 2],
+                                           [2, 2, 2]])
 
     vals5 = range(1, 6)
     adjustments[5] = [
@@ -394,12 +391,12 @@ def _gen_overwrite_1d_array_adjustment_case(name,
             0, 4, 1, 1,
             array([coerce_to_dtype(dtype, val) for val in vals5])),
     ]
-    buffer_as_of[5] = make_input([[4, 1, 5],
-                                  [4, 2, 5],
-                                  [1, 3, 5],
-                                  [2, 4, 5],
-                                  [2, 5, 2],
-                                  [2, 2, 2]])
+    buffer_as_of[5] = make_expected_dtype([[4, 1, 5],
+                                           [4, 2, 5],
+                                           [1, 3, 5],
+                                           [2, 4, 5],
+                                           [2, 5, 2],
+                                           [2, 2, 2]])
 
     return _gen_expectations(
         baseline,
@@ -532,38 +529,10 @@ class AdjustedArrayTestCase(TestCase):
 
     @parameterized.expand(
         chain(
-            _gen_overwrite_adjustment_cases(
-                'float',
-                make_input=as_dtype(float64_dtype),
-                make_expected_output=as_dtype(float64_dtype),
-                dtype=float64_dtype,
-                missing_value=default_missing_value_for_dtype(float64_dtype),
-            ),
-            _gen_overwrite_adjustment_cases(
-                'datetime',
-                make_input=as_dtype(datetime64ns_dtype),
-                make_expected_output=as_dtype(datetime64ns_dtype),
-                dtype=datetime64ns_dtype,
-                missing_value=default_missing_value_for_dtype(
-                    datetime64ns_dtype,
-                ),
-            ),
-            _gen_overwrite_1d_array_adjustment_case(
-                'float',
-                make_input=as_dtype(float64_dtype),
-                make_expected_output=as_dtype(float64_dtype),
-                dtype=float64_dtype,
-                missing_value=default_missing_value_for_dtype(float64_dtype),
-            ),
-            _gen_overwrite_1d_array_adjustment_case(
-                'datetime',
-                make_input=as_dtype(datetime64ns_dtype),
-                make_expected_output=as_dtype(datetime64ns_dtype),
-                dtype=datetime64ns_dtype,
-                missing_value=default_missing_value_for_dtype(
-                    datetime64ns_dtype,
-                ),
-            ),
+            _gen_overwrite_adjustment_cases(float64_dtype),
+            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
+            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
+            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
             # There are six cases here:
             # Using np.bytes/np.unicode/object arrays as inputs.
             # Passing np.bytes/np.unicode/object arrays to LabelArray,
diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index bb263f22..9217df77 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,8 +1,10 @@
 import blaze as bz
 import itertools
+from nose.tools import assert_true
 from nose_parameterized import parameterized
 import numpy as np
 import pandas as pd
+from pandas.util.testing import assert_frame_equal
 from toolz import merge
 
 from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
@@ -13,19 +15,26 @@ from zipline.pipeline.common import (
     SID_FIELD_NAME,
     TS_FIELD_NAME,
 )
-from zipline.pipeline.data import DataSet, Column
+from zipline.pipeline.data import DataSet
+from zipline.pipeline.data import Column
 from zipline.pipeline.loaders.blaze.estimates import (
     BlazeNextEstimatesLoader,
     BlazePreviousEstimatesLoader
 )
 from zipline.pipeline.loaders.quarter_estimates import (
     NextQuartersEstimatesLoader,
+    normalize_quarters,
     PreviousQuartersEstimatesLoader,
-    split_normalized_quarters, normalize_quarters)
-from zipline.testing import ZiplineTestCase
-from zipline.testing.fixtures import WithAssetFinder, WithTradingSessions
+    split_normalized_quarters,
+)
+from zipline.testing.fixtures import (
+    WithAssetFinder,
+    WithTradingSessions,
+    ZiplineTestCase,
+)
 from zipline.testing.predicates import assert_equal
-from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
+from zipline.utils.numpy_utils import datetime64ns_dtype
+from zipline.utils.numpy_utils import float64_dtype
 
 
 class Estimates(DataSet):
@@ -42,276 +51,267 @@ def QuartersEstimates(num_qtr):
     return QtrEstimates
 
 
-# 0Q1: 2015-01-05.Q1.e1.2015-01-06, 2015-01-10.Q1.e1.2015-01-11,
-# 0Q2: 2015-01-15.Q2.e1.2015-01-16, 2015-01-20.Q2.e1.2015-01-21,
-# 0Q4: 2015-02-05.Q4.e1.2015-02-06, 2015-02-10.Q4.e1.2015-02-11,
-# Skip Q3 to make sure we handle skipped quarter data correctly.
-estimates_timeline = pd.DataFrame({
-    TS_FIELD_NAME: [pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-07'),
-                    pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-17'),
-                    pd.Timestamp('2015-01-05'), pd.Timestamp('2015-01-17'),
-                    pd.Timestamp('2015-01-22'), pd.Timestamp('2015-02-02')],
-    EVENT_DATE_FIELD_NAME:
-        [pd.Timestamp('2015-01-10'), pd.Timestamp('2015-01-10'),
-         pd.Timestamp('2015-01-20'), pd.Timestamp('2015-01-20'),
-         pd.Timestamp('2015-02-10'), pd.Timestamp('2015-02-10'),
-         pd.Timestamp('2015-02-10'), pd.Timestamp('2015-02-10')],
-    'estimate': [1.]*2 + [2.] * 2 + [4.] * 4,
-    FISCAL_QUARTER_FIELD_NAME: [1]*2 + [2] * 2 + [4] * 4,
-    FISCAL_YEAR_FIELD_NAME: [2015]*8,
-    SID_FIELD_NAME: [0]*8
-})
+class WithEstimates(WithTradingSessions, WithAssetFinder):
+    """
+    ZiplineTestCase mixin providing cls.loader and cls.events as class
+    level fixtures.
 
 
-# Final release dates never change. The quarters have very tight date ranges
-# in order to reduce the number of dates we need to iterate through when
-# testing.
-releases = pd.DataFrame({
-    TS_FIELD_NAME: [pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-31')],
-    EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-15'),
-                            pd.Timestamp('2015-01-31')],
-    'estimate': [0.5, 0.8],
-    FISCAL_QUARTER_FIELD_NAME: [1.0, 2.0],
-    FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0]
-})
+    Methods
+    -------
+    make_loader(events, columns) -> PipelineLoader
+        Method which returns the loader to be used throughout tests.
 
-q1_knowledge_dates = [pd.Timestamp('2015-01-01'), pd.Timestamp('2015-01-04'),
-                      pd.Timestamp('2015-01-08'), pd.Timestamp('2015-01-12')]
-q2_knowledge_dates = [pd.Timestamp('2015-01-16'), pd.Timestamp('2015-01-20'),
-                      pd.Timestamp('2015-01-24'), pd.Timestamp('2015-01-28')]
-# We want to model the possibility of an estimate predicting a release date
-# that doesn't match the actual release. This could be done by dynamically
-# generating more combinations with different release dates, but that
-# significantly increases the amount of time it takes to run the tests. These
-# hard-coded cases are sufficient to know that we can update our beliefs when
-# we get new information.
-q1_release_dates = [pd.Timestamp('2015-01-15'),
-                    pd.Timestamp('2015-01-16')]  # One day late
-q2_release_dates = [pd.Timestamp('2015-01-30'),  # One day early
-                    pd.Timestamp('2015-01-31')]
-estimates = pd.DataFrame({
-    EVENT_DATE_FIELD_NAME: q1_release_dates + q2_release_dates,
-    'estimate': [.1, .2, .3, .4],
-    FISCAL_QUARTER_FIELD_NAME: [1.0, 1.0, 2.0, 2.0],
-    FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0, 2015.0, 2015.0]
-})
+        events : pd.DataFrame
+            The raw events to be used as input to the pipeline loader.
+        columns : dict[str -> str]
+            The dictionary mapping the names of BoundColumns to the
+            associated column name in the events DataFrame.
+    """
 
-
-def gen_estimates():
-    sid_estimates = []
-    sid_releases = []
-    for sid, (q1e1, q1e2, q2e1, q2e2) in enumerate(
-            itertools.permutations(q1_knowledge_dates + q2_knowledge_dates,
-                                   4)
-    ):
-        # We're assuming that estimates must come before the relevant release.
-        if (q1e1 < q1e2 and
-                q2e1 < q2e2 and
-                q1e1 < q1_release_dates[0] and
-                q1e2 < q1_release_dates[1]):
-            sid_estimate = estimates.copy(True)
-            sid_estimate[TS_FIELD_NAME] = [q1e1, q1e2, q2e1, q2e2]
-            sid_estimate[SID_FIELD_NAME] = sid
-            sid_estimates += [sid_estimate]
-            sid_release = releases.copy(True)
-            sid_release[SID_FIELD_NAME] = sid_estimate[SID_FIELD_NAME]
-            sid_releases += [sid_release]
-    return pd.concat(sid_estimates + sid_releases).reset_index(drop=True)
-
-
-class EstimateTestCase(WithAssetFinder,
-                       WithTradingSessions,
-                       ZiplineTestCase):
+    # Short window defined in order for test to run faster.
     START_DATE = pd.Timestamp('2014-12-28')
     END_DATE = pd.Timestamp('2015-02-03')
 
     @classmethod
     def make_loader(cls, events, columns):
-        pass
+        raise NotImplementedError('make_loader')
 
     @classmethod
     def init_class_fixtures(cls):
-        cls.sids = cls.events['sid'].unique()
+        cls.sids = cls.events[SID_FIELD_NAME].unique()
         cls.columns = {
-            Estimates.estimate: 'estimate',
-            Estimates.event_date: EVENT_DATE_FIELD_NAME,
-            Estimates.fiscal_quarter: FISCAL_QUARTER_FIELD_NAME,
-            Estimates.fiscal_year: FISCAL_YEAR_FIELD_NAME,
+            Estimates.event_date: 'event_date',
+            Estimates.fiscal_quarter: 'fiscal_quarter',
+            Estimates.fiscal_year: 'fiscal_year',
+            Estimates.estimate: 'estimate'
         }
-        cls.loader = cls.make_loader(cls.events, cls.columns)
-
+        cls.loader = cls.make_loader(cls.events, {column.name: val for
+                                                  column, val in
+                                                  cls.columns.items()})
         cls.ASSET_FINDER_EQUITY_SIDS = list(
             cls.events[SID_FIELD_NAME].unique()
         )
         cls.ASSET_FINDER_EQUITY_SYMBOLS = [
             's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
         ]
-        super(EstimateTestCase, cls).init_class_fixtures()
+        super(WithEstimates, cls).init_class_fixtures()
+
+
+class WithWrongNumQuarters(WithEstimates):
+    """
+    ZiplineTestCase mixin providing cls.events as a class level fixture and
+    defining a test for all inheritors to use.
+
+    Attributes
+    ----------
+    events : pd.DataFrame
+        A simple DataFrame with columns needed for estimates and a single sid
+        and no other data.
+
+    Tests
+    ------
+    test_wrong_num_quarters_passed()
+        Tests that loading with an incorrect quarter number raises an error.
+    """
+    events = pd.DataFrame({SID_FIELD_NAME: 0},
+                          columns=[SID_FIELD_NAME,
+                                   TS_FIELD_NAME,
+                                   EVENT_DATE_FIELD_NAME,
+                                   FISCAL_QUARTER_FIELD_NAME,
+                                   FISCAL_YEAR_FIELD_NAME,
+                                   'estimate'],
+                          index=[0])
+
+    def test_wrong_num_quarters_passed(self):
+        dataset = QuartersEstimates(-1)
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+        p = Pipeline({c.name: c.latest for c in dataset.columns})
 
-    def _test_wrong_num_quarters_passed(self):
         with self.assertRaises(ValueError):
-            dataset = QuartersEstimates(-1)
-            engine = SimplePipelineEngine(
-                lambda x: self.loader,
-                self.trading_days,
-                self.asset_finder,
-            )
-
             engine.run_pipeline(
-                Pipeline({c.name: c.latest for c in dataset.columns}),
+                p,
                 start_date=self.trading_days[0],
                 end_date=self.trading_days[-1],
             )
 
 
-window_test_cases = [
-    (window_len, start_idx, num_quarters_out) for
-    (window_len, start_idx), num_quarters_out in
-    itertools.product(
-        [[5, pd.Timestamp('2015-01-09').tz_localize('utc')],
-         [6, pd.Timestamp('2015-01-12').tz_localize('utc')],
-         [11, pd.Timestamp('2015-01-20').tz_localize('utc')],
-         [19, pd.Timestamp('2015-01-30').tz_localize('utc')],
-         [26, pd.Timestamp('2015-02-10').tz_localize('utc')]],
-        [1, 2, 3, 4])
-]
-
-
-class NextEstimateWindowsTestCase(EstimateTestCase):
-    START_DATE = pd.Timestamp('2014-12-31')
-    END_DATE = pd.Timestamp('2015-02-15')
-    events = estimates_timeline
-
-    @classmethod
-    def make_loader(cls, events, columns):
-        return NextQuartersEstimatesLoader(events, columns)
-
-    @parameterized.expand(window_test_cases)
-    def test_next_estimate_windows_at_quarter_boundaries(self,
-                                                         window_len,
-                                                         start_idx,
-                                                         num_quarters_out):
-        """
-        Tests that we overwrite values with the correct quarter's estimate at
-        the correct dates.
-        """
-        dataset = QuartersEstimates(num_quarters_out)
-
-        class SomeFactor(CustomFactor):
-            inputs = [dataset.estimate]
-            window_length = window_len
-
-            def compute(self, today, assets, out, *inputs):
-                unique_inputs = np.unique(inputs).tolist()
-                requested_quarter = None
-                if (pd.Timestamp('2015-02-10').tz_localize('utc') >= today >=
-                        pd.Timestamp('2015-01-05').tz_localize('utc')):
-                    next_quarter = estimates_timeline[
-                            estimates_timeline[EVENT_DATE_FIELD_NAME] >= today
-                        ].min()[FISCAL_QUARTER_FIELD_NAME]
-                    requested_quarter = next_quarter + num_quarters_out - 1
-
-                # If we know something about the requested quarter, assert
-                # that all our estimates in the window are about that quarter.
-                if requested_quarter and requested_quarter <= 4 and \
-                        requested_quarter != 3:
-                    assert np.equal(unique_inputs, requested_quarter).all()
-                else:
-                    # We don't have any information yet about the next quarter
-                    # or about the requested quarter; in that case, all our
-                    # estimates in the window should be NaN across time.
-                    assert np.isnan(unique_inputs).all()
-
-        engine = SimplePipelineEngine(
-            lambda x: self.loader,
-            self.trading_days,
-            self.asset_finder,
-        )
-        engine.run_pipeline(
-            Pipeline({'est': SomeFactor()}),
-            start_date=start_idx,
-            end_date=self.trading_days[-1],
-        )
-
-
-class PreviousEstimateWindowsTestCase(EstimateTestCase):
-    START_DATE = pd.Timestamp('2014-12-31')
-    END_DATE = pd.Timestamp('2015-02-15')
-    events = estimates_timeline
-
+class PreviousWithWrongNumQuarters(WithWrongNumQuarters,
+                                   ZiplineTestCase):
+    """
+    Tests that previous quarter loader correctly breaks if an incorrect
+    number of quarters is passed.
+    """
     @classmethod
     def make_loader(cls, events, columns):
         return PreviousQuartersEstimatesLoader(events, columns)
 
-    @parameterized.expand(window_test_cases)
-    def test_previous_estimate_windows_at_quarter_boundaries(self,
-                                                             window_len,
-                                                             start_idx,
-                                                             num_quarters_out):
-        """
-        Tests that we overwrite values with the correct quarter's estimate at
-        the correct dates.
-        """
-        dataset = QuartersEstimates(num_quarters_out)
-
-        class SomeFactor(CustomFactor):
-            inputs = [dataset.estimate]
-            window_length = window_len
-
-            def compute(self, today, assets, out, *inputs):
-                unique_inputs = np.unique(inputs).tolist()
-                requested_quarter = None
-                if today >= pd.Timestamp('2015-01-12').tz_localize('utc'):
-                    previous_quarter = estimates_timeline[
-                            estimates_timeline[EVENT_DATE_FIELD_NAME] <= today
-                        ].max()[FISCAL_QUARTER_FIELD_NAME]
-                    requested_quarter = (
-                        previous_quarter - (num_quarters_out - 1)
-                    )
-
-                # If we know something about the requested quarter, assert
-                # that all our estimates in the window are about that quarter.
-                if requested_quarter and requested_quarter >= 0 and \
-                        requested_quarter != 3:
-                    assert np.equal(unique_inputs, requested_quarter).all()
-                else:
-                    # We don't have any information yet about the previous
-                    # quarter
-                    # or about the requested quarter; in that case, all our
-                    # estimates in the window should be NaN across time.
-                    assert np.isnan(unique_inputs).all()
-
-        engine = SimplePipelineEngine(
-            lambda x: self.loader,
-            self.trading_days,
-            self.asset_finder,
-        )
-        engine.run_pipeline(
-            Pipeline({'est': SomeFactor()}),
-            start_date=start_idx,
-            end_date=self.trading_days[-1],
-        )
-
-
-class NextEstimateTestCase(EstimateTestCase):
-    events = gen_estimates()
 
+class NextWithWrongNumQuarters(WithWrongNumQuarters,
+                               ZiplineTestCase):
+    """
+    Tests that next quarter loader correctly breaks if an incorrect
+    number of quarters is passed.
+    """
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
 
-    def test_next_estimates(self):
+
+class WithEstimatesT0(WithEstimates):
+    """
+    ZiplineTestCase mixin providing cls.events as a class level fixture and
+    defining a test for all inheritors to use.
+
+    Attributes
+    ----------
+    cls.events : pd.DataFrame
+        Generated dynamically in order to test inter-leavings of estimates and
+        event dates for multiple quarters to make sure that we select the
+        right immediate 'next' or 'previous' quarter relative to each date -
+        i.e., the right 't0' on the timeline. We care about selecting the
+        right 't0' because we use that to calculate which quarter's data needs
+        to be returned for each day.
+
+    Methods
+    -------
+    get_expected_estimate(q1_knowledge,
+                          q2_knowledge,
+                          comparable_date) -> pd.DataFrame
+        Retrieves the expected estimate given the latest knowledge about each
+        quarter and the date on which the estimate is being requested. If
+        there is no expected estimate, returns an empty DataFrame.
+
+    Tests
+    ------
+    test_estimates()
+        Tests that we get the right 't0' value on each day for each sid and
+        for each column.
+    """
+    q1_knowledge_dates = [pd.Timestamp('2015-01-01'),
+                          pd.Timestamp('2015-01-04'),
+                          pd.Timestamp('2015-01-08'),
+                          pd.Timestamp('2015-01-12')]
+    q2_knowledge_dates = [pd.Timestamp('2015-01-16'),
+                          pd.Timestamp('2015-01-20'),
+                          pd.Timestamp('2015-01-24'),
+                          pd.Timestamp('2015-01-28')]
+    # We want to model the possibility of an estimate predicting a release date
+    # that doesn't match the actual release. This could be done by dynamically
+    # generating more combinations with different release dates, but that
+    # significantly increases the amount of time it takes to run the tests.
+    # These hard-coded cases are sufficient to know that we can update our
+    # beliefs when we get new information.
+    q1_release_dates = [pd.Timestamp('2015-01-15'),
+                        pd.Timestamp('2015-01-16')]  # One day late
+    q2_release_dates = [pd.Timestamp('2015-01-30'),  # One day early
+                        pd.Timestamp('2015-01-31')]
+
+    @classmethod
+    def gen_estimates(cls):
         """
-        The goal of this test is to make sure that we select the right
-        datapoint as our 'next' w.r.t each date.
+        In order to determine which estimate we care about for a particular
+        sid, we need to look at all estimates that we have for that sid and
+        their associated event dates.
+
+        We define q1 < q2, and thus event1 < event2 since event1 occurs
+        during q1 and event2 occurs during q2 and we assume that there can
+        only be 1 event per quarter. We assume that there can be multiple
+        estimates per quarter leading up to the event. We assume that estimates
+        will not surpass the relevant event date. We will look at 2 estimates
+        for an event before the event occurs, since that is the simplest
+        scenario that covers the interesting edge cases:
+            - estimate values changing
+            - a release date changing
+            - estimates for different quarters interleaving
+
+        Thus, we generate all possible inter-leavings of 2 estimates per
+        quarter-event where estimate1 < estimate2 and all estimates are < the
+        relevant event and assign each of these inter-leavings to a
+        different sid.
         """
+
+        sid_estimates = []
+        sid_releases = []
+        # We want all permutations of 2 knowledge dates per quarter.
+        it = enumerate(
+            itertools.permutations(cls.q1_knowledge_dates +
+                                   cls.q2_knowledge_dates,
+                                   4)
+        )
+        for sid, (q1e1, q1e2, q2e1, q2e2) in it:
+            # We're assuming that estimates must come before the relevant
+            # release.
+            if (q1e1 < q1e2 and
+                    q2e1 < q2e2 and
+                    # All estimates are < Q2's event, so just constrain Q1
+                    # estimates.
+                    q1e1 < cls.q1_release_dates[0] and
+                    q1e2 < cls.q1_release_dates[0]):
+                sid_estimates.append(cls.create_estimates_df(q1e1,
+                                                             q1e2,
+                                                             q2e1,
+                                                             q2e2,
+                                                             sid))
+                sid_releases.append(cls.create_releases_df(sid))
+
+        return pd.concat(sid_estimates + sid_releases).reset_index(drop=True)
+
+    @classmethod
+    def create_releases_df(cls, sid):
+        # Final release dates never change. The quarters have very tight date
+        # ranges in order to reduce the number of dates we need to iterate
+        # through when testing.
+        return pd.DataFrame({
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-15'),
+                            pd.Timestamp('2015-01-31')],
+            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-15'),
+                                    pd.Timestamp('2015-01-31')],
+            'estimate': [0.5, 0.8],
+            FISCAL_QUARTER_FIELD_NAME: [1.0, 2.0],
+            FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0],
+            SID_FIELD_NAME: sid
+        })
+
+    @classmethod
+    def create_estimates_df(cls,
+                            q1e1,
+                            q1e2,
+                            q2e1,
+                            q2e2,
+                            sid):
+        return pd.DataFrame({
+            EVENT_DATE_FIELD_NAME: cls.q1_release_dates + cls.q2_release_dates,
+            'estimate': [.1, .2, .3, .4],
+            FISCAL_QUARTER_FIELD_NAME: [1.0, 1.0, 2.0, 2.0],
+            FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0, 2015.0, 2015.0],
+            TS_FIELD_NAME: [q1e1, q1e2, q2e1, q2e2],
+            SID_FIELD_NAME: sid,
+        })
+
+    @classmethod
+    def init_class_fixtures(cls):
+        # Must be generated before call to super since super uses `events`.
+        cls.events = cls.gen_estimates()
+        super(WithEstimatesT0, cls).init_class_fixtures()
+
+    def get_expected_estimate(self,
+                              q1_knowledge,
+                              q2_knowledge,
+                              comparable_date):
+        return pd.DataFrame()
+
+    def test_estimates(self):
         dataset = QuartersEstimates(1)
         engine = SimplePipelineEngine(
             lambda x: self.loader,
             self.trading_days,
             self.asset_finder,
         )
-
         results = engine.run_pipeline(
             Pipeline({c.name: c.latest for c in dataset.columns}),
             start_date=self.trading_days[0],
@@ -328,63 +328,179 @@ class NextEstimateTestCase(EstimateTestCase):
                 ts_eligible_estimates = ts_sorted_estimates[
                     ts_sorted_estimates[TS_FIELD_NAME] <= comparable_date
                 ]
-                expected_estimate = pd.DataFrame()
+                # If there are estimates we know about:
                 if not ts_eligible_estimates.empty:
+                    # Determine the last piece of information we know about
+                    # for q1 and q2. This takes advantage of the fact that we
+                    # only have 2 quarters in the test data.
                     q1_knowledge = ts_eligible_estimates[
                         ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
                     ]
                     q2_knowledge = ts_eligible_estimates[
                         ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
                     ]
-
-                    # If our latest knowledge of q1 is that the release is
-                    # happening on this simulation date or later, then that's
-                    # the estimate we want to use.
-                    if (not q1_knowledge.empty and
-                        q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
-                            comparable_date):
-                        expected_estimate = q1_knowledge.iloc[-1]
-                    # If q1 has already happened or we don't know about it
-                    # yet and our latest knowledge indicates that q2 hasn't
-                    # happend yet, then that's the estimate we want to use.
-                    elif (not q2_knowledge.empty and
-                          q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
-                            comparable_date):
-                        expected_estimate = q2_knowledge.iloc[-1]
-                if not expected_estimate.empty:
-                    for colname in sid_estimates.columns:
-                        expected_value = expected_estimate[colname]
-                        computed_value = sid_estimates.iloc[i][colname]
-                        assert_equal(expected_value, computed_value)
+                    expected_estimate = self.get_expected_estimate(
+                        q1_knowledge,
+                        q2_knowledge,
+                        comparable_date,
+                    )
+                    if not expected_estimate.empty:
+                        for colname in sid_estimates.columns:
+                            expected_value = expected_estimate[colname]
+                            computed_value = sid_estimates.iloc[i][colname]
+                            assert_equal(expected_value, computed_value)
+                    else:
+                        # There are no eligible estimates on this day;
+                        # everything should be null.
+                        assert_true(sid_estimates.iloc[i].isnull().all())
                 else:
-                    assert sid_estimates.iloc[i].isnull().all()
-
-    def test_wrong_num_quarters_passed(self):
-        self._test_wrong_num_quarters_passed()
+                    # We don't know about any estimates on this day;
+                    # everything should be null.
+                    assert_true(sid_estimates.iloc[i].isnull().all())
 
 
-class NextEstimateMultipleQuartersTestCase(EstimateTestCase):
-    events = pd.DataFrame({
-            SID_FIELD_NAME: [0] * 2,
-            TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
-                            pd.Timestamp('2015-01-06')],
-            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
-                                    pd.Timestamp('2015-01-20')],
-            'estimate': [1., 2.],
-            FISCAL_QUARTER_FIELD_NAME: [1, 2],
-            FISCAL_YEAR_FIELD_NAME: [2015, 2015]
-        })
-
+class NextEstimate(WithEstimatesT0, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
 
+    def get_expected_estimate(self,
+                              q1_knowledge,
+                              q2_knowledge,
+                              comparable_date):
+        # If our latest knowledge of q1 is that the release is
+        # happening on this simulation date or later, then that's
+        # the estimate we want to use.
+        if (not q1_knowledge.empty and
+            q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+                comparable_date):
+            return q1_knowledge.iloc[-1]
+        # If q1 has already happened or we don't know about it
+        # yet and our latest knowledge indicates that q2 hasn't
+        # happened yet, then that's the estimate we want to use.
+        elif (not q2_knowledge.empty and
+              q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+                comparable_date):
+            return q2_knowledge.iloc[-1]
+        return pd.DataFrame()
+
+
+class BlazeNextEstimateLoaderTestCase(NextEstimate):
+    """
+    Run the same tests as NextEstimate, but using a BlazeNextEstimatesLoader.
+    """
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return BlazeNextEstimatesLoader(
+            bz.data(events),
+            columns,
+        )
+
+
+class PreviousEstimate(WithEstimatesT0, ZiplineTestCase):
+    @classmethod
+    def make_loader(cls, events, columns):
+        return PreviousQuartersEstimatesLoader(events, columns)
+
+    def get_expected_estimate(self,
+                              q1_knowledge,
+                              q2_knowledge,
+                              comparable_date):
+
+        # The expected estimate will be for q2 if the last thing
+        # we've seen is that the release date already happened.
+        # Otherwise, it'll be for q1, as long as the release date
+        # for q1 has already happened.
+        if (not q2_knowledge.empty and
+            q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+                comparable_date):
+            return q2_knowledge.iloc[-1]
+        elif (not q1_knowledge.empty and
+              q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+                comparable_date):
+            return q1_knowledge.iloc[-1]
+        return pd.DataFrame()
+
+
+class BlazePreviousEstimateLoaderTestCase(PreviousEstimate):
+    """
+    Run the PreviousEstimate tests, but using a BlazePreviousEstimatesLoader.
+    """
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return BlazePreviousEstimatesLoader(
+            bz.data(events),
+            columns,
+        )
+
+
+class WithEstimateMultipleQuarters(WithEstimates):
+    """
+    ZiplineTestCase mixin providing cls.events, cls.make_expected_out as
+    class-level fixtures and self.test_multiple_qtrs_requested as a test.
+
+    Attributes
+    ----------
+    events : pd.DataFrame
+        Simple DataFrame with estimates for 2 quarters for a single sid.
+
+    Methods
+    -------
+    make_expected_out() -> pd.DataFrame
+        Returns the DataFrame that is expected as a result of running a
+        Pipeline where estimates are requested for multiple quarters out.
+    fill_expected_out(expected)
+        Fills the expected DataFrame with data.
+
+    Tests
+    -----
+    test_multiple_qtrs_requested()
+        Runs a Pipeline that calculates which estimates to return for
+        multiple quarters out and checks that the returned columns contain
+        data for the correct number of quarters out.
+    """
+    events = pd.DataFrame({
+        SID_FIELD_NAME: [0] * 2,
+        TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
+                        pd.Timestamp('2015-01-06')],
+        EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
+                                pd.Timestamp('2015-01-20')],
+        'estimate': [1., 2.],
+        FISCAL_QUARTER_FIELD_NAME: [1, 2],
+        FISCAL_YEAR_FIELD_NAME: [2015, 2015]
+    })
+
+    @classmethod
+    def init_class_fixtures(cls):
+        super(WithEstimateMultipleQuarters, cls).init_class_fixtures()
+        cls.expected_out = cls.make_expected_out()
+
+    @classmethod
+    def make_expected_out(cls):
+        expected = pd.DataFrame(columns=[cls.columns[col] + '1'
+                                         for col in cls.columns] +
+                                        [cls.columns[col] + '2'
+                                         for col in cls.columns],
+                                index=cls.trading_days)
+
+        for (col, raw_name), suffix in itertools.product(
+            cls.columns.items(), ('1', '2')
+        ):
+            expected_name = raw_name + suffix
+            if col.dtype == datetime64ns_dtype:
+                expected[expected_name] = pd.to_datetime(
+                    expected[expected_name]
+                )
+            else:
+                expected[expected_name] = expected[
+                    expected_name
+                ].astype(col.dtype)
+        cls.fill_expected_out(expected)
+        return expected.reindex(cls.trading_days)
+
     def test_multiple_qtrs_requested(self):
-        """
-        This test asks for datasets that calculate which estimates to
-        return for multiple quarters out and checks that the returned columns
-        contain data for the correct number of quarters out.
-        """
         dataset1 = QuartersEstimates(1)
         dataset2 = QuartersEstimates(2)
         engine = SimplePipelineEngine(
@@ -406,202 +522,363 @@ class NextEstimateMultipleQuartersTestCase(EstimateTestCase):
 
         # We now expect a column for 1 quarter out and a column for 2
         # quarters out for each of the dataset columns.
-        assert np.array_equal(sorted(np.array(q1_columns + q2_columns)),
-                              sorted(results.columns.values))
-
-        def check_null_range(start_date, stop_date, col_name):
-            # Make sure that values in the given column/range are all null.
-            assert (
-                results.loc[
-                    start_date:stop_date
-                ][col_name].isnull()
-            ).all()
-
-        def check_values(start_date, end_date, col_name, qtr, event_idx):
-            # Make sure that values in the given column/range are all equal
-            # to the value at the given index from the raw data.
-            assert (
-                results.loc[
-                    start_date:end_date
-                ][col_name + qtr] ==
-                self.events[col_name][event_idx]
-            ).all()
-
-        # Although it's painful to check the ranges one by one for different
-        # columns, it's important to do this so that we have a clear
-        # understanding of how knowledge/event dates interact and give us
-        # values for 1Q out and 2Q out.
-        for col in self.columns:
-            # 1Q out cols
-            check_null_range(self.START_DATE,
-                             pd.Timestamp('2014-12-31'),
-                             col.name + '1')
-            check_values(pd.Timestamp('2015-01-02'),
-                         pd.Timestamp('2015-01-10'),
-                         col.name,
-                         '1',
-                         0)  # First event is our 1Q out
-            check_values(pd.Timestamp('2015-01-11'),
-                         pd.Timestamp('2015-01-20'),
-                         col.name,
-                         '1',
-                         1)  # Second event becomes our 1Q out
-            check_null_range(pd.Timestamp('2015-01-21'),
-                             self.END_DATE,
-                             col.name + '1')
-
-        # Fiscal year and quarter are different for 2Q out because even when we
-        # have no data for 2Q out, we still know which fiscal year/quarter we
-        # want data for as long as we have data for 1Q out.
-        for col in set(self.columns.keys()) - {Estimates.fiscal_year,
-                                               Estimates.fiscal_quarter}:
-            # 2Q out cols
-            check_null_range(self.START_DATE,
-                             pd.Timestamp('2015-01-05'),
-                             col.name + '2')
-            # We have data for 2Q out when our knowledge of
-            # the next quarter and the quarter after that
-            # overlaps and before the next quarter's event
-            # happens.
-            check_values(pd.Timestamp('2015-01-06'),
-                         pd.Timestamp('2015-01-10'),
-                         col.name,
-                         '2',
-                         1)
-            check_null_range(pd.Timestamp('2015-01-11'),
-                             self.END_DATE,
-                             col.name + '2')
-
-        # Check fiscal year/quarter for 2Q out.
-        check_null_range(self.START_DATE,
-                         pd.Timestamp('2015-01-01'),
-                         Estimates.fiscal_quarter.name + '2')
-        check_null_range(self.START_DATE,
-                         pd.Timestamp('2015-01-01'),
-                         Estimates.fiscal_year.name + '2')
-        # We have a different quarter number than the quarter numbers we have
-        # in our data for 2Q out, so assert manually.
-        assert (
-                results.loc[
-                    pd.Timestamp('2015-01-02'):pd.Timestamp('2015-01-10')
-                ][Estimates.fiscal_quarter.name + '2'] ==
-                2
-            ).all()
-        assert (
-                results.loc[
-                    pd.Timestamp('2015-01-10'):pd.Timestamp('2015-01-20')
-                ][Estimates.fiscal_quarter.name + '2'] ==
-                3
-            ).all()
-        # We have the same fiscal year, 2-15, for 2Q out over the date range of
-        # interest.
-        check_values(pd.Timestamp('2015-01-02'),
-                     pd.Timestamp('2015-01-20'),
-                     Estimates.fiscal_year.name,
-                     '2',
-                     1)
-        check_null_range(pd.Timestamp('2015-01-21'),
-                         self.END_DATE,
-                         Estimates.fiscal_quarter.name + '2')
-        check_null_range(pd.Timestamp('2015-01-21'),
-                         self.END_DATE,
-                         Estimates.fiscal_year.name + '2')
+        assert_equal(sorted(np.array(q1_columns + q2_columns)),
+                     sorted(results.columns.values))
+        assert_frame_equal(self.expected_out.sort(axis=1),
+                           results.xs(0, level=1).sort(axis=1))
 
 
-class BlazeNextEstimateLoaderTestCase(NextEstimateTestCase):
-    """
-    Run the same tests as EventsLoaderTestCase, but using a BlazeEventsLoader.
-    """
-
+class NextEstimateMultipleQuarters(
+    WithEstimateMultipleQuarters, ZiplineTestCase
+):
     @classmethod
     def make_loader(cls, events, columns):
-        return BlazeNextEstimatesLoader(
-            bz.data(events),
-            columns,
-        )
+        return NextQuartersEstimatesLoader(events, columns)
+
+    @classmethod
+    def fill_expected_out(cls, expected):
+        # Fill columns for 1 Q out
+        for raw_name in cls.columns.values():
+            expected[raw_name + '1'].loc[
+                pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-11')
+            ] = cls.events[raw_name].iloc[0]
+            expected[raw_name + '1'].loc[
+                pd.Timestamp('2015-01-11'):pd.Timestamp('2015-01-20')
+            ] = cls.events[raw_name].iloc[1]
+
+        # Fill columns for 2 Q out
+        # We only have an estimate and event date for 2 quarters out before
+        # Q1's event happens; after Q1's event, we know 1 Q out but not 2 Qs
+        # out.
+        for col_name in ['estimate', 'event_date']:
+            expected[col_name + '2'].loc[
+                pd.Timestamp('2015-01-06'):pd.Timestamp('2015-01-10')
+            ] = cls.events[col_name].iloc[1]
+        # But we know what FQ and FY we'd need in both Q1 and Q2
+        # because we know which FQ is next and can calculate from there.
+        expected[FISCAL_QUARTER_FIELD_NAME + '2'].loc[
+            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-09')
+        ] = 2
+        expected[FISCAL_QUARTER_FIELD_NAME + '2'].loc[
+            pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-20')
+        ] = 3
+        expected[FISCAL_YEAR_FIELD_NAME + '2'].loc[
+            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-20')
+        ] = 2015
+
+        return expected
 
 
-class PreviousEstimateTestCase(EstimateTestCase):
-    events = gen_estimates()
+class PreviousEstimateMultipleQuarters(
+    WithEstimateMultipleQuarters,
+    ZiplineTestCase
+):
 
     @classmethod
     def make_loader(cls, events, columns):
         return PreviousQuartersEstimatesLoader(events, columns)
 
-    def test_previous_estimates(self):
+    @classmethod
+    def fill_expected_out(cls, expected):
+        # Fill columns for 1 Q out
+        for raw_name in cls.columns.values():
+            expected[raw_name + '1'].loc[
+                pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-19')
+            ] = cls.events[raw_name].iloc[0]
+            expected[raw_name + '1'].loc[
+                pd.Timestamp('2015-01-20'):
+            ] = cls.events[raw_name].iloc[1]
+
+        # Fill columns for 2 Q out
+        for col_name in ['estimate', 'event_date']:
+            expected[col_name + '2'].loc[
+                pd.Timestamp('2015-01-20'):
+            ] = cls.events[col_name].iloc[0]
+        expected[
+            FISCAL_QUARTER_FIELD_NAME + '2'
+        ].loc[pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-20')] = 4
+        expected[
+            FISCAL_YEAR_FIELD_NAME + '2'
+        ].loc[pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-20')] = 2014
+        expected[
+            FISCAL_QUARTER_FIELD_NAME + '2'
+        ].loc[pd.Timestamp('2015-01-20'):] = 1
+        expected[
+            FISCAL_YEAR_FIELD_NAME + '2'
+        ].loc[pd.Timestamp('2015-01-20'):] = 2015
+        return expected
+
+
+class WithEstimateWindows(WithEstimates):
+    """
+    ZiplineTestCase mixin providing fixtures and a test to test running a
+    Pipeline with an estimates loader over differently-sized windows.
+
+    Attributes
+    ----------
+    events : pd.DataFrame
+        DataFrame with estimates for 2 quarters for 2 sids.
+    window_test_start_date : pd.Timestamp
+        The date from which the window should start.
+    timelines : dict[int -> pd.DataFrame]
+        A dictionary mapping the number of quarters out to
+        snapshots of how the data should look on each date in the date range.
+
+    Methods
+    -------
+    make_expected_timelines() -> dict[int -> pd.DataFrame]
+        Creates a dictionary of expected data. See `timelines`, above.
+
+    Tests
+    -----
+    test_estimate_windows_at_quarter_boundaries()
+        Tests that we overwrite values with the correct quarter's estimate at
+        the correct dates when we have a factor that asks for a window of data.
+    """
+    sid_0_timeline = pd.DataFrame({
+        TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
+                        pd.Timestamp('2015-01-07'),
+                        pd.Timestamp('2015-01-05'),
+                        pd.Timestamp('2015-01-17')],
+        EVENT_DATE_FIELD_NAME:
+            [pd.Timestamp('2015-01-10'),
+             pd.Timestamp('2015-01-10'),
+             pd.Timestamp('2015-01-20'),
+             pd.Timestamp('2015-01-20')],
+        'estimate': [10., 11.] + [20., 21.],
+        FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
+        FISCAL_YEAR_FIELD_NAME: 2015,
+        SID_FIELD_NAME: 0,
+    })
+
+    sid_1_timeline = pd.DataFrame({
+        TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
+                        pd.Timestamp('2015-01-12'),
+                        pd.Timestamp('2015-01-09'),
+                        pd.Timestamp('2015-01-15')],
+        EVENT_DATE_FIELD_NAME:
+            [pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
+             pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
+        'estimate': [10., 11.] + [30., 31.],
+        FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
+        FISCAL_YEAR_FIELD_NAME: 2015,
+        SID_FIELD_NAME: 1
+    })
+
+    window_test_start_date = pd.Timestamp('2015-01-05')
+    critical_dates = [pd.Timestamp('2015-01-09', tz='utc'),
+                      pd.Timestamp('2015-01-12', tz='utc'),
+                      pd.Timestamp('2015-01-15', tz='utc'),
+                      pd.Timestamp('2015-01-20', tz='utc')]
+    # Each test case is (starting date, num quarters out): the product of the
+    # critical dates and the number of quarters out.
+    window_test_cases = list(itertools.product(critical_dates, (1, 2)))
+    events = pd.concat([sid_0_timeline, sid_1_timeline])
+
+    @classmethod
+    def make_expected_timelines(cls):
+        return {}
+
+    @classmethod
+    def init_class_fixtures(cls):
+        super(WithEstimateWindows, cls).init_class_fixtures()
+        cls.timelines = cls.make_expected_timelines()
+
+    @classmethod
+    def create_expected_df(cls, tuples, end_date):
         """
-        The goal of this test is to make sure that we select the right
-        datapoint as our 'previous' w.r.t each date.
+        Given a list of tuples of new data we get for each sid on each critical
+        date (when information changes), create a DataFrame that fills that
+        data through a date range ending at `end_date`.
         """
-        dataset = QuartersEstimates(1)
+        df = pd.DataFrame(tuples,
+                          columns=[SID_FIELD_NAME,
+                                   'estimate',
+                                   'knowledge_date'])
+        df = df.pivot_table(columns='sid',
+                            values='estimate',
+                            index='knowledge_date')
+        df = df.reindex(
+            pd.date_range(cls.window_test_start_date, end_date)
+        )
+        # Index name is lost during reindex.
+        df.index = df.index.rename('knowledge_date')
+        df['at_date'] = end_date.tz_localize('utc')
+        df = df.set_index(['at_date', df.index.tz_localize('utc')]).ffill()
+        return df
+
+    @parameterized.expand(window_test_cases)
+    def test_estimate_windows_at_quarter_boundaries(self,
+                                                    start_idx,
+                                                    num_quarters_out):
+        dataset = QuartersEstimates(num_quarters_out)
+        trading_days = self.trading_days
+        timelines = self.timelines
+        # The window length should be from the starting index back to the first
+        # date on which we got data. The goal is to ensure that as we
+        # progress through the timeline, all data we got, starting from that
+        # first date, is correctly overwritten.
+        window_len = (
+            self.trading_days.get_loc(start_idx) -
+            self.trading_days.get_loc(self.window_test_start_date) + 1
+        )
+
+        class SomeFactor(CustomFactor):
+            inputs = [dataset.estimate]
+            window_length = window_len
+
+            def compute(self, today, assets, out, estimate):
+                today_idx = trading_days.get_loc(today)
+                today_timeline = timelines[
+                    num_quarters_out
+                ].loc[today].reindex(
+                    trading_days[:today_idx + 1]
+                ).values
+                timeline_start_idx = (len(today_timeline) - window_len)
+                assert_equal(estimate,
+                             today_timeline[timeline_start_idx:])
         engine = SimplePipelineEngine(
             lambda x: self.loader,
             self.trading_days,
             self.asset_finder,
         )
-
-        results = engine.run_pipeline(
-            Pipeline({c.name: c.latest for c in dataset.columns}),
-            start_date=self.trading_days[0],
-            end_date=self.trading_days[-1],
+        engine.run_pipeline(
+            Pipeline({'est': SomeFactor()}),
+            start_date=start_idx,
+            end_date=pd.Timestamp('2015-01-20', tz='utc'),  # last event date
+            # we have
         )
-        for sid in self.sids:
-            sid_estimates = results.xs(sid, level=1)
-            ts_sorted_estimates = self.events[
-                self.events[SID_FIELD_NAME] == sid
-            ].sort(TS_FIELD_NAME)
-            for i, date in enumerate(sid_estimates.index):
-                comparable_date = date.tz_localize(None)
-                # Filter out estimates we don't know about yet.
-                ts_eligible_estimates = ts_sorted_estimates[
-                    ts_sorted_estimates[TS_FIELD_NAME] <= comparable_date
-                ]
-                expected_estimate = pd.DataFrame()
-                if not ts_eligible_estimates.empty:
-                    # Determine the last piece of information we know about
-                    # for q1 and q2. This takes advantage of the fact that we
-                    # only have 2 quarters in the test data.
-                    q1_knowledge = ts_eligible_estimates[
-                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
-                    ]
-                    q2_knowledge = ts_eligible_estimates[
-                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
-                    ]
-                    # The expected estimate will be for q2 if the last thing
-                    # we've seen is that the release date already happened.
-                    # Otherwise, it'll be for q1, as long as the release date
-                    # for q1 has already happened.
-                    if (not q2_knowledge.empty and
-                        q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
-                            comparable_date):
-                        expected_estimate = q2_knowledge.iloc[-1]
-                    elif (not q1_knowledge.empty and
-                          q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
-                            comparable_date):
-                        expected_estimate = q1_knowledge.iloc[-1]
-                if not expected_estimate.empty:
-                    for colname in sid_estimates.columns:
-                        expected_value = expected_estimate[colname]
-                        computed_value = sid_estimates.iloc[i][colname]
-                        assert_equal(expected_value, computed_value)
-                else:
-                    assert sid_estimates.iloc[i].isnull().all()
-
-    def test_wrong_num_quarters_passed(self):
-        self._test_wrong_num_quarters_passed()
 
 
-class BlazePreviousEstimateLoaderTestCase(PreviousEstimateTestCase):
-    """
-    Run the same tests as EventsLoaderTestCase, but using a BlazeEventsLoader.
-    """
-
+class PreviousEstimateWindows(WithEstimateWindows, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
-        return BlazePreviousEstimatesLoader(
-            bz.data(events),
-            columns,
+        return PreviousQuartersEstimatesLoader(events, columns)
+
+    @classmethod
+    def make_expected_timelines(cls):
+        oneq_previous = pd.concat([
+            cls.create_expected_df(
+                [(0, np.NaN, cls.window_test_start_date),
+                 (1, np.NaN, cls.window_test_start_date)],
+                pd.Timestamp('2015-01-09')
+            ),
+            cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-10')),
+                 (1, 11, pd.Timestamp('2015-01-12'))],
+                pd.Timestamp('2015-01-12')
+            ),
+            cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-10')),
+                 (1, 11, pd.Timestamp('2015-01-12'))],
+                pd.Timestamp('2015-01-13')
+            ),
+            cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-10')),
+                 (1, 11, pd.Timestamp('2015-01-12'))],
+                pd.Timestamp('2015-01-14')
+            ),
+            cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-10')),
+                 (1, 31, pd.Timestamp('2015-01-15'))],
+                pd.Timestamp('2015-01-15')
+            ),
+            cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-10')),
+                 (1, 31, pd.Timestamp('2015-01-15'))],
+                pd.Timestamp('2015-01-16')
+            ),
+            cls.create_expected_df(
+                [(0, 21, pd.Timestamp('2015-01-17')),
+                 (1, 31, pd.Timestamp('2015-01-15'))],
+                pd.Timestamp('2015-01-20')
+            ),
+        ])
+
+        twoq_previous = pd.concat(
+            [cls.create_expected_df(
+                [(0, np.NaN, cls.window_test_start_date),
+                 (1, np.NaN, cls.window_test_start_date)],
+                end_date
+            ) for end_date in pd.date_range('2015-01-09', '2015-01-19')] +
+            [cls.create_expected_df(
+                [(0, 11, pd.Timestamp('2015-01-20')),
+                 (1, np.NaN, cls.window_test_start_date)],
+                pd.Timestamp('2015-01-20')
+            )]
         )
+        return {
+            1: oneq_previous,
+            2: twoq_previous
+        }
+
+
+class NextEstimateWindows(WithEstimateWindows, ZiplineTestCase):
+    @classmethod
+    def make_loader(cls, events, columns):
+        return NextQuartersEstimatesLoader(events, columns)
+
+    @classmethod
+    def make_expected_timelines(cls):
+        oneq_next = pd.concat([
+            cls.create_expected_df(
+                [(0, 10, cls.window_test_start_date),
+                 (0, 11, pd.Timestamp('2015-01-07')),
+                 (1, 10, pd.Timestamp('2015-01-09'))],
+                pd.Timestamp('2015-01-09')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (1, 10, pd.Timestamp('2015-01-09')),
+                 (1, 11, pd.Timestamp('2015-01-12'))],
+                pd.Timestamp('2015-01-12')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (1, 30, pd.Timestamp('2015-01-09'))],
+                pd.Timestamp('2015-01-13')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (1, 30, pd.Timestamp('2015-01-09'))],
+                pd.Timestamp('2015-01-14')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (1, 30, pd.Timestamp('2015-01-09')),
+                 (1, 31, pd.Timestamp('2015-01-15'))],
+                pd.Timestamp('2015-01-15')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (1, np.NaN, cls.window_test_start_date)],
+                pd.Timestamp('2015-01-16')
+            ),
+            cls.create_expected_df(
+                [(0, 20, cls.window_test_start_date),
+                 (0, 21, pd.Timestamp('2015-01-17')),
+                 (1, np.NaN, cls.window_test_start_date)],
+                pd.Timestamp('2015-01-20')
+            ),
+        ])
+
+        twoq_next = pd.concat(
+            [cls.create_expected_df(
+                [(0, 20, pd.Timestamp(cls.window_test_start_date)),
+                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date))],
+                pd.Timestamp('2015-01-09')
+            )] +
+            [cls.create_expected_df(
+                [(0, np.NaN, pd.Timestamp(cls.window_test_start_date)),
+                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date))],
+                end_date
+            ) for end_date in pd.date_range('2015-01-12', '2015-01-20')]
+        )
+
+        return {
+            1: oneq_next,
+            2: twoq_next
+        }
 
 
 class QuarterShiftTestCase(ZiplineTestCase):
@@ -610,12 +887,12 @@ class QuarterShiftTestCase(ZiplineTestCase):
     backwards/forwards from a starting point.
     """
     def test_quarter_normalization(self):
-        input_yrs = pd.Series([0] * 4)
-        input_qtrs = pd.Series(range(1, 5))
+        input_yrs = pd.Series([0] * 4, dtype=np.int64)
+        input_qtrs = pd.Series(range(1, 5), dtype=np.int64)
         result_years, result_quarters = split_normalized_quarters(
             normalize_quarters(input_yrs, input_qtrs)
         )
         # Can't use assert_series_equal here with check_names=False
         # because that still fails due to name differences.
-        assert input_yrs.equals(result_years)
-        assert input_qtrs.equals(result_quarters)
+        assert_equal(input_yrs, result_years)
+        assert_equal(input_qtrs, result_quarters)
diff --git a/zipline/lib/adjustment.pyx b/zipline/lib/adjustment.pyx
index 8839ece9..aea8df5c 100644
--- a/zipline/lib/adjustment.pyx
+++ b/zipline/lib/adjustment.pyx
@@ -3,7 +3,7 @@ from cpython cimport Py_EQ
 
 from pandas import isnull, Timestamp
 from numpy cimport float64_t, uint8_t, int64_t
-from numpy import asarray, datetime64, float64
+from numpy import asarray, datetime64, float64, int64
 # Purely for readability. There aren't C-level declarations for these types.
 ctypedef object Int64Index_t
 ctypedef object DatetimeIndex_t
@@ -451,28 +451,32 @@ cdef class Datetime641DArrayOverwrite(ArrayAdjustment):
     Example
     -------
 
-    >>> import numpy as np
-    >>> arr = np.arange(25, dtype=float).reshape(5, 5)
-    >>> arr
-    array([[  0.,   1.,   2.,   3.,   4.],
-           [  5.,   6.,   7.,   8.,   9.],
-           [ 10.,  11.,  12.,  13.,  14.],
-           [ 15.,  16.,  17.,  18.,  19.],
-           [ 20.,  21.,  22.,  23.,  24.]])
+    >>> import numpy as np; import pandas as pd
+    >>> dts = pd.date_range('2014', freq='D', periods=9, tz='UTC')
+    >>> arr = dts.values.reshape(3, 3)
+    >>> arr == np.datetime64(0, 'ns')
+    array([[False, False, False],
+           [False, False, False],
+           [False, False, False]], dtype=bool)
     >>> adj = Datetime641DArrayOverwrite(
-    ...     row_start=0,
-    ...     row_end=3,
-    ...     column_start=0,
-    ...     column_end=0,
-    ...     values=np.array([1, 2, 3, 4]),
-    )
-    >>> adj.mutate(arr)
-    >>> arr
-    array([[  1.,   1.,   2.,   3.,   4.],
-           [  2.,   6.,   7.,   8.,   9.],
-           [ 3.,  11.,  12.,  13.,  14.],
-           [ 4.,  16.,  17.,  18.,  19.],
-           [ 20.,  21.,  22.,  23.,  24.]])
+    ...           first_row=1,
+    ...           last_row=2,
+    ...           first_col=1,
+    ...           last_col=2,
+    ...           values=np.array([
+    ...               np.datetime64(0, 'ns'),
+    ...               np.datetime64(1, 'ns')
+    ...           ])
+    ...       )
+    >>> adj.mutate(arr.view(np.int64))
+    >>> arr == np.datetime64(0, 'ns')
+    array([[False, False, False],
+           [False,  True,  True],
+           [False, False, False]], dtype=bool)
+    >>> arr == np.datetime64(1, 'ns')
+    array([[False, False, False],
+           [False, False, False],
+           [False,  True,  True]], dtype=bool)
     """
     cdef:
         readonly int64_t[:] values
@@ -598,7 +602,7 @@ cdef datetime_to_int(object datetimelike):
             datetimelike.dtype.name,
         )
 
-    return datetimelike.astype(int)
+    return datetimelike.astype(int64)
 
 
 cdef class Datetime64Adjustment(_Int64Adjustment):
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 24ce3e26..5264a954 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -1096,12 +1096,15 @@ class BlazeLoader(dict):
         sparse_deltas = last_in_date_group(non_novel_deltas,
                                            dates,
                                            assets,
-                                           reindex=False)
+                                           reindex=False,
+                                           have_sids=have_sids)
         dense_output = last_in_date_group(sparse_output,
                                           dates,
                                           assets,
-                                          reindex=True)
-        ffill_across_cols(dense_output, columns)
+                                          reindex=True,
+                                          have_sids=have_sids)
+        ffill_across_cols(dense_output, columns, {c.name: c.name
+                                                  for c in columns})
         if have_sids:
             adjustments_from_deltas = adjustments_from_deltas_with_sids
             column_view = identity
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index 81554b0d..864b05a7 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -25,6 +25,8 @@ class BlazeEstimatesLoader(PipelineLoader):
     ----------
     expr : Expr
         The expression representing the data to load.
+    columns : dict[str -> str]
+        A dict mapping BoundColumn names to the associated names in `expr`.
     resources : dict, optional
         Mapping from the loadable terms of ``expr`` to actual data resources.
     odo_kwargs : dict, optional
@@ -33,8 +35,6 @@ class BlazeEstimatesLoader(PipelineLoader):
         The time to use for the data query cutoff.
     data_query_tz : tzinfo or str
         The timezeone to use for the data query cutoff.
-    dataset : DataSet
-        The DataSet object for which this loader loads data.
 
     Notes
     -----
@@ -43,12 +43,14 @@ class BlazeEstimatesLoader(PipelineLoader):
        Dim * {{
            {SID_FIELD_NAME}: int64,
            {TS_FIELD_NAME}: datetime,
+           {FISCAL_YEAR_FIELD_NAME}: float64,
+           {FISCAL_QUARTER_FIELD_NAME}: float64,
+           {EVENT_DATE_FIELD_NAME}: datetime,
        }}
 
     And other dataset-specific fields, where each row of the table is a
     record including the sid to identify the company, the timestamp where we
-    learned about the announcement, and the date when the earnings will be
-    announced.
+    learned about the announcement, and the date of the event.
 
     If the '{TS_FIELD_NAME}' field is not included it is assumed that we
     start the backtest with knowledge of all announcements.
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index 39e5dac6..56c050f2 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -24,6 +24,10 @@ class BlazeEventsLoader(PipelineLoader):
     ----------
     expr : Expr
         The expression representing the data to load.
+    next_value_columns : dict[BoundColumn -> raw column name]
+        A dict mapping 'next' BoundColumns to their column names in `expr`.
+    previous_value_columns : dict[BoundColumn -> raw column name]
+        A dict mapping 'previous' BoundColumns to their column names in `expr`.
     resources : dict, optional
         Mapping from the loadable terms of ``expr`` to actual data resources.
     odo_kwargs : dict, optional
@@ -32,8 +36,6 @@ class BlazeEventsLoader(PipelineLoader):
         The time to use for the data query cutoff.
     data_query_tz : tzinfo or str
         The timezone to use for the data query cutoff.
-    dataset : DataSet
-        The DataSet object for which this loader loads data.
 
     Notes
     -----
@@ -42,12 +44,12 @@ class BlazeEventsLoader(PipelineLoader):
        Dim * {{
            {SID_FIELD_NAME}: int64,
            {TS_FIELD_NAME}: datetime,
+           {EVENT_DATE_FIELD_NAME}: datetime,
        }}
 
     And other dataset-specific fields, where each row of the table is a
     record including the sid to identify the company, the timestamp where we
-    learned about the announcement, and the date when the earnings will be z
-    announced.
+    learned about the announcement, and the event date.
 
     If the '{TS_FIELD_NAME}' field is not included it is assumed that we
     start the backtest with knowledge of all announcements.
@@ -84,8 +86,12 @@ class BlazeEventsLoader(PipelineLoader):
         self._data_query_tz = data_query_tz
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        raw = load_raw_data(assets, dates, self._data_query_time,
-                            self._data_query_tz, self._expr, self._odo_kwargs)
+        raw = load_raw_data(assets,
+                            dates,
+                            self._data_query_time,
+                            self._data_query_tz,
+                            self._expr,
+                            self._odo_kwargs)
 
         return EventsLoader(
             events=raw,
diff --git a/zipline/pipeline/loaders/blaze/utils.py b/zipline/pipeline/loaders/blaze/utils.py
index 6455f76c..963d9f9a 100644
--- a/zipline/pipeline/loaders/blaze/utils.py
+++ b/zipline/pipeline/loaders/blaze/utils.py
@@ -6,7 +6,11 @@ from zipline.pipeline.loaders.utils import (
 )
 
 
-def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
+def load_raw_data(assets,
+                  dates,
+                  data_query_time,
+                  data_query_tz,
+                  expr,
                   odo_kwargs):
     """
     given an expression representing data to load, perform normalization and
@@ -25,13 +29,14 @@ def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
         `time`.
     expr : expr
         the expression representing the data to load.
-    odo_kwargs : dict, optional
+    odo_kwargs : dict
         extra keyword arguments to pass to odo when executing the expression.
 
     returns
     -------
     raw : pd.dataframe
-        the data symbolized by `expr` materialized in a dataframe.
+        The result of computing expr and materializing the result as a
+        dataframe.
     """
     lower_dt, upper_dt = normalize_data_query_bounds(
         dates[0],
@@ -45,7 +50,7 @@ def load_raw_data(assets, dates, data_query_time, data_query_tz, expr,
         upper_dt,
         odo_kwargs,
     )
-    sids = raw.loc[:, SID_FIELD_NAME]
+    sids = raw[SID_FIELD_NAME]
     raw.drop(
         sids[~sids.isin(assets)].index,
         inplace=True
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 6ef8ae7c..54686a6e 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,9 +1,8 @@
 from abc import abstractmethod
-from collections import defaultdict
 import numpy as np
-import pandas as pd
 from six import viewvalues
 from toolz import groupby
+
 from zipline.lib.adjusted_array import AdjustedArray
 from zipline.lib.adjustment import (Datetime641DArrayOverwrite,
                                     Float641DArrayOverwrite)
@@ -22,14 +21,15 @@ from zipline.pipeline.loaders.utils import (
     last_in_date_group
 )
 
-NORMALIZED_QUARTERS = 'normalized_quarters'
-
-SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
 
+INVALID_NUM_QTRS_MESSAGE = "Passed invalid number of quarters %s; " \
+                           "must pass a number of quarters >= 0"
 NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
 NEXT_FISCAL_YEAR = 'next_fiscal_year'
+NORMALIZED_QUARTERS = 'normalized_quarters'
 PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
 PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
+SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
 SIMULTATION_DATES = 'dates'
 
 
@@ -86,10 +86,10 @@ def validate_column_specs(events, columns):
 class QuarterEstimatesLoader(PipelineLoader):
     def __init__(self,
                  estimates,
-                 base_column_name_map):
+                 name_map):
         validate_column_specs(
             estimates,
-            base_column_name_map
+            name_map
         )
 
         self.estimates = estimates[
@@ -97,12 +97,16 @@ class QuarterEstimatesLoader(PipelineLoader):
             estimates[FISCAL_QUARTER_FIELD_NAME].notnull() &
             estimates[FISCAL_YEAR_FIELD_NAME].notnull()
         ]
+        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
+            self.estimates[FISCAL_YEAR_FIELD_NAME],
+            self.estimates[FISCAL_QUARTER_FIELD_NAME],
+        )
 
-        self.base_column_name_map = base_column_name_map
+        self.name_map = name_map
 
     @abstractmethod
     def load_quarters(self, num_quarters, last, dates):
-        pass
+        raise NotImplementedError('load_quarters')
 
     def get_requested_data_for_col(self, stacked_last_per_qtr, idx, dates):
         """
@@ -111,8 +115,8 @@ class QuarterEstimatesLoader(PipelineLoader):
         Parameters
         ----------
         stacked_last_per_qtr : pd.DataFrame
-            The latest estimate known per sid per date per quarter with the
-            dates, normalized quarter, and sid as the index.
+            The latest estimate known with the dates, normalized quarter, and
+            sid as the index.
         idx : pd.MultiIndex
             The index of the row of the requested quarter from each date for
             each sid.
@@ -122,16 +126,18 @@ class QuarterEstimatesLoader(PipelineLoader):
         Returns
         --------
         requested_qtr_data : pd.DataFrame
-            The DataFrame with final values for the requested quarter for all
-            columns; `dates` are the index and columns are a MultiIndex with
-            sids at the top level and the dataset columns on the bottom.
+            The DataFrame with the latest values for the requested quarter
+            for all columns; `dates` are the index and columns are a MultiIndex
+            with sids at the top level and the dataset columns on the bottom.
         """
         requested_qtr_data = stacked_last_per_qtr.loc[idx]
-        # We no longer need this in the index, but we do need it as a column
-        # to calculate adjustments.
+        # We no longer need the shifted normalized quarters in the index, but
+        # we do need it as a column to calculate adjustments.
         requested_qtr_data = requested_qtr_data.reset_index(
             SHIFTED_NORMALIZED_QTRS
         )
+        # Calculate the actual year/quarter being requested and add those in
+        # as columns.
         (requested_qtr_data[FISCAL_YEAR_FIELD_NAME],
          requested_qtr_data[FISCAL_QUARTER_FIELD_NAME]) = \
             split_normalized_quarters(
@@ -154,8 +160,7 @@ class QuarterEstimatesLoader(PipelineLoader):
                         column_name,
                         column,
                         mask,
-                        assets,
-                        qtr_crossover_point):
+                        assets):
         """
         Creates an AdjustedArray from the given estimates data for the given
         dates.
@@ -183,18 +188,17 @@ class QuarterEstimatesLoader(PipelineLoader):
             computed.
         column : BoundColumn
             The column for which the AdjustedArray is being computed.
-        mask :
-        assets :
-        qtr_crossover_point :
-            Whether we should use the 'right' or 'left' side when doing
-            searchsorted on the dates for quarter boundaries.
+        mask : np.array
+            Mask array of dimensions len(dates) X len(assets).
+        assets : pd.Int64Index
+            An index of all the assets from the raw data.
 
         Returns
         -------
         adjusted_array : AdjustedArray
             The array of data and overwrites for the given column.
         """
-        adjustments = defaultdict(list)
+        adjustments = {}
         requested_qtr_data = self.get_requested_data_for_col(
             stacked_last_per_qtr, requested_qtr_idx, dates
         )
@@ -204,10 +208,8 @@ class QuarterEstimatesLoader(PipelineLoader):
         zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
         if column.dtype == datetime64ns_dtype:
             overwrite = Datetime641DArrayOverwrite
-            missing_value = np.datetime64('NaT', 'ns')
         else:
             overwrite = Float641DArrayOverwrite
-            missing_value = np.NaN
         for sid_idx, sid in enumerate(assets):
             zero_qtr_sid_data = zero_qtr_data[
                 zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
@@ -225,7 +227,7 @@ class QuarterEstimatesLoader(PipelineLoader):
             ]
             # For the given sid, determine which quarters we have estimates
             # for.
-            quarters_with_estimates_for_sid = last_per_qtr.xs(
+            qtrs_with_estimates_for_sid = last_per_qtr.xs(
                 sid, axis=1, level=SID_FIELD_NAME
             ).groupby(axis=1, level=1).first().columns.values
             for row_indexer in list(qtr_shifts.index):
@@ -233,108 +235,162 @@ class QuarterEstimatesLoader(PipelineLoader):
                 # after this row. This isn't the starting index of the
                 # requested quarter, but simply the date we cross over into a
                 # new quarter.
-                qtr_start_idx = dates.searchsorted(
+                next_qtr_start_idx = dates.searchsorted(
                     zero_qtr_data.loc[
                         row_indexer
                     ][EVENT_DATE_FIELD_NAME],
-                    side=qtr_crossover_point
+                    side='left'
+                    if isinstance(self, PreviousQuartersEstimatesLoader)
+                    else 'right'
                 )
-
-                # Only add adjustments if the next quarter starts somewhere in
-                # our date index for this sid. Our 'next' quarter can never
-                # start at index 0; a starting index of 0 means that the next
-                # quarter's event date was NaT.
-                if 0 < qtr_start_idx < len(dates):
-                    # Find the quarter being requested in the quarter we're
-                    # crossing into.
-                    requested_quarter = requested_qtr_data[
-                        SHIFTED_NORMALIZED_QTRS
-                    ][sid].iloc[qtr_start_idx]
-
-                    # If there are estimates for the requested quarter,
-                    # overwrite all values going up to the starting index of
-                    # that quarter with estimates for that quarter.
-                    if requested_quarter in quarters_with_estimates_for_sid:
-                        adjustments[qtr_start_idx] = \
-                            [overwrite(
-                                0,
-                                qtr_start_idx - 1,  # overwrite thru last qtr
-                                sid_idx,
-                                sid_idx,
-                                last_per_qtr[column_name,
-                                             requested_quarter,
-                                             sid][:qtr_start_idx].values)]
-                    # There are no estimates for the quarter. Overwrite all
-                    # values going up to the starting index of that quarter
-                    # with the missing value for this column.
-                    else:
-                        adjustments[qtr_start_idx] = [
-                            overwrite(
-                                0,
-                                qtr_start_idx - 1,
-                                sid_idx,
-                                sid_idx,
-                                np.array(
-                                    [missing_value] *
-                                    len(last_per_qtr.index[:qtr_start_idx]))
-                            )
-                        ]
+                adjustments[next_qtr_start_idx] = \
+                    self.create_overwrite_for_quarter(
+                        next_qtr_start_idx,
+                        column,
+                        column_name,
+                        dates,
+                        last_per_qtr,
+                        overwrite,
+                        qtrs_with_estimates_for_sid,
+                        requested_qtr_data,
+                        sid,
+                        sid_idx,
+                    )
 
         return AdjustedArray(
-                requested_qtr_data[column_name].values.astype(column.dtype),
-                mask,
-                dict(adjustments),
-                column.missing_value,
-            )
+            requested_qtr_data[column_name].values.astype(column.dtype),
+            mask,
+            dict(adjustments),
+            column.missing_value,
+        )
+
+    def create_overwrite_for_quarter(self,
+                                     next_qtr_start_idx,
+                                     column,
+                                     column_name,
+                                     dates,
+                                     last_per_qtr,
+                                     overwrite,
+                                     quarters_with_estimates_for_sid,
+                                     requested_qtr_data,
+                                     sid,
+                                     sid_idx):
+        # Only add adjustments if the next quarter starts somewhere in
+        # our date index for this sid. Our 'next' quarter can never
+        # start at index 0; a starting index of 0 means that the next
+        # quarter's event date was NaT.
+        if 0 < next_qtr_start_idx < len(dates):
+            # Find the quarter being requested in the quarter we're
+            # crossing into.
+            requested_quarter = requested_qtr_data[
+                SHIFTED_NORMALIZED_QTRS
+            ][sid].iloc[next_qtr_start_idx]
+
+            # If there are estimates for the requested quarter,
+            # overwrite all values going up to the starting index of
+            # that quarter with estimates for that quarter.
+            if requested_quarter in quarters_with_estimates_for_sid:
+                return self.create_overwrite_for_estimate(
+                    column,
+                    column_name,
+                    last_per_qtr,
+                    next_qtr_start_idx,
+                    overwrite,
+                    requested_quarter,
+                    sid,
+                    sid_idx
+                )
+            # There are no estimates for the quarter. Overwrite all
+            # values going up to the starting index of that quarter
+            # with the missing value for this column.
+            else:
+                return self.overwrite_with_null(
+                    column,
+                    last_per_qtr,
+                    next_qtr_start_idx,
+                    overwrite,
+                    sid_idx
+                )
+
+    def overwrite_with_null(self,
+                            column,
+                            last_per_qtr,
+                            next_qtr_start_idx,
+                            overwrite,
+                            sid_idx):
+        return [overwrite(
+                0,
+                next_qtr_start_idx - 1,
+                sid_idx,
+                sid_idx,
+                np.full(
+                    len(
+                        last_per_qtr.index[:next_qtr_start_idx]
+                    ),
+                    column.missing_value,
+                    dtype=column.dtype
+                ))]
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        # TODO: how can we enforce that datasets have the num_quarters
-        # attribute, given that they're created dynamically?
-        groups = groupby(lambda x: x.dataset.num_quarters, columns)
-        groups_columns = dict(groups)
-        if (pd.Series(groups_columns.keys()) < 0).any():
-            raise ValueError("Must pass a number of quarters >= 0")
+        # Separate out getting the columns' datasets and the datasets'
+        # num_quarters attributes to ensure that we're catching the right
+        # AttributeError.
+        col_to_datasets = {col: col.dataset for col in columns}
+        try:
+            groups = groupby(lambda col: col_to_datasets[col].num_quarters,
+                             col_to_datasets)
+        except AttributeError:
+            raise AttributeError("Datasets loaded via the "
+                                 "QuarterEstimatesLoader must define a "
+                                 "`num_quarters` attribute that defines how "
+                                 "many quarters out the loader should load "
+                                 "the data relative to `dates`.")
+        if any(num_qtr < 0 for num_qtr in groups):
+            raise ValueError(
+                INVALID_NUM_QTRS_MESSAGE % ','.join(
+                    str(qtr) for qtr in groups if qtr < 0
+                )
+
+            )
         out = {}
-        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
-            self.estimates[FISCAL_YEAR_FIELD_NAME],
-            self.estimates[FISCAL_QUARTER_FIELD_NAME],
-        )
-        for num_quarters, columns in groups_columns.items():
-            # The column's dataset is itself dynamic and the mapping we
-            # actually want is to its dataset's parent's column name.
-            name_map = {c: self.base_column_name_map[
-                            getattr(c.dataset.__base__, c.name)
-                        ] for c in columns}
+
+        for num_quarters, columns in groups.items():
             # Determine the last piece of information we know for each column
             # on each date in the index for each sid and quarter.
             last_per_qtr = last_in_date_group(
-                self.estimates, True, dates, assets,
+                self.estimates, dates, assets, reindex=True,
                 extra_groupers=[NORMALIZED_QUARTERS]
             )
 
             # Forward fill values for each quarter/sid/dataset column.
-            ffill_across_cols(last_per_qtr, columns)
+            ffill_across_cols(last_per_qtr, columns, self.name_map)
             # Stack quarter and sid into the index.
-            stacked_last_per_qtr = last_per_qtr.stack([NORMALIZED_QUARTERS,
-                                                       SID_FIELD_NAME])
+            stacked_last_per_qtr = last_per_qtr.stack([SID_FIELD_NAME,
+                                                       NORMALIZED_QUARTERS])
             # Set date index name for ease of reference
-            stacked_last_per_qtr.index.set_names(SIMULTATION_DATES, 0, True)
+            stacked_last_per_qtr.index.set_names(SIMULTATION_DATES,
+                                                 level=0,
+                                                 inplace=True)
+            # We want to know the most recent/next event relative to each date.
+            stacked_last_per_qtr = stacked_last_per_qtr.sort(
+                EVENT_DATE_FIELD_NAME
+            )
             # Determine which quarter is next/previous for each date.
             shifted_qtr_data = self.load_quarters(num_quarters,
                                                   stacked_last_per_qtr)
             zero_qtr_idx = shifted_qtr_data.index
             requested_qtr_idx = shifted_qtr_data.set_index([
-                    shifted_qtr_data.index.get_level_values(
-                        SIMULTATION_DATES
-                    ),
-                    shifted_qtr_data[SHIFTED_NORMALIZED_QTRS],
-                    shifted_qtr_data.index.get_level_values(
-                        SID_FIELD_NAME
-                    )]
-            ).index
+                shifted_qtr_data.index.get_level_values(
+                    SIMULTATION_DATES
+                ),
+                shifted_qtr_data.index.get_level_values(
+                    SID_FIELD_NAME
+                ),
+                shifted_qtr_data[SHIFTED_NORMALIZED_QTRS]
+            ]).index
 
             for c in columns:
-                column_name = name_map[c]
+                column_name = self.name_map[c.name]
                 adjusted_array = self.get_adjustments(zero_qtr_idx,
                                                       requested_qtr_idx,
                                                       stacked_last_per_qtr,
@@ -343,26 +399,68 @@ class QuarterEstimatesLoader(PipelineLoader):
                                                       column_name,
                                                       c,
                                                       mask,
-                                                      assets,
-                                                      self.qtr_crossover_point)
+                                                      assets)
                 out[c] = adjusted_array
         return out
 
 
 class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
-    qtr_crossover_point = 'right'
+    def create_overwrite_for_estimate(self,
+                                      column,
+                                      column_name,
+                                      last_per_qtr,
+                                      next_qtr_start_idx,
+                                      overwrite,
+                                      requested_quarter,
+                                      sid,
+                                      sid_idx):
+        return [overwrite(
+            0,
+            # overwrite thru last qtr
+            next_qtr_start_idx - 1,
+            sid_idx,
+            sid_idx,
+            last_per_qtr[
+                column_name,
+                requested_quarter,
+                sid
+            ][0:next_qtr_start_idx].values)]
 
     def load_quarters(self, num_quarters, stacked_last_per_qtr):
-        # Filter for releases that are on or after each simulation date and
-        # determine the next quarter by picking out the upcoming release for
-        # each date in the index.
-        stacked_last_per_qtr = stacked_last_per_qtr.sort(
-            EVENT_DATE_FIELD_NAME
-        )
+        """
+        Filters for releases that are on or after each simulation date and
+        determines the next quarter by picking out the upcoming release for
+        each date in the index. Adds a SHIFTED_NORMALIZED_QTRS column which
+        contains the requested next quarter for each calendar date and sid.
+
+        Parameters
+        ----------
+        num_quarters : int
+            Number of quarters to go out in the future.
+        stacked_last_per_qtr : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters with each row being the latest estimate for the row's
+            index values, sorted by event date.
+
+        Returns
+        -------
+        next_releases_per_date : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters, keeping only rows with next event information relative to
+            the index values and with an added column for
+            SHIFTED_NORMALIZED_QTRS, which contains the requested quarter for
+            each row.
+        """
+
+        # We reset the index here because in pandas3, a groupby on the index
+        # will set the index to just the items in the groupby, so we will lose
+        # the normalized quarters.
         next_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
             stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
-        ].groupby(level=[SIMULTATION_DATES, SID_FIELD_NAME]).nth(0)
+        ].reset_index(NORMALIZED_QUARTERS).groupby(
+            level=[SIMULTATION_DATES, SID_FIELD_NAME]
+        ).nth(0).set_index(NORMALIZED_QUARTERS, append=True)
         next_releases_per_date[
             SHIFTED_NORMALIZED_QTRS
         ] = next_releases_per_date.index.get_level_values(
@@ -372,18 +470,57 @@ class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
 
 
 class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
-    qtr_crossover_point = 'left'
+    def create_overwrite_for_estimate(self,
+                                      column,
+                                      column_name,
+                                      last_per_qtr,
+                                      next_qtr_start_idx,
+                                      overwrite,
+                                      requested_quarter,
+                                      sid,
+                                      sid_idx):
+        return self.overwrite_with_null(column,
+                                        last_per_qtr,
+                                        next_qtr_start_idx,
+                                        overwrite,
+                                        sid_idx)
 
     def load_quarters(self, num_quarters, stacked_last_per_qtr):
-        # Filter for releases that are on or before each simulation date and
-        # determine the previous quarter by picking out the upcoming release
-        # for each date in the index.
-        stacked_last_per_qtr = stacked_last_per_qtr.sort(EVENT_DATE_FIELD_NAME)
+        """
+        Filters for releases that are on or before each simulation date and
+        determines the previous quarter by picking out the most recent
+        release relative to each date in the index. Adds a
+        SHIFTED_NORMALIZED_QTRS column which contains the requested previous
+        quarter for each calendar date and sid.
+
+        Parameters
+        ----------
+        num_quarters : int
+            Number of quarters to go out in the past.
+        stacked_last_per_qtr : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters with each row being the latest estimate for the row's
+            index values, sorted by event date.
+
+        Returns
+        -------
+        previous_releases_per_date : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters, keeping only rows that have a previous event relative
+            to the index values and with an added column for
+            SHIFTED_NORMALIZED_QTRS, which contains the requested quarter for
+            each row.
+        """
+
+        # We reset the index here because in pandas3, a groupby on the index
+        # will set the index to just the items in the groupby, so we will lose
+        # the normalized quarters.
         previous_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
-            stacked_last_per_qtr.index.get_level_values(
-                SIMULTATION_DATES
-            )].groupby(level=[SIMULTATION_DATES, SID_FIELD_NAME]).nth(-1)
+            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+        ].reset_index(NORMALIZED_QUARTERS).groupby(
+            level=[SIMULTATION_DATES, SID_FIELD_NAME]
+        ).nth(-1).set_index(NORMALIZED_QUARTERS, append=True)
         previous_releases_per_date[
             SHIFTED_NORMALIZED_QTRS
         ] = previous_releases_per_date.index.get_level_values(
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 5bd91132..125dacfb 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -276,7 +276,7 @@ def check_data_query_args(data_query_time, data_query_tz):
         )
 
 
-def last_in_date_group(df, reindex, dates, assets, have_sids=True,
+def last_in_date_group(df, dates, assets, reindex=True, have_sids=True,
                        extra_groupers=[]):
     """
     Determine the last piece of information known on each date in the date
@@ -286,14 +286,14 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
     ----------
     df : pd.DataFrame
         The DataFrame containing the data to be grouped.
-    reindex : bool
-        Whether or not the DataFrame should be reindexed against the date
-        index. This will add back any dates to the index that were grouped
-        away.
     dates : pd.DatetimeIndex
         The dates to use for grouping and reindexing.
     assets : pd.Int64Index
         The assets that should be included in the column multiindex.
+    reindex : bool
+        Whether or not the DataFrame should be reindexed against the date
+        index. This will add back any dates to the index that were grouped
+        away.
     have_sids : bool
         Whether or not the DataFrame has sids. If it does, they will be used
         in the groupby.
@@ -307,11 +307,11 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
         levels of a multiindex of columns.
 
     """
-    idx = dates[dates.searchsorted(
+    idx = [dates[dates.searchsorted(
         df[TS_FIELD_NAME].values.astype('datetime64[D]')
-    )]
+    )]]
     if have_sids:
-        idx = [idx, SID_FIELD_NAME]
+        idx += [SID_FIELD_NAME]
     idx += extra_groupers
 
     last_in_group = df.drop(TS_FIELD_NAME, axis=1).groupby(
@@ -321,7 +321,7 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
 
     # For the number of things that we're grouping by (except TS), unstack
     # the df
-    last_in_group = last_in_group.unstack([-1, -2])
+    last_in_group = last_in_group.unstack(list(range(-1, -len(idx), -1)))
 
     if reindex:
         if have_sids:
@@ -339,7 +339,7 @@ def last_in_date_group(df, reindex, dates, assets, have_sids=True,
     return last_in_group
 
 
-def ffill_across_cols(df, columns):
+def ffill_across_cols(df, columns, name_map):
     """
     Forward fill values in a DataFrame with special logic to handle cases
     that pd.DataFrame.ffill cannot and cast columns to appropriate types.
@@ -351,6 +351,9 @@ def ffill_across_cols(df, columns):
     columns : list of BoundColumn
         The BoundColumns that correspond to columns in the DataFrame to which
         special filling and/or casting logic should be applied.
+    name_map: map of string -> string
+        Mapping from the name of each BoundColumn to the associated column
+        name in `df`.
     """
     df.ffill(inplace=True)
 
@@ -369,18 +372,19 @@ def ffill_across_cols(df, columns):
     #    pandas to replace NaNs in an object column with None using fillna,
     #    so we have to roll our own instead using df.where.
     for column in columns:
+        column_name = name_map[column.name]
         # Special logic for strings since `fillna` doesn't work if the
         # missing value is `None`.
         if column.dtype == categorical_dtype:
-            df[column.name] = df[
+            df[column_name] = df[
                 column.name
-            ].where(pd.notnull(df[column.name]),
+            ].where(pd.notnull(df[column_name]),
                     column.missing_value)
         else:
             # We need to execute `fillna` before `astype` in case the
             # column contains NaNs and needs to be cast to bool or int.
             # This is so that the NaNs are replaced first, since pandas
             # can't convert NaNs for those types.
-            df[column.name] = df[
-                column.name
+            df[column_name] = df[
+                column_name
             ].fillna(column.missing_value).astype(column.dtype)
diff --git a/zipline/testing/core.py b/zipline/testing/core.py
index f9832767..19240a5b 100644
--- a/zipline/testing/core.py
+++ b/zipline/testing/core.py
@@ -49,8 +49,14 @@ from zipline.pipeline.loaders.testing import make_seeded_random_loader
 from zipline.utils import security_list
 from zipline.utils.calendars import get_calendar
 from zipline.utils.input_validation import expect_dimensions
+<<<<<<< HEAD
 from zipline.utils.numpy_utils import as_column, isnat
 from zipline.utils.pandas_utils import timedelta_to_integral_seconds
+=======
+from zipline.utils.numpy_utils import (
+    as_column,
+)
+>>>>>>> WIP
 from zipline.utils.sentinel import sentinel
 
 import numpy as np
diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py
index f0e2aaa5..1ce01627 100644
--- a/zipline/testing/fixtures.py
+++ b/zipline/testing/fixtures.py
@@ -34,13 +34,14 @@ from ..finance.trading import TradingEnvironment
 from ..utils import factory
 from ..utils.classproperty import classproperty
 from ..utils.final import FinalMeta, final
-from .core import tmp_asset_finder, make_simple_equity_info
+from .core import (tmp_asset_finder, make_simple_equity_info)
 from zipline.assets import Equity, Future
 from zipline.pipeline import SimplePipelineEngine
 from zipline.pipeline.loaders.testing import make_seeded_random_loader
 from zipline.utils.calendars import (
     get_calendar,
-    register_calendar)
+    register_calendar
+)
 
 
 class ZiplineTestCase(with_metaclass(FinalMeta, TestCase)):

From 90984be20cff37415addb95362d23e1daf87793a Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 13 Sep 2016 10:53:00 -0400
Subject: [PATCH 11/16] TST: add test for missing num_quarters and clean up
 tests

---
 tests/pipeline/test_quarters_estimates.py | 209 ++++++++++++++--------
 1 file changed, 130 insertions(+), 79 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 9217df77..56e7827b 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -4,7 +4,7 @@ from nose.tools import assert_true
 from nose_parameterized import parameterized
 import numpy as np
 import pandas as pd
-from pandas.util.testing import assert_frame_equal
+from pandas.util.testing import assert_frame_equal, assert_series_equal
 from toolz import merge
 
 from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
@@ -22,6 +22,7 @@ from zipline.pipeline.loaders.blaze.estimates import (
     BlazePreviousEstimatesLoader
 )
 from zipline.pipeline.loaders.quarter_estimates import (
+    INVALID_NUM_QTRS_MESSAGE,
     NextQuartersEstimatesLoader,
     normalize_quarters,
     PreviousQuartersEstimatesLoader,
@@ -32,7 +33,7 @@ from zipline.testing.fixtures import (
     WithTradingSessions,
     ZiplineTestCase,
 )
-from zipline.testing.predicates import assert_equal
+from zipline.testing.predicates import assert_equal, assert_raises_regex
 from zipline.utils.numpy_utils import datetime64ns_dtype
 from zipline.utils.numpy_utils import float64_dtype
 
@@ -51,6 +52,12 @@ def QuartersEstimates(num_qtr):
     return QtrEstimates
 
 
+def QuartersEstimatesNoNumQuartersAttr(num_qtr):
+    class QtrEstimates(Estimates):
+        name = Estimates
+    return QtrEstimates
+
+
 class WithEstimates(WithTradingSessions, WithAssetFinder):
     """
     ZiplineTestCase mixin providing cls.loader and cls.events as class
@@ -77,8 +84,13 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
     def make_loader(cls, events, columns):
         raise NotImplementedError('make_loader')
 
+    @classmethod
+    def make_events(cls):
+        raise NotImplementedError('make_events')
+
     @classmethod
     def init_class_fixtures(cls):
+        cls.events = cls.make_events()
         cls.sids = cls.events[SID_FIELD_NAME].unique()
         cls.columns = {
             Estimates.event_date: 'event_date',
@@ -98,7 +110,7 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
         super(WithEstimates, cls).init_class_fixtures()
 
 
-class WithWrongNumQuarters(WithEstimates):
+class WithWrongLoaderDefinition(WithEstimates):
     """
     ZiplineTestCase mixin providing cls.events as a class level fixture and
     defining a test for all inheritors to use.
@@ -113,18 +125,48 @@ class WithWrongNumQuarters(WithEstimates):
     ------
     test_wrong_num_quarters_passed()
         Tests that loading with an incorrect quarter number raises an error.
+    test_no_num_quarters_attr()
+        Tests that the loader throws an AttributeError if the dataset being
+        loaded has no `num_quarters` attribute.
     """
-    events = pd.DataFrame({SID_FIELD_NAME: 0},
-                          columns=[SID_FIELD_NAME,
-                                   TS_FIELD_NAME,
-                                   EVENT_DATE_FIELD_NAME,
-                                   FISCAL_QUARTER_FIELD_NAME,
-                                   FISCAL_YEAR_FIELD_NAME,
-                                   'estimate'],
-                          index=[0])
+
+    @classmethod
+    def make_events(cls):
+        return pd.DataFrame({SID_FIELD_NAME: 0},
+                            columns=[SID_FIELD_NAME,
+                                     TS_FIELD_NAME,
+                                     EVENT_DATE_FIELD_NAME,
+                                     FISCAL_QUARTER_FIELD_NAME,
+                                     FISCAL_YEAR_FIELD_NAME,
+                                     'estimate'],
+                            index=[0])
 
     def test_wrong_num_quarters_passed(self):
-        dataset = QuartersEstimates(-1)
+        bad_dataset1 = QuartersEstimates(-1)
+        bad_dataset2 = QuartersEstimates(-2)
+        good_dataset = QuartersEstimates(1)
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+        columns = {c.name + str(dataset.num_quarters): c.latest
+                   for dataset in (bad_dataset1,
+                                   bad_dataset2,
+                                   good_dataset)
+                   for c in dataset.columns}
+        p = Pipeline(columns)
+
+        with self.assertRaises(ValueError) as e:
+            engine.run_pipeline(
+                p,
+                start_date=self.trading_days[0],
+                end_date=self.trading_days[-1],
+            )
+            assert_raises_regex(e, INVALID_NUM_QTRS_MESSAGE % "-1,-2")
+
+    def test_no_num_quarters_attr(self):
+        dataset = QuartersEstimatesNoNumQuartersAttr(1)
         engine = SimplePipelineEngine(
             lambda x: self.loader,
             self.trading_days,
@@ -132,7 +174,7 @@ class WithWrongNumQuarters(WithEstimates):
         )
         p = Pipeline({c.name: c.latest for c in dataset.columns})
 
-        with self.assertRaises(ValueError):
+        with self.assertRaises(AttributeError):
             engine.run_pipeline(
                 p,
                 start_date=self.trading_days[0],
@@ -140,7 +182,7 @@ class WithWrongNumQuarters(WithEstimates):
             )
 
 
-class PreviousWithWrongNumQuarters(WithWrongNumQuarters,
+class PreviousWithWrongNumQuarters(WithWrongLoaderDefinition,
                                    ZiplineTestCase):
     """
     Tests that previous quarter loader correctly breaks if an incorrect
@@ -151,7 +193,7 @@ class PreviousWithWrongNumQuarters(WithWrongNumQuarters,
         return PreviousQuartersEstimatesLoader(events, columns)
 
 
-class NextWithWrongNumQuarters(WithWrongNumQuarters,
+class NextWithWrongNumQuarters(WithWrongLoaderDefinition,
                                ZiplineTestCase):
     """
     Tests that next quarter loader correctly breaks if an incorrect
@@ -162,7 +204,7 @@ class NextWithWrongNumQuarters(WithWrongNumQuarters,
         return NextQuartersEstimatesLoader(events, columns)
 
 
-class WithEstimatesT0(WithEstimates):
+class WithEstimatesTimeZero(WithEstimates):
     """
     ZiplineTestCase mixin providing cls.events as a class level fixture and
     defining a test for all inheritors to use.
@@ -173,9 +215,9 @@ class WithEstimatesT0(WithEstimates):
         Generated dynamically in order to test inter-leavings of estimates and
         event dates for multiple quarters to make sure that we select the
         right immediate 'next' or 'previous' quarter relative to each date -
-        i.e., the right 't0' on the timeline. We care about selecting the
-        right 't0' because we use that to calculate which quarter's data needs
-        to be returned for each day.
+        i.e., the right 'time zero' on the timeline. We care about selecting
+        the right 'time zero' because we use that to calculate which quarter's
+        data needs to be returned for each day.
 
     Methods
     -------
@@ -189,8 +231,8 @@ class WithEstimatesT0(WithEstimates):
     Tests
     ------
     test_estimates()
-        Tests that we get the right 't0' value on each day for each sid and
-        for each column.
+        Tests that we get the right 'time zero' value on each day for each
+        sid and for each column.
     """
     q1_knowledge_dates = [pd.Timestamp('2015-01-01'),
                           pd.Timestamp('2015-01-04'),
@@ -212,7 +254,7 @@ class WithEstimatesT0(WithEstimates):
                         pd.Timestamp('2015-01-31')]
 
     @classmethod
-    def gen_estimates(cls):
+    def make_events(cls):
         """
         In order to determine which estimate we care about for a particular
         sid, we need to look at all estimates that we have for that sid and
@@ -296,8 +338,8 @@ class WithEstimatesT0(WithEstimates):
     @classmethod
     def init_class_fixtures(cls):
         # Must be generated before call to super since super uses `events`.
-        cls.events = cls.gen_estimates()
-        super(WithEstimatesT0, cls).init_class_fixtures()
+        cls.events = cls.make_events()
+        super(WithEstimatesTimeZero, cls).init_class_fixtures()
 
     def get_expected_estimate(self,
                               q1_knowledge,
@@ -344,14 +386,17 @@ class WithEstimatesT0(WithEstimates):
                         q2_knowledge,
                         comparable_date,
                     )
-                    if not expected_estimate.empty:
-                        for colname in sid_estimates.columns:
-                            expected_value = expected_estimate[colname]
-                            computed_value = sid_estimates.iloc[i][colname]
-                            assert_equal(expected_value, computed_value)
+                    # Have to explicitly check for None because
+                    # `expected_estimate` might be a Series.
+                    if expected_estimate is not None:
+                        assert_series_equal(
+                            sid_estimates.iloc[i],
+                            expected_estimate[sid_estimates.columns],
+                            check_names=False
+                        )
                     else:
-                        # There are no eligible 'next' estimates on this day;
-                        #  everything should be null.
+                        # There are no eligible 'next'/'previous' estimates on
+                        # this day; everything should be null.
                         assert_true(sid_estimates.iloc[i].isnull().all())
                 else:
                     # We don't know about any estimates on this day;
@@ -359,7 +404,7 @@ class WithEstimatesT0(WithEstimates):
                     assert_true(sid_estimates.iloc[i].isnull().all())
 
 
-class NextEstimate(WithEstimatesT0, ZiplineTestCase):
+class NextEstimate(WithEstimatesTimeZero, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return NextQuartersEstimatesLoader(events, columns)
@@ -372,17 +417,17 @@ class NextEstimate(WithEstimatesT0, ZiplineTestCase):
         # happening on this simulation date or later, then that's
         # the estimate we want to use.
         if (not q1_knowledge.empty and
-            q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+            q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
                 comparable_date):
             return q1_knowledge.iloc[-1]
         # If q1 has already happened or we don't know about it
         # yet and our latest knowledge indicates that q2 hasn't
         # happened yet, then that's the estimate we want to use.
         elif (not q2_knowledge.empty and
-              q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] >=
+              q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
                 comparable_date):
             return q2_knowledge.iloc[-1]
-        return pd.DataFrame()
+        return None
 
 
 class BlazeNextEstimateLoaderTestCase(NextEstimate):
@@ -398,7 +443,7 @@ class BlazeNextEstimateLoaderTestCase(NextEstimate):
         )
 
 
-class PreviousEstimate(WithEstimatesT0, ZiplineTestCase):
+class PreviousEstimate(WithEstimatesTimeZero, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
         return PreviousQuartersEstimatesLoader(events, columns)
@@ -413,14 +458,14 @@ class PreviousEstimate(WithEstimatesT0, ZiplineTestCase):
         # Otherwise, it'll be for q1, as long as the release date
         # for q1 has already happened.
         if (not q2_knowledge.empty and
-            q2_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+            q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
                 comparable_date):
             return q2_knowledge.iloc[-1]
         elif (not q1_knowledge.empty and
-              q1_knowledge.iloc[-1][EVENT_DATE_FIELD_NAME] <=
+              q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
                 comparable_date):
             return q1_knowledge.iloc[-1]
-        return pd.DataFrame()
+        return None
 
 
 class BlazePreviousEstimateLoaderTestCase(PreviousEstimate):
@@ -461,16 +506,19 @@ class WithEstimateMultipleQuarters(WithEstimates):
         out and checks that the returned columns contain data for the correct
         number of quarters out.
     """
-    events = pd.DataFrame({
-        SID_FIELD_NAME: [0] * 2,
-        TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
-                        pd.Timestamp('2015-01-06')],
-        EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
-                                pd.Timestamp('2015-01-20')],
-        'estimate': [1., 2.],
-        FISCAL_QUARTER_FIELD_NAME: [1, 2],
-        FISCAL_YEAR_FIELD_NAME: [2015, 2015]
-    })
+
+    @classmethod
+    def make_events(cls):
+        return pd.DataFrame({
+            SID_FIELD_NAME: [0] * 2,
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-01'),
+                            pd.Timestamp('2015-01-06')],
+            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-10'),
+                                    pd.Timestamp('2015-01-20')],
+            'estimate': [1., 2.],
+            FISCAL_QUARTER_FIELD_NAME: [1, 2],
+            FISCAL_YEAR_FIELD_NAME: [2015, 2015]
+        })
 
     @classmethod
     def init_class_fixtures(cls):
@@ -635,35 +683,6 @@ class WithEstimateWindows(WithEstimates):
         Tests that we overwrite values with the correct quarter's estimate at
         the correct dates when we have a factor that asks for a window of data.
     """
-    sid_0_timeline = pd.DataFrame({
-        TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
-                        pd.Timestamp('2015-01-07'),
-                        pd.Timestamp('2015-01-05'),
-                        pd.Timestamp('2015-01-17')],
-        EVENT_DATE_FIELD_NAME:
-            [pd.Timestamp('2015-01-10'),
-             pd.Timestamp('2015-01-10'),
-             pd.Timestamp('2015-01-20'),
-             pd.Timestamp('2015-01-20')],
-        'estimate': [10., 11.] + [20., 21.],
-        FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
-        FISCAL_YEAR_FIELD_NAME: 2015,
-        SID_FIELD_NAME: 0,
-    })
-
-    sid_1_timeline = pd.DataFrame({
-        TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
-                        pd.Timestamp('2015-01-12'),
-                        pd.Timestamp('2015-01-09'),
-                        pd.Timestamp('2015-01-15')],
-        EVENT_DATE_FIELD_NAME:
-            [pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
-             pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
-        'estimate': [10., 11.] + [30., 31.],
-        FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
-        FISCAL_YEAR_FIELD_NAME: 2015,
-        SID_FIELD_NAME: 1
-    })
 
     window_test_start_date = pd.Timestamp('2015-01-05')
     critical_dates = [pd.Timestamp('2015-01-09', tz='utc'),
@@ -673,7 +692,39 @@ class WithEstimateWindows(WithEstimates):
     # window length, starting date, num quarters out, timeline. Parameterizes
     # over number of quarters out.
     window_test_cases = list(itertools.product(critical_dates, (1, 2)))
-    events = pd.concat([sid_0_timeline, sid_1_timeline])
+
+    @classmethod
+    def make_events(cls):
+        sid_0_timeline = pd.DataFrame({
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
+                            pd.Timestamp('2015-01-07'),
+                            pd.Timestamp('2015-01-05'),
+                            pd.Timestamp('2015-01-17')],
+            EVENT_DATE_FIELD_NAME:
+                [pd.Timestamp('2015-01-10'),
+                 pd.Timestamp('2015-01-10'),
+                 pd.Timestamp('2015-01-20'),
+                 pd.Timestamp('2015-01-20')],
+            'estimate': [10., 11.] + [20., 21.],
+            FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
+            FISCAL_YEAR_FIELD_NAME: 2015,
+            SID_FIELD_NAME: 0,
+        })
+
+        sid_1_timeline = pd.DataFrame({
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
+                            pd.Timestamp('2015-01-12'),
+                            pd.Timestamp('2015-01-09'),
+                            pd.Timestamp('2015-01-15')],
+            EVENT_DATE_FIELD_NAME:
+                [pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
+                 pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
+            'estimate': [10., 11.] + [30., 31.],
+            FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
+            FISCAL_YEAR_FIELD_NAME: 2015,
+            SID_FIELD_NAME: 1
+        })
+        return pd.concat([sid_0_timeline, sid_1_timeline])
 
     @classmethod
     def make_expected_timelines(cls):

From f140cdb8d610662101b0f3a2c9a3b97986c7c8b5 Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Tue, 13 Sep 2016 15:52:32 -0400
Subject: [PATCH 12/16] MAINT: add checkpointing

BUG: add cols for sids with no data and get adjustments outside column loop
---
 zipline/pipeline/loaders/blaze/core.py        |  72 ++++----
 zipline/pipeline/loaders/blaze/estimates.py   |   7 +-
 zipline/pipeline/loaders/blaze/utils.py       |   6 +-
 zipline/pipeline/loaders/quarter_estimates.py | 172 +++++++++---------
 4 files changed, 134 insertions(+), 123 deletions(-)

diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 5264a954..61782c31 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -1028,22 +1028,10 @@ class BlazeLoader(dict):
 
             return odo(e[predicate][colnames], pd.DataFrame, **odo_kwargs)
 
-        if checkpoints is not None:
-            ts = checkpoints[TS_FIELD_NAME]
-            checkpoints_ts = odo(ts[ts <= lower_dt].max(), pd.Timestamp)
-            if pd.isnull(checkpoints_ts):
-                materialized_checkpoints = pd.DataFrame(columns=colnames)
-                lower = None
-            else:
-                materialized_checkpoints = odo(
-                    checkpoints[ts == checkpoints_ts][colnames],
-                    pd.DataFrame,
-                    **odo_kwargs
-                )
-                lower = checkpoints_ts
-        else:
-            materialized_checkpoints = pd.DataFrame(columns=colnames)
-            lower = None
+
+        lower, materialized_checkpoints = get_materialized_checkpoints(
+            checkpoints, colnames, lower_dt, odo_kwargs
+        )
 
         materialized_expr = self.pool.apply_async(collect_expr, (expr, lower))
         materialized_deltas = (
@@ -1137,6 +1125,7 @@ class BlazeLoader(dict):
             for column_idx, column in enumerate(columns)
         }
 
+
 global_loader = BlazeLoader.global_instance()
 
 
@@ -1168,12 +1157,32 @@ def bind_expression_to_resources(expr, resources):
     })
 
 
+def get_materialized_checkpoints(checkpoints, colnames, lower_dt, odo_kwargs):
+    if checkpoints is not None:
+        ts = checkpoints[TS_FIELD_NAME]
+        checkpoints_ts = odo(ts[ts <= lower_dt].max(), pd.Timestamp)
+        if pd.isnull(checkpoints_ts):
+            materialized_checkpoints = pd.DataFrame(columns=colnames)
+            lower = None
+        else:
+            materialized_checkpoints = odo(
+                checkpoints[ts == checkpoints_ts][colnames],
+                pd.DataFrame,
+                **odo_kwargs
+            )
+            lower = checkpoints_ts
+    else:
+        materialized_checkpoints = pd.DataFrame(columns=colnames)
+        lower = None
+    return lower, materialized_checkpoints
+
+
 def ffill_query_in_range(expr,
                          lower,
                          upper,
+                         checkpoints=None,
                          odo_kwargs=None,
-                         ts_field=TS_FIELD_NAME,
-                         sid_field=SID_FIELD_NAME):
+                         ts_field=TS_FIELD_NAME):
     """Query a blaze expression in a given time range properly forward filling
     from values that fall before the lower date.
 
@@ -1199,27 +1208,24 @@ def ffill_query_in_range(expr,
         start before the requested start date if a value is needed to ffill.
     """
     odo_kwargs = odo_kwargs or {}
-    filtered = expr[expr[ts_field] <= lower]
-    computed_lower = odo(
-        bz.by(
-            filtered[sid_field],
-            timestamp=filtered[ts_field].max(),
-        ).timestamp.min(),
-        pd.Timestamp,
-        **odo_kwargs
+    computed_lower, materialized_checkpoints = get_materialized_checkpoints(
+        checkpoints, expr.fields, lower, odo_kwargs
     )
     if pd.isnull(computed_lower):
         # If there is no lower date, just query for data in the date
         # range. It must all be null anyways.
         computed_lower = lower
 
-    raw = odo(
-        expr[
-            (expr[ts_field] >= computed_lower) &
-            (expr[ts_field] <= upper)
-        ],
-        pd.DataFrame,
-        **odo_kwargs
+    raw = pd.concat(
+        [materialized_checkpoints,
+         odo(
+             expr[
+                 (expr[ts_field] >= computed_lower) &
+                 (expr[ts_field] <= upper)
+             ],
+             pd.DataFrame,
+             **odo_kwargs
+         )]
     )
     raw.loc[:, ts_field] = raw.loc[:, ts_field].astype('datetime64[ns]')
     return raw
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index 864b05a7..b991c62c 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -63,7 +63,8 @@ class BlazeEstimatesLoader(PipelineLoader):
                  resources=None,
                  odo_kwargs=None,
                  data_query_time=None,
-                 data_query_tz=None):
+                 data_query_tz=None,
+                 checkpoints=None):
 
         dshape = expr.dshape
         if not istabular(dshape):
@@ -83,6 +84,7 @@ class BlazeEstimatesLoader(PipelineLoader):
         check_data_query_args(data_query_time, data_query_tz)
         self._data_query_time = data_query_time
         self._data_query_tz = data_query_tz
+        self._checkpoints = checkpoints
 
     def load_adjusted_array(self, columns, dates, assets, mask):
         raw = load_raw_data(assets,
@@ -90,7 +92,8 @@ class BlazeEstimatesLoader(PipelineLoader):
                             self._data_query_time,
                             self._data_query_tz,
                             self._expr,
-                            self._odo_kwargs)
+                            self._odo_kwargs,
+                            checkpoints=self._checkpoints)
 
         return self.loader(
             raw,
diff --git a/zipline/pipeline/loaders/blaze/utils.py b/zipline/pipeline/loaders/blaze/utils.py
index 963d9f9a..b5be9cd9 100644
--- a/zipline/pipeline/loaders/blaze/utils.py
+++ b/zipline/pipeline/loaders/blaze/utils.py
@@ -11,7 +11,8 @@ def load_raw_data(assets,
                   data_query_time,
                   data_query_tz,
                   expr,
-                  odo_kwargs):
+                  odo_kwargs,
+                  checkpoints=None):
     """
     given an expression representing data to load, perform normalization and
     forward-filling and return the data, materialized.
@@ -48,7 +49,8 @@ def load_raw_data(assets,
         expr,
         lower_dt,
         upper_dt,
-        odo_kwargs,
+        checkpoints=checkpoints,
+        odo_kwargs=odo_kwargs,
     )
     sids = raw[SID_FIELD_NAME]
     raw.drop(
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
index 54686a6e..71ff3a32 100644
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ b/zipline/pipeline/loaders/quarter_estimates.py
@@ -1,5 +1,7 @@
+from collections import defaultdict
 from abc import abstractmethod
 import numpy as np
+import pandas as pd
 from six import viewvalues
 from toolz import groupby
 
@@ -152,30 +154,22 @@ class QuarterEstimatesLoader(PipelineLoader):
         return requested_qtr_data
 
     def get_adjustments(self,
-                        zero_qtr_idx,
-                        requested_qtr_idx,
-                        stacked_last_per_qtr,
+                        zero_qtr_data,
+                        requested_qtr_data,
                         last_per_qtr,
                         dates,
-                        column_name,
-                        column,
-                        mask,
-                        assets):
+                        assets,
+                        columns):
         """
         Creates an AdjustedArray from the given estimates data for the given
         dates.
 
         Parameters
         ----------
-        zero_qtr_idx : pd.MultiIndex
-            The index of the row of the zeroth (immediately next/previous)
-            quarter from each date for each sid.
-        requested_qtr_idx : pd.MultiIndex
-            The index of the row of the requested quarter from each date for
-            each sid.
-        stacked_last_per_qtr : pd.DataFrame
-            The latest estimate known per sid per date per quarter with the
-            dates, normalized quarter, and sid as the index.
+        zero_qtr_data : pd.DataFrame
+            The 'time zero' data for each date/sid.
+        zero_qtr_data : pd.DataFrame
+            The data for the requested quarter.
         last_per_qtr : pd.DataFrame
             The latest estimate known per sid per date per quarter with
             dates as the index and normalized quarter and sid in the columns
@@ -198,18 +192,11 @@ class QuarterEstimatesLoader(PipelineLoader):
         adjusted_array : AdjustedArray
             The array of data and overwrites for the given column.
         """
-        adjustments = {}
-        requested_qtr_data = self.get_requested_data_for_col(
-            stacked_last_per_qtr, requested_qtr_idx, dates
-        )
-        zero_qtr_data = stacked_last_per_qtr.loc[zero_qtr_idx]
+        col_to_adjustments = defaultdict(dict)
         # We no longer need this in the index, but we do need it as a column
         # to calculate adjustments.
         zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
-        if column.dtype == datetime64ns_dtype:
-            overwrite = Datetime641DArrayOverwrite
-        else:
-            overwrite = Float641DArrayOverwrite
+
         for sid_idx, sid in enumerate(assets):
             zero_qtr_sid_data = zero_qtr_data[
                 zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
@@ -243,38 +230,35 @@ class QuarterEstimatesLoader(PipelineLoader):
                     if isinstance(self, PreviousQuartersEstimatesLoader)
                     else 'right'
                 )
-                adjustments[next_qtr_start_idx] = \
-                    self.create_overwrite_for_quarter(
-                        next_qtr_start_idx,
-                        column,
-                        column_name,
-                        dates,
-                        last_per_qtr,
-                        overwrite,
-                        qtrs_with_estimates_for_sid,
-                        requested_qtr_data,
-                        sid,
-                        sid_idx,
-                    )
-
-        return AdjustedArray(
-            requested_qtr_data[column_name].values.astype(column.dtype),
-            mask,
-            dict(adjustments),
-            column.missing_value,
-        )
+                self.create_overwrite_for_quarter(
+                    col_to_adjustments,
+                    next_qtr_start_idx,
+                    dates,
+                    last_per_qtr,
+                    qtrs_with_estimates_for_sid,
+                    requested_qtr_data,
+                    sid,
+                    sid_idx,
+                    columns,
+                )
+        return col_to_adjustments
 
     def create_overwrite_for_quarter(self,
+                                     col_to_adjustments,
                                      next_qtr_start_idx,
-                                     column,
-                                     column_name,
                                      dates,
                                      last_per_qtr,
-                                     overwrite,
                                      quarters_with_estimates_for_sid,
                                      requested_qtr_data,
                                      sid,
-                                     sid_idx):
+                                     sid_idx,
+                                     columns):
+        overwrites_dict = {}
+        for col in columns:
+            if col.dtype == datetime64ns_dtype:
+                overwrites_dict[col] = Datetime641DArrayOverwrite
+            else:
+                overwrites_dict[col] = Float641DArrayOverwrite
         # Only add adjustments if the next quarter starts somewhere in
         # our date index for this sid. Our 'next' quarter can never
         # start at index 0; a starting index of 0 means that the next
@@ -285,32 +269,35 @@ class QuarterEstimatesLoader(PipelineLoader):
             requested_quarter = requested_qtr_data[
                 SHIFTED_NORMALIZED_QTRS
             ][sid].iloc[next_qtr_start_idx]
-
-            # If there are estimates for the requested quarter,
-            # overwrite all values going up to the starting index of
-            # that quarter with estimates for that quarter.
-            if requested_quarter in quarters_with_estimates_for_sid:
-                return self.create_overwrite_for_estimate(
-                    column,
-                    column_name,
-                    last_per_qtr,
-                    next_qtr_start_idx,
-                    overwrite,
-                    requested_quarter,
-                    sid,
-                    sid_idx
-                )
-            # There are no estimates for the quarter. Overwrite all
-            # values going up to the starting index of that quarter
-            # with the missing value for this column.
-            else:
-                return self.overwrite_with_null(
-                    column,
-                    last_per_qtr,
-                    next_qtr_start_idx,
-                    overwrite,
-                    sid_idx
-                )
+            for col in columns:
+                column_name = self.name_map[col.name]
+                # If there are estimates for the requested quarter,
+                # overwrite all values going up to the starting index of
+                # that quarter with estimates for that quarter.
+                if requested_quarter in quarters_with_estimates_for_sid:
+                    col_to_adjustments[column_name][next_qtr_start_idx] = \
+                        self.create_overwrite_for_estimate(
+                            col,
+                            column_name,
+                            last_per_qtr,
+                            next_qtr_start_idx,
+                            overwrites_dict[col],
+                            requested_quarter,
+                            sid,
+                            sid_idx
+                        )
+                # There are no estimates for the quarter. Overwrite all
+                # values going up to the starting index of that quarter
+                # with the missing value for this column.
+                else:
+                    col_to_adjustments[column_name][next_qtr_start_idx] =\
+                        self.overwrite_with_null(
+                            col,
+                            last_per_qtr,
+                            next_qtr_start_idx,
+                            overwrites_dict[col],
+                            sid_idx
+                        )
 
     def overwrite_with_null(self,
                             column,
@@ -353,7 +340,6 @@ class QuarterEstimatesLoader(PipelineLoader):
 
             )
         out = {}
-
         for num_quarters, columns in groups.items():
             # Determine the last piece of information we know for each column
             # on each date in the index for each sid and quarter.
@@ -388,19 +374,33 @@ class QuarterEstimatesLoader(PipelineLoader):
                 ),
                 shifted_qtr_data[SHIFTED_NORMALIZED_QTRS]
             ]).index
+            requested_qtr_data = self.get_requested_data_for_col(
+                stacked_last_per_qtr, requested_qtr_idx, dates
+            )
 
-            for c in columns:
-                column_name = self.name_map[c.name]
-                adjusted_array = self.get_adjustments(zero_qtr_idx,
-                                                      requested_qtr_idx,
-                                                      stacked_last_per_qtr,
+            zero_qtr_data = stacked_last_per_qtr.loc[zero_qtr_idx]
+
+            col_to_adjustments = self.get_adjustments(zero_qtr_data,
+                                                      requested_qtr_data,
                                                       last_per_qtr,
                                                       dates,
-                                                      column_name,
-                                                      c,
-                                                      mask,
-                                                      assets)
-                out[c] = adjusted_array
+                                                      assets,
+                                                      columns)
+            for col in columns:
+                column_name = self.name_map[col.name]
+                # We may have dropped assets if they never have any data for the
+                # requested quarter.
+                df = pd.DataFrame(data=requested_qtr_data[column_name],
+                                  index=dates,
+                                  columns=assets,
+                                  dtype=col.dtype)
+
+                out[col] = AdjustedArray(
+                    df.values.astype(col.dtype),
+                    mask,
+                    dict(col_to_adjustments[column_name]),
+                    col.missing_value,
+                )
         return out
 
 

From 65d15d3960d959ce205886ba3b1acfd6c35fb81e Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Wed, 14 Sep 2016 10:26:29 -0400
Subject: [PATCH 13/16] TST: add test for sid with no data

MAINT: optimization - only look at assets appearing in data

TST: simplify test

DOC: add documentation for checkpoints

MAINT: explicitly cast event date field to datetime

MAINT: add back import

TST: fix indexing to remove setting with copy warning
---
 tests/pipeline/test_quarters_estimates.py     | 192 +++---
 zipline/pipeline/loaders/blaze/core.py        |  22 +-
 zipline/pipeline/loaders/blaze/estimates.py   |  27 +-
 zipline/pipeline/loaders/blaze/events.py      |   6 +
 zipline/pipeline/loaders/blaze/utils.py       |  11 +-
 .../pipeline/loaders/earnings_estimates.py    | 619 ++++++++++++++++++
 zipline/pipeline/loaders/quarter_estimates.py | 529 ---------------
 zipline/pipeline/loaders/utils.py             |   7 +-
 zipline/testing/fixtures.py                   |   5 +-
 9 files changed, 776 insertions(+), 642 deletions(-)
 create mode 100644 zipline/pipeline/loaders/earnings_estimates.py
 delete mode 100644 zipline/pipeline/loaders/quarter_estimates.py

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 56e7827b..4f7b8366 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -4,7 +4,6 @@ from nose.tools import assert_true
 from nose_parameterized import parameterized
 import numpy as np
 import pandas as pd
-from pandas.util.testing import assert_frame_equal, assert_series_equal
 from toolz import merge
 
 from zipline.pipeline import SimplePipelineEngine, Pipeline, CustomFactor
@@ -21,11 +20,11 @@ from zipline.pipeline.loaders.blaze.estimates import (
     BlazeNextEstimatesLoader,
     BlazePreviousEstimatesLoader
 )
-from zipline.pipeline.loaders.quarter_estimates import (
+from zipline.pipeline.loaders.earnings_estimates import (
     INVALID_NUM_QTRS_MESSAGE,
-    NextQuartersEstimatesLoader,
+    NextEarningsEstimatesLoader,
     normalize_quarters,
-    PreviousQuartersEstimatesLoader,
+    PreviousEarningsEstimatesLoader,
     split_normalized_quarters,
 )
 from zipline.testing.fixtures import (
@@ -78,7 +77,7 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
 
     # Short window defined in order for test to run faster.
     START_DATE = pd.Timestamp('2014-12-28')
-    END_DATE = pd.Timestamp('2015-02-03')
+    END_DATE = pd.Timestamp('2015-02-04')
 
     @classmethod
     def make_loader(cls, events, columns):
@@ -88,10 +87,14 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
     def make_events(cls):
         raise NotImplementedError('make_events')
 
+    @classmethod
+    def get_sids(cls):
+        return cls.events[SID_FIELD_NAME].unique()
+
     @classmethod
     def init_class_fixtures(cls):
         cls.events = cls.make_events()
-        cls.sids = cls.events[SID_FIELD_NAME].unique()
+        cls.ASSET_FINDER_EQUITY_SIDS = cls.get_sids()
         cls.columns = {
             Estimates.event_date: 'event_date',
             Estimates.fiscal_quarter: 'fiscal_quarter',
@@ -101,9 +104,6 @@ class WithEstimates(WithTradingSessions, WithAssetFinder):
         cls.loader = cls.make_loader(cls.events, {column.name: val for
                                                   column, val in
                                                   cls.columns.items()})
-        cls.ASSET_FINDER_EQUITY_SIDS = list(
-            cls.events[SID_FIELD_NAME].unique()
-        )
         cls.ASSET_FINDER_EQUITY_SYMBOLS = [
             's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
         ]
@@ -190,7 +190,7 @@ class PreviousWithWrongNumQuarters(WithWrongLoaderDefinition,
     """
     @classmethod
     def make_loader(cls, events, columns):
-        return PreviousQuartersEstimatesLoader(events, columns)
+        return PreviousEarningsEstimatesLoader(events, columns)
 
 
 class NextWithWrongNumQuarters(WithWrongLoaderDefinition,
@@ -201,7 +201,7 @@ class NextWithWrongNumQuarters(WithWrongLoaderDefinition,
     """
     @classmethod
     def make_loader(cls, events, columns):
-        return NextQuartersEstimatesLoader(events, columns)
+        return NextEarningsEstimatesLoader(events, columns)
 
 
 class WithEstimatesTimeZero(WithEstimates):
@@ -234,24 +234,27 @@ class WithEstimatesTimeZero(WithEstimates):
         Tests that we get the right 'time zero' value on each day for each
         sid and for each column.
     """
+    # Shorter date range for performance
+    END_DATE = pd.Timestamp('2015-01-28')
+
     q1_knowledge_dates = [pd.Timestamp('2015-01-01'),
                           pd.Timestamp('2015-01-04'),
-                          pd.Timestamp('2015-01-08'),
-                          pd.Timestamp('2015-01-12')]
-    q2_knowledge_dates = [pd.Timestamp('2015-01-16'),
+                          pd.Timestamp('2015-01-07'),
+                          pd.Timestamp('2015-01-11')]
+    q2_knowledge_dates = [pd.Timestamp('2015-01-14'),
+                          pd.Timestamp('2015-01-17'),
                           pd.Timestamp('2015-01-20'),
-                          pd.Timestamp('2015-01-24'),
-                          pd.Timestamp('2015-01-28')]
+                          pd.Timestamp('2015-01-23')]
     # We want to model the possibility of an estimate predicting a release date
     # that doesn't match the actual release. This could be done by dynamically
     # generating more combinations with different release dates, but that
     # significantly increases the amount of time it takes to run the tests.
     # These hard-coded cases are sufficient to know that we can update our
     # beliefs when we get new information.
-    q1_release_dates = [pd.Timestamp('2015-01-15'),
-                        pd.Timestamp('2015-01-16')]  # One day late
-    q2_release_dates = [pd.Timestamp('2015-01-30'),  # One day early
-                        pd.Timestamp('2015-01-31')]
+    q1_release_dates = [pd.Timestamp('2015-01-13'),
+                        pd.Timestamp('2015-01-14')]  # One day late
+    q2_release_dates = [pd.Timestamp('2015-01-25'),  # One day early
+                        pd.Timestamp('2015-01-26')]
 
     @classmethod
     def make_events(cls):
@@ -300,8 +303,15 @@ class WithEstimatesTimeZero(WithEstimates):
                                                              q2e2,
                                                              sid))
                 sid_releases.append(cls.create_releases_df(sid))
+        return pd.concat(sid_estimates +
+                         sid_releases).reset_index(drop=True)
 
-        return pd.concat(sid_estimates + sid_releases).reset_index(drop=True)
+    @classmethod
+    def get_sids(cls):
+        sids = cls.events[SID_FIELD_NAME].unique()
+        # Tack on an extra sid to make sure that sids with no data are
+        # included but have all-null columns.
+        return list(sids) + [max(sids) + 1]
 
     @classmethod
     def create_releases_df(cls, sid):
@@ -309,10 +319,10 @@ class WithEstimatesTimeZero(WithEstimates):
         # ranges in order to reduce the number of dates we need to iterate
         # through when testing.
         return pd.DataFrame({
-            TS_FIELD_NAME: [pd.Timestamp('2015-01-15'),
-                            pd.Timestamp('2015-01-31')],
-            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-15'),
-                                    pd.Timestamp('2015-01-31')],
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-13'),
+                            pd.Timestamp('2015-01-26')],
+            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-13'),
+                                    pd.Timestamp('2015-01-26')],
             'estimate': [0.5, 0.8],
             FISCAL_QUARTER_FIELD_NAME: [1.0, 2.0],
             FISCAL_YEAR_FIELD_NAME: [2015.0, 2015.0],
@@ -337,8 +347,6 @@ class WithEstimatesTimeZero(WithEstimates):
 
     @classmethod
     def init_class_fixtures(cls):
-        # Must be generated before call to super since super uses `events`.
-        cls.events = cls.make_events()
         super(WithEstimatesTimeZero, cls).init_class_fixtures()
 
     def get_expected_estimate(self,
@@ -356,58 +364,42 @@ class WithEstimatesTimeZero(WithEstimates):
         )
         results = engine.run_pipeline(
             Pipeline({c.name: c.latest for c in dataset.columns}),
-            start_date=self.trading_days[0],
-            end_date=self.trading_days[-1],
+            start_date=self.trading_days[1],
+            end_date=self.trading_days[-2],
         )
-        for sid in self.sids:
+        for sid in self.ASSET_FINDER_EQUITY_SIDS:
             sid_estimates = results.xs(sid, level=1)
-            ts_sorted_estimates = self.events[
-                self.events[SID_FIELD_NAME] == sid
-            ].sort(TS_FIELD_NAME)
-            for i, date in enumerate(sid_estimates.index):
-                comparable_date = date.tz_localize(None)
-                # Filter out estimates we don't know about yet.
-                ts_eligible_estimates = ts_sorted_estimates[
-                    ts_sorted_estimates[TS_FIELD_NAME] <= comparable_date
+            # Separate assertion for all-null DataFrame to avoid setting
+            # column dtypes on `all_expected`.
+            if sid == max(self.ASSET_FINDER_EQUITY_SIDS):
+                assert_true(sid_estimates.isnull().all().all())
+            else:
+                ts_sorted_estimates = self.events[
+                    self.events[SID_FIELD_NAME] == sid
+                ].sort(TS_FIELD_NAME)
+                q1_knowledge = ts_sorted_estimates[
+                    ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
                 ]
-                # If there are estimates we know about:
-                if not ts_eligible_estimates.empty:
-                    # Determine the last piece of information we know about
-                    # for q1 and q2. This takes advantage of the fact that we
-                    # only have 2 quarters in the test data.
-                    q1_knowledge = ts_eligible_estimates[
-                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
-                    ]
-                    q2_knowledge = ts_eligible_estimates[
-                        ts_eligible_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
-                    ]
-                    expected_estimate = self.get_expected_estimate(
-                        q1_knowledge,
-                        q2_knowledge,
-                        comparable_date,
-                    )
-                    # Have to explicitly check for None because
-                    # `expected_estimate` might be a DataFrame.
-                    if expected_estimate is not None:
-                        assert_series_equal(
-                            sid_estimates.iloc[i],
-                            expected_estimate[sid_estimates.columns],
-                            check_names=False
-                        )
-                    else:
-                        # There are no eligible 'next'/'previous' estimates on
-                        # this day; everything should be null.
-                        assert_true(sid_estimates.iloc[i].isnull().all())
-                else:
-                    # We don't know about any estimates on this day;
-                    # everything should be null.
-                    assert_true(sid_estimates.iloc[i].isnull().all())
+                q2_knowledge = ts_sorted_estimates[
+                    ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
+                ]
+                all_expected = pd.concat(
+                    [self.get_expected_estimate(
+                        q1_knowledge[q1_knowledge[TS_FIELD_NAME] <=
+                                     date.tz_localize(None)],
+                        q2_knowledge[q2_knowledge[TS_FIELD_NAME] <=
+                                     date.tz_localize(None)],
+                        date.tz_localize(None),
+                    ).set_index([[date]]) for date in sid_estimates.index],
+                    axis=0)
+                assert_equal(all_expected[sid_estimates.columns],
+                             sid_estimates)
 
 
 class NextEstimate(WithEstimatesTimeZero, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
-        return NextQuartersEstimatesLoader(events, columns)
+        return NextEarningsEstimatesLoader(events, columns)
 
     def get_expected_estimate(self,
                               q1_knowledge,
@@ -419,15 +411,16 @@ class NextEstimate(WithEstimatesTimeZero, ZiplineTestCase):
         if (not q1_knowledge.empty and
             q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
                 comparable_date):
-            return q1_knowledge.iloc[-1]
+            return q1_knowledge.iloc[-1:]
         # If q1 has already happened or we don't know about it
         # yet and our latest knowledge indicates that q2 hasn't
         # happened yet, then that's the estimate we want to use.
         elif (not q2_knowledge.empty and
               q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] >=
                 comparable_date):
-            return q2_knowledge.iloc[-1]
-        return None
+            return q2_knowledge.iloc[-1:]
+        return pd.DataFrame(columns=q1_knowledge.columns,
+                            index=[comparable_date])
 
 
 class BlazeNextEstimateLoaderTestCase(NextEstimate):
@@ -446,7 +439,7 @@ class BlazeNextEstimateLoaderTestCase(NextEstimate):
 class PreviousEstimate(WithEstimatesTimeZero, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
-        return PreviousQuartersEstimatesLoader(events, columns)
+        return PreviousEarningsEstimatesLoader(events, columns)
 
     def get_expected_estimate(self,
                               q1_knowledge,
@@ -460,12 +453,13 @@ class PreviousEstimate(WithEstimatesTimeZero, ZiplineTestCase):
         if (not q2_knowledge.empty and
             q2_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
                 comparable_date):
-            return q2_knowledge.iloc[-1]
+            return q2_knowledge.iloc[-1:]
         elif (not q1_knowledge.empty and
               q1_knowledge[EVENT_DATE_FIELD_NAME].iloc[-1] <=
                 comparable_date):
-            return q1_knowledge.iloc[-1]
-        return None
+            return q1_knowledge.iloc[-1:]
+        return pd.DataFrame(columns=q1_knowledge.columns,
+                            index=[comparable_date])
 
 
 class BlazePreviousEstimateLoaderTestCase(PreviousEstimate):
@@ -572,8 +566,8 @@ class WithEstimateMultipleQuarters(WithEstimates):
         # quarters out for each of the dataset columns.
         assert_equal(sorted(np.array(q1_columns + q2_columns)),
                      sorted(results.columns.values))
-        assert_frame_equal(self.expected_out.sort(axis=1),
-                           results.xs(0, level=1).sort(axis=1))
+        assert_equal(self.expected_out.sort(axis=1),
+                     results.xs(0, level=1).sort(axis=1))
 
 
 class NextEstimateMultipleQuarters(
@@ -581,17 +575,19 @@ class NextEstimateMultipleQuarters(
 ):
     @classmethod
     def make_loader(cls, events, columns):
-        return NextQuartersEstimatesLoader(events, columns)
+        return NextEarningsEstimatesLoader(events, columns)
 
     @classmethod
     def fill_expected_out(cls, expected):
         # Fill columns for 1 Q out
         for raw_name in cls.columns.values():
-            expected[raw_name + '1'].loc[
-                pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-11')
+            expected.loc[
+                pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-11'),
+                raw_name + '1'
             ] = cls.events[raw_name].iloc[0]
-            expected[raw_name + '1'].loc[
-                pd.Timestamp('2015-01-11'):pd.Timestamp('2015-01-20')
+            expected.loc[
+                pd.Timestamp('2015-01-11'):pd.Timestamp('2015-01-20'),
+                raw_name + '1'
             ] = cls.events[raw_name].iloc[1]
 
         # Fill columns for 2 Q out
@@ -599,19 +595,23 @@ class NextEstimateMultipleQuarters(
         # Q1's event happens; after Q1's event, we know 1 Q out but not 2 Qs
         # out.
         for col_name in ['estimate', 'event_date']:
-            expected[col_name + '2'].loc[
-                pd.Timestamp('2015-01-06'):pd.Timestamp('2015-01-10')
+            expected.loc[
+                pd.Timestamp('2015-01-06'):pd.Timestamp('2015-01-10'),
+                col_name + '2'
             ] = cls.events[col_name].iloc[1]
         # But we know what FQ and FY we'd need in both Q1 and Q2
         # because we know which FQ is next and can calculate from there
-        expected[FISCAL_QUARTER_FIELD_NAME + '2'].loc[
-            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-09')
+        expected.loc[
+            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-09'),
+            FISCAL_QUARTER_FIELD_NAME + '2'
         ] = 2
-        expected[FISCAL_QUARTER_FIELD_NAME + '2'].loc[
-            pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-20')
+        expected.loc[
+            pd.Timestamp('2015-01-12'):pd.Timestamp('2015-01-20'),
+            FISCAL_QUARTER_FIELD_NAME + '2'
         ] = 3
-        expected[FISCAL_YEAR_FIELD_NAME + '2'].loc[
-            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-20')
+        expected.loc[
+            pd.Timestamp('2015-01-01'):pd.Timestamp('2015-01-20'),
+            FISCAL_YEAR_FIELD_NAME + '2'
         ] = 2015
 
         return expected
@@ -624,7 +624,7 @@ class PreviousEstimateMultipleQuarters(
 
     @classmethod
     def make_loader(cls, events, columns):
-        return PreviousQuartersEstimatesLoader(events, columns)
+        return PreviousEarningsEstimatesLoader(events, columns)
 
     @classmethod
     def fill_expected_out(cls, expected):
@@ -804,7 +804,7 @@ class WithEstimateWindows(WithEstimates):
 class PreviousEstimateWindows(WithEstimateWindows, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
-        return PreviousQuartersEstimatesLoader(events, columns)
+        return PreviousEarningsEstimatesLoader(events, columns)
 
     @classmethod
     def make_expected_timelines(cls):
@@ -867,7 +867,7 @@ class PreviousEstimateWindows(WithEstimateWindows, ZiplineTestCase):
 class NextEstimateWindows(WithEstimateWindows, ZiplineTestCase):
     @classmethod
     def make_loader(cls, events, columns):
-        return NextQuartersEstimatesLoader(events, columns)
+        return NextEarningsEstimatesLoader(events, columns)
 
     @classmethod
     def make_expected_timelines(cls):
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 61782c31..d5c3e40b 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -1028,7 +1028,6 @@ class BlazeLoader(dict):
 
             return odo(e[predicate][colnames], pd.DataFrame, **odo_kwargs)
 
-
         lower, materialized_checkpoints = get_materialized_checkpoints(
             checkpoints, colnames, lower_dt, odo_kwargs
         )
@@ -1158,6 +1157,22 @@ def bind_expression_to_resources(expr, resources):
 
 
 def get_materialized_checkpoints(checkpoints, colnames, lower_dt, odo_kwargs):
+    """
+    Computes a lower bound and a DataFrame checkpoints.
+
+    Parameters
+    ----------
+    checkpoints : Expr
+        Bound blaze expression for a checkpoints table from which to get a
+        computed lower bound.
+    colnames : iterable of str
+        The names of the columns for which checkpoints should be computed.
+    lower_dt : pd.Timestamp
+        The lower date being queried for that serves as an upper bound for
+        checkpoints.
+    odo_kwargs : dict, optional
+        The extra keyword arguments to pass to ``odo``.
+    """
     if checkpoints is not None:
         ts = checkpoints[TS_FIELD_NAME]
         checkpoints_ts = odo(ts[ts <= lower_dt].max(), pd.Timestamp)
@@ -1194,12 +1209,13 @@ def ffill_query_in_range(expr,
         The lower date to query for.
     upper : datetime
         The upper date to query for.
+    checkpoints : Expr, optional
+        Bound blaze expression for a checkpoints table from which to get a
+        computed lower bound.
     odo_kwargs : dict, optional
         The extra keyword arguments to pass to ``odo``.
     ts_field : str, optional
         The name of the timestamp field in the given blaze expression.
-    sid_field : str, optional
-        The name of the sid field in the given blaze expression.
 
     Returns
     -------
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index b991c62c..b63c58a6 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -3,11 +3,18 @@ from datashape import istabular
 from .core import (
     bind_expression_to_resources,
 )
+from zipline.pipeline.common import (
+    EVENT_DATE_FIELD_NAME,
+    FISCAL_QUARTER_FIELD_NAME,
+    FISCAL_YEAR_FIELD_NAME,
+    SID_FIELD_NAME,
+    TS_FIELD_NAME,
+)
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.blaze.utils import load_raw_data
-from zipline.pipeline.loaders.quarter_estimates import (
-    NextQuartersEstimatesLoader,
-    PreviousQuartersEstimatesLoader,
+from zipline.pipeline.loaders.earnings_estimates import (
+    NextEarningsEstimatesLoader,
+    PreviousEarningsEstimatesLoader,
     required_estimates_fields,
 )
 from zipline.pipeline.loaders.utils import (
@@ -35,6 +42,9 @@ class BlazeEstimatesLoader(PipelineLoader):
         The time to use for the data query cutoff.
     data_query_tz : tzinfo or str
         The timezeone to use for the data query cutoff.
+    checkpoints : Expr, optional
+        The expression representing checkpointed data to be used for faster
+        forward-filling of data from `expr`.
 
     Notes
     -----
@@ -55,6 +65,13 @@ class BlazeEstimatesLoader(PipelineLoader):
     If the '{TS_FIELD_NAME}' field is not included it is assumed that we
     start the backtest with knowledge of all announcements.
     """
+    __doc__ = __doc__.format(
+        SID_FIELD_NAME=SID_FIELD_NAME,
+        TS_FIELD_NAME=TS_FIELD_NAME,
+        FISCAL_YEAR_FIELD_NAME=FISCAL_YEAR_FIELD_NAME,
+        FISCAL_QUARTER_FIELD_NAME=FISCAL_QUARTER_FIELD_NAME,
+        EVENT_DATE_FIELD_NAME=EVENT_DATE_FIELD_NAME,
+    )
 
     @preprocess(data_query_tz=optionally(ensure_timezone))
     def __init__(self,
@@ -107,8 +124,8 @@ class BlazeEstimatesLoader(PipelineLoader):
 
 
 class BlazeNextEstimatesLoader(BlazeEstimatesLoader):
-    loader = NextQuartersEstimatesLoader
+    loader = NextEarningsEstimatesLoader
 
 
 class BlazePreviousEstimatesLoader(BlazeEstimatesLoader):
-    loader = PreviousQuartersEstimatesLoader
+    loader = PreviousEarningsEstimatesLoader
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index 56c050f2..6ecf3c6f 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -3,6 +3,8 @@ from datashape import istabular
 from .core import (
     bind_expression_to_resources,
 )
+from zipline.pipeline.common import SID_FIELD_NAME, TS_FIELD_NAME, \
+    EVENT_DATE_FIELD_NAME
 from zipline.pipeline.loaders.base import PipelineLoader
 from zipline.pipeline.loaders.blaze.utils import load_raw_data
 from zipline.pipeline.loaders.events import (
@@ -55,6 +57,10 @@ class BlazeEventsLoader(PipelineLoader):
     start the backtest with knowledge of all announcements.
     """
 
+    __doc__ = __doc__.format(SID_FIELD_NAME=SID_FIELD_NAME,
+                             TS_FIELD_NAME=TS_FIELD_NAME,
+                             EVENT_DATE_FIELD_NAME=EVENT_DATE_FIELD_NAME)
+
     @preprocess(data_query_tz=optionally(ensure_timezone))
     def __init__(self,
                  expr,
diff --git a/zipline/pipeline/loaders/blaze/utils.py b/zipline/pipeline/loaders/blaze/utils.py
index b5be9cd9..ab186c82 100644
--- a/zipline/pipeline/loaders/blaze/utils.py
+++ b/zipline/pipeline/loaders/blaze/utils.py
@@ -14,10 +14,11 @@ def load_raw_data(assets,
                   odo_kwargs,
                   checkpoints=None):
     """
-    given an expression representing data to load, perform normalization and
-    forward-filling and return the data, materialized.
+    Given an expression representing data to load, perform normalization and
+    forward-filling and return the data, materialized. Only accepts data with a
+    `sid` field.
 
-    parameters
+    Parameters
     ----------
     assets : pd.int64index
         the assets to load data for.
@@ -32,8 +33,10 @@ def load_raw_data(assets,
         the expression representing the data to load.
     odo_kwargs : dict
         extra keyword arguments to pass to odo when executing the expression.
+    checkpoints : expr, optional
+        the expression representing the checkpointed data for `expr`.
 
-    returns
+    Returns
     -------
     raw : pd.dataframe
         The result of computing expr and materializing the result as a
diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py
new file mode 100644
index 00000000..3397d105
--- /dev/null
+++ b/zipline/pipeline/loaders/earnings_estimates.py
@@ -0,0 +1,619 @@
+from collections import defaultdict
+from abc import abstractmethod, abstractproperty
+import pandas as pd
+from six import viewvalues
+from toolz import groupby
+
+from zipline.lib.adjusted_array import AdjustedArray
+from zipline.lib.adjustment import (
+    Datetime641DArrayOverwrite,
+    Datetime64Overwrite,
+    Float641DArrayOverwrite,
+    Float64Overwrite,
+)
+
+from zipline.pipeline.common import (
+    EVENT_DATE_FIELD_NAME,
+    FISCAL_QUARTER_FIELD_NAME,
+    FISCAL_YEAR_FIELD_NAME,
+    SID_FIELD_NAME,
+    TS_FIELD_NAME,
+)
+from zipline.pipeline.loaders.base import PipelineLoader
+from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype
+from zipline.pipeline.loaders.utils import (
+    ffill_across_cols,
+    last_in_date_group
+)
+
+
+INVALID_NUM_QTRS_MESSAGE = "Passed invalid number of quarters %s; " \
+                           "must pass a number of quarters >= 0"
+NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
+NEXT_FISCAL_YEAR = 'next_fiscal_year'
+NORMALIZED_QUARTERS = 'normalized_quarters'
+PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
+PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
+SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
+SIMULTATION_DATES = 'dates'
+
+
+def normalize_quarters(years, quarters):
+    return years * 4 + quarters - 1
+
+
+def split_normalized_quarters(normalized_quarters):
+    years = normalized_quarters // 4
+    quarters = normalized_quarters % 4
+    return years, quarters + 1
+
+
+def required_estimates_fields(columns):
+    """
+    Compute the set of resource columns required to serve
+    `columns`.
+    """
+    # These metadata columns are used to align event indexers.
+    return {
+        TS_FIELD_NAME,
+        SID_FIELD_NAME,
+        EVENT_DATE_FIELD_NAME,
+        FISCAL_QUARTER_FIELD_NAME,
+        FISCAL_YEAR_FIELD_NAME
+    }.union(
+        # We also expect any of the field names that our loadable columns
+        # are mapped to.
+        viewvalues(columns),
+    )
+
+
+def validate_column_specs(events, columns):
+    """
+    Verify that the columns of ``events`` can be used by a
+    EarningsEstimatesLoader to serve the BoundColumns described by
+    `columns`.
+    """
+    required = required_estimates_fields(columns)
+    received = set(events.columns)
+    missing = required - received
+    if missing:
+        raise ValueError(
+            "EarningsEstimatesLoader missing required columns {missing}.\n"
+            "Got Columns: {received}\n"
+            "Expected Columns: {required}".format(
+                missing=sorted(missing),
+                received=sorted(received),
+                required=sorted(required),
+            )
+        )
+
+
+class EarningsEstimatesLoader(PipelineLoader):
+    """
+    An abstract pipeline loader for estimates data that can load data a
+    variable number of quarters forwards/backwards from calendar dates
+    depending on the `num_quarters` attribute of the columns' dataset.
+
+    Parameters
+    ----------
+    estimates : pd.DataFrame
+        The raw estimates data.
+        ``estimates`` must contain at least 5 columns:
+            sid : int64
+                The asset id associated with each estimate.
+
+            event_date : datetime64[ns]
+                The date on which the event that the estimate is for will/has
+                occurred.
+
+            timestamp : datetime64[ns]
+                The date on which we learned about the estimate.
+
+            fiscal_quarter : int64
+                The quarter during which the event has/will occur.
+
+            fiscal_year : int64
+                The year during which the event has/will occur.
+
+    name_map : dict[str -> str]
+        A map of names of BoundColumns that this loader will load to the
+        names of the corresponding columns in `estimates`.
+    """
+    def __init__(self, estimates, name_map):
+        validate_column_specs(estimates, name_map)
+
+        # Drop rows lacking an event date or fiscal period. Copy so that the
+        # column assignment below does not trigger SettingWithCopyWarning.
+        self.estimates = estimates[
+            estimates[EVENT_DATE_FIELD_NAME].notnull() &
+            estimates[FISCAL_QUARTER_FIELD_NAME].notnull() &
+            estimates[FISCAL_YEAR_FIELD_NAME].notnull()
+        ].copy()
+        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
+            self.estimates[FISCAL_YEAR_FIELD_NAME],
+            self.estimates[FISCAL_QUARTER_FIELD_NAME],
+        )
+        # Overwrite (adjustment) classes, keyed by column dtype.
+        self.array_overwrites_dict = {
+            datetime64ns_dtype: Datetime641DArrayOverwrite,
+            float64_dtype: Float641DArrayOverwrite,
+        }
+        self.scalar_overwrites_dict = {
+            datetime64ns_dtype: Datetime64Overwrite,
+            float64_dtype: Float64Overwrite,
+        }
+        self.name_map = name_map
+
+    @abstractmethod
+    def get_zeroth_quarter_idx(self, num_quarters, stacked_last_per_qtr):
+        raise NotImplementedError('get_zeroth_quarter_idx')
+
+    @abstractmethod
+    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
+        raise NotImplementedError('get_shifted_qtrs')
+
+    @abstractmethod
+    def create_overwrite_for_estimate(self,
+                                      column,
+                                      column_name,
+                                      last_per_qtr,
+                                      next_qtr_start_idx,
+                                      requested_quarter,
+                                      sid,
+                                      sid_idx):
+        raise NotImplementedError('create_overwrite_for_estimate')
+
+    @abstractproperty
+    def searchsorted_side(self):
+        raise NotImplementedError('searchsorted_side')
+
+    def get_requested_quarter_data(self, stacked_last_per_qtr, idx, dates):
+        """
+        Selects the requested data for each date.
+
+        Parameters
+        ----------
+        stacked_last_per_qtr : pd.DataFrame
+            The latest estimate known  with the dates, normalized quarter, and
+            sid as the index.
+        idx : pd.MultiIndex
+            The index of the row of the requested quarter from each date for
+            each sid.
+        dates : pd.DatetimeIndex
+            The calendar dates for which estimates data is requested.
+
+        Returns
+        --------
+        requested_qtr_data : pd.DataFrame
+            The DataFrame with the latest values for the requested quarter
+            for all columns; `dates` are the index and columns are a MultiIndex
+            with sids at the top level and the dataset columns on the bottom.
+        """
+        requested_qtr_data = stacked_last_per_qtr.loc[idx]
+        # We've lost the index names when doing `loc`, so set them here.
+        requested_qtr_data.index = requested_qtr_data.index.set_names(
+            idx.names
+        )
+        requested_qtr_data = requested_qtr_data.reset_index(
+            SHIFTED_NORMALIZED_QTRS
+        )
+        # Calculate the actual year/quarter being requested and add those in
+        # as columns.
+        (requested_qtr_data[FISCAL_YEAR_FIELD_NAME],
+         requested_qtr_data[FISCAL_QUARTER_FIELD_NAME]) = \
+            split_normalized_quarters(
+                requested_qtr_data[SHIFTED_NORMALIZED_QTRS]
+            )
+        # Once we're left with just dates as the index, we can reindex by all
+        # dates so that we have a value for each calendar date.
+        return requested_qtr_data.unstack(SID_FIELD_NAME).reindex(dates)
+
+    def get_adjustments(self,
+                        zero_qtr_data,
+                        requested_qtr_data,
+                        last_per_qtr,
+                        dates,
+                        assets,
+                        columns):
+        """
+        Creates the per-column overwrites from the given estimates data.
+
+        Parameters
+        ----------
+        zero_qtr_data : pd.DataFrame
+            The 'time zero' data for each calendar date per sid.
+        requested_qtr_data : pd.DataFrame
+            The requested quarter data for each calendar date per sid.
+        last_per_qtr : pd.DataFrame
+            A DataFrame with a column MultiIndex of [self.estimates.columns,
+            normalized_quarters, sid] that allows easily getting the timeline
+            of estimates for a particular sid for a particular quarter.
+        dates : pd.DatetimeIndex
+            The calendar dates for which estimates data is requested.
+        assets : iterable of int
+            Sids to process. NOTE(review): enumerate() position becomes
+            `sid_idx`; confirm it matches the output array's column order.
+        columns : list of BoundColumn
+            The columns for which adjustments need to be calculated.
+
+        Returns
+        -------
+        col_to_overwrites : dict[str -> dict[int -> list of Adjustment]]
+            Per raw column name, the overwrites keyed by index into `dates`.
+        """
+        col_to_overwrites = defaultdict(dict)
+        # We no longer need NORMALIZED_QUARTERS in the index, but we do need it
+        # as a column to calculate adjustments.
+        zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
+
+        for sid_idx, sid in enumerate(assets):
+            zero_qtr_sid_data = zero_qtr_data[
+                zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
+            ]
+            # Determine where quarters are changing for this sid.
+            qtr_shifts = zero_qtr_sid_data[
+                zero_qtr_sid_data[NORMALIZED_QUARTERS] !=
+                zero_qtr_sid_data[NORMALIZED_QUARTERS].shift(1)
+            ]
+            # On dates where we don't have any information about quarters,
+            # we will get nulls, and each of these will be interpreted as
+            # quarter shifts. We need to remove these here.
+            qtr_shifts = qtr_shifts[
+                qtr_shifts[NORMALIZED_QUARTERS].notnull()
+            ]
+            # For the given sid, determine which quarters we have estimates
+            # for.
+            qtrs_with_estimates_for_sid = last_per_qtr.xs(
+                sid, axis=1, level=SID_FIELD_NAME
+            ).groupby(axis=1, level=1).first().columns.values
+            for row_indexer in list(qtr_shifts.index):
+                # Find the starting index of the quarter that comes right
+                # after this row. This isn't the starting index of the
+                # requested quarter, but simply the date we cross over into a
+                # new quarter.
+                next_qtr_start_idx = dates.searchsorted(
+                    zero_qtr_data.loc[
+                        row_indexer
+                    ][EVENT_DATE_FIELD_NAME],
+                    side=self.searchsorted_side
+                )
+                # Only add adjustments if the next quarter starts somewhere in
+                # our date index for this sid. Our 'next' quarter can never
+                # start at index 0; a starting index of 0 means that the next
+                # quarter's event date was NaT.
+                if 0 < next_qtr_start_idx < len(dates):
+                    self.create_overwrite_for_quarter(
+                        col_to_overwrites,
+                        next_qtr_start_idx,
+                        last_per_qtr,
+                        qtrs_with_estimates_for_sid,
+                        requested_qtr_data,
+                        sid,
+                        sid_idx,
+                        columns,
+                    )
+        return col_to_overwrites
+
+    def create_overwrite_for_quarter(self,
+                                     col_to_overwrites,
+                                     next_qtr_start_idx,
+                                     last_per_qtr,
+                                     quarters_with_estimates_for_sid,
+                                     requested_qtr_data,
+                                     sid,
+                                     sid_idx,
+                                     columns):
+        """
+        Add entries to the dictionary of columns to adjustments for the given
+        sid and the given quarter.
+
+        Parameters
+        ----------
+        col_to_overwrites : dict [column_name -> list of ArrayAdjustment]
+            A dictionary mapping column names to all overwrites for those
+            columns.
+        next_qtr_start_idx : int
+            The index of the first day of the next quarter in the calendar
+            dates.
+        last_per_qtr : pd.DataFrame
+            A DataFrame with a column MultiIndex of [self.estimates.columns,
+            normalized_quarters, sid] that allows easily getting the timeline
+            of estimates for a particular sid for a particular quarter; this
+            is particularly useful for getting adjustments for 'next'
+            estimates.
+        quarters_with_estimates_for_sid : np.array
+            An array of all quarters for which there are estimates for the
+            given sid.
+        sid : int
+            The sid for which to create overwrites.
+        sid_idx : int
+            The index of the sid in `assets`.
+        columns : list of BoundColumn
+            The columns for which to create overwrites.
+        """
+
+        # Find the quarter being requested in the quarter we're
+        # crossing into.
+        requested_quarter = requested_qtr_data[
+            SHIFTED_NORMALIZED_QTRS
+        ][sid].iloc[next_qtr_start_idx]
+        for col in columns:
+            column_name = self.name_map[col.name]
+            # If there are estimates for the requested quarter,
+            # overwrite all values going up to the starting index of
+            # that quarter with estimates for that quarter.
+            if requested_quarter in quarters_with_estimates_for_sid:
+                col_to_overwrites[column_name][next_qtr_start_idx] = \
+                    [self.create_overwrite_for_estimate(
+                        col,
+                        column_name,
+                        last_per_qtr,
+                        next_qtr_start_idx,
+                        requested_quarter,
+                        sid,
+                        sid_idx
+                    )]
+            # There are no estimates for the quarter. Overwrite all
+            # values going up to the starting index of that quarter
+            # with the missing value for this column.
+            else:
+                col_to_overwrites[column_name][next_qtr_start_idx] =\
+                    [self.overwrite_with_null(
+                        col,
+                        last_per_qtr.index,
+                        next_qtr_start_idx,
+                        sid_idx
+                    )]
+
+    def overwrite_with_null(self,
+                            column,
+                            dates,
+                            next_qtr_start_idx,
+                            sid_idx):
+        return self.scalar_overwrites_dict[column.dtype](
+            0,
+            next_qtr_start_idx - 1,
+            sid_idx,
+            sid_idx,
+            column.missing_value
+        )
+
+    def load_adjusted_array(self, columns, dates, assets, mask):
+        # Separate out getting the columns' datasets and the datasets'
+        # num_quarters attributes to ensure that we're catching the right
+        # AttributeError.
+        col_to_datasets = {col: col.dataset for col in columns}
+        try:
+            groups = groupby(lambda col: col_to_datasets[col].num_quarters,
+                             col_to_datasets)
+        except AttributeError:
+            raise AttributeError("Datasets loaded via the "
+                                 "EarningsEstimatesLoader must define a "
+                                 "`num_quarters` attribute that defines how "
+                                 "many quarters out the loader should load "
+                                 "the data relative to `dates`.")
+        if any(num_qtr < 0 for num_qtr in groups):
+            raise ValueError(
+                INVALID_NUM_QTRS_MESSAGE % ','.join(
+                    str(qtr) for qtr in groups if qtr < 0
+                )
+
+            )
+        out = {}
+        # To optimize performance, only work below on assets that are
+        # actually in the raw data.
+        assets_with_data = set(assets) & set(self.estimates[SID_FIELD_NAME])
+        for num_quarters, columns in groups.items():
+            last_per_qtr, stacked_last_per_qtr = self.get_last_data_per_qtr(
+                assets_with_data, columns, dates
+            )
+            # Determine which quarter is immediately next/previous for each
+            # date.
+            zeroth_quarter_idx = self.get_zeroth_quarter_idx(
+                num_quarters, stacked_last_per_qtr
+            )
+            zero_qtr_data = stacked_last_per_qtr.loc[zeroth_quarter_idx]
+            # Doing it this way because creating a MultiIndex from scratch
+            # results in being unable to unstack sids because of duplicate
+            # values, even though the MultiIndex is created with the same
+            # exact values as below - possible pandas bug.
+            requested_qtr_idx = zero_qtr_data.reset_index(
+                NORMALIZED_QUARTERS
+            ).set_index(
+                self.get_shifted_qtrs(
+                    zeroth_quarter_idx.get_level_values(NORMALIZED_QUARTERS),
+                    num_quarters
+                ),
+                append=True
+            ).index
+            requested_qtr_idx = requested_qtr_idx.rename(
+                SHIFTED_NORMALIZED_QTRS, -1
+            )
+            requested_qtr_data = self.get_requested_quarter_data(
+                stacked_last_per_qtr, requested_qtr_idx, dates
+            )
+
+            # Calculate all adjustments for the given quarter and accumulate
+            # them for each column.
+            col_to_adjustments = self.get_adjustments(zero_qtr_data,
+                                                      requested_qtr_data,
+                                                      last_per_qtr,
+                                                      dates,
+                                                      assets_with_data,
+                                                      columns)
+            for col in columns:
+                column_name = self.name_map[col.name]
+                # We may have dropped assets if they never have any data for
+                # the requested quarter.
+                df = pd.DataFrame(data=requested_qtr_data[column_name],
+                                  index=dates,
+                                  columns=assets,
+                                  dtype=col.dtype)
+
+                out[col] = AdjustedArray(
+                    df.values.astype(col.dtype),
+                    mask,
+                    dict(col_to_adjustments[column_name]),
+                    col.missing_value,
+                )
+        return out
+
+    def get_last_data_per_qtr(self, assets_with_data, columns, dates):
+        """
+        Determine the last piece of information we know for each column on each
+        date in the index for each sid and quarter.
+
+        Parameters
+        ----------
+        assets_with_data : pd.Index
+            Index of all assets that appear in the raw data given to the
+            loader.
+        columns : iterable of BoundColumn
+            The columns that need to be loaded from the raw data.
+        dates : pd.DatetimeIndex
+            The calendar of dates for which data should be loaded.
+
+        Returns
+        -------
+        last_per_qtr : pd.DataFrame
+            A DataFrame with columns that are a MultiIndex of [
+            self.estimates.columns, normalized_quarters, sid].
+        stacked_last_per_qtr : pd.DataFrame
+            A DataFrame indexed by [dates, sid, normalized_quarters] that has
+            the latest information for each row of the index, sorted by event
+            date.
+        """
+        # Get a DataFrame indexed by date with a MultiIndex of columns of [
+        # self.estimates.columns, normalized_quarters, sid], where each cell
+        # contains the latest data for that day.
+        last_per_qtr = last_in_date_group(
+            self.estimates, dates, assets_with_data, reindex=True,
+            extra_groupers=[NORMALIZED_QUARTERS]
+        )
+        # Forward fill values for each quarter/sid/dataset column.
+        ffill_across_cols(last_per_qtr, columns, self.name_map)
+        # Stack quarter and sid into the index.
+        stacked_last_per_qtr = last_per_qtr.stack([SID_FIELD_NAME,
+                                                   NORMALIZED_QUARTERS])
+        # Set date index name for ease of reference
+        stacked_last_per_qtr.index.set_names(SIMULTATION_DATES,
+                                             level=0,
+                                             inplace=True)
+        stacked_last_per_qtr = stacked_last_per_qtr.sort(
+            EVENT_DATE_FIELD_NAME
+        )
+        stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] = pd.to_datetime(
+            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME]
+        )
+        return last_per_qtr, stacked_last_per_qtr
+
+
+class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
+    @property
+    def searchsorted_side(self):
+        return 'right'
+
+    def create_overwrite_for_estimate(self,
+                                      column,
+                                      column_name,
+                                      last_per_qtr,
+                                      next_qtr_start_idx,
+                                      requested_quarter,
+                                      sid,
+                                      sid_idx):
+        return self.array_overwrites_dict[column.dtype](
+            0,
+            # overwrite thru last qtr
+            next_qtr_start_idx - 1,
+            sid_idx,
+            sid_idx,
+            last_per_qtr[
+                column_name,
+                requested_quarter,
+                sid
+            ][:next_qtr_start_idx].values)
+
+    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
+        return zero_qtrs + (num_quarters - 1)
+
+    def get_zeroth_quarter_idx(self, num_quarters, stacked_last_per_qtr):
+        """
+        Filters for releases that are on or after each simulation date and
+        determines the next quarter by picking out the upcoming release for
+        each date in the index.
+
+        Parameters
+        ----------
+        num_quarters : int
+            Number of quarters to go out in the future.
+        stacked_last_per_qtr : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters with each row being the latest estimate for the row's
+            index values, sorted by event date.
+
+        Returns
+        -------
+        next_releases_per_date_index : pd.MultiIndex
+            An index of calendar dates, sid, and normalized quarters, for only
+            the rows that have a next event.
+        """
+
+        next_releases_per_date = stacked_last_per_qtr.loc[
+            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
+            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+        ].groupby(
+            level=[SIMULTATION_DATES, SID_FIELD_NAME], as_index=False
+        ).nth(0)
+        return next_releases_per_date.index
+
+
+class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
+    @property
+    def searchsorted_side(self):
+        return 'left'
+
+    def create_overwrite_for_estimate(self,
+                                      column,
+                                      column_name,
+                                      last_per_qtr,
+                                      next_qtr_start_idx,
+                                      requested_quarter,
+                                      sid,
+                                      sid_idx):
+        # Always overwrite with the column's missing value.
+        return self.overwrite_with_null(
+            column, last_per_qtr.index, next_qtr_start_idx, sid_idx
+        )
+
+    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
+        return zero_qtrs - (num_quarters - 1)
+
+    def get_zeroth_quarter_idx(self, num_quarters, stacked_last_per_qtr):
+        """
+        Filters for releases that are on or before each simulation date and
+        determines the previous quarter by picking out the most recent
+        release relative to each date in the index.
+
+        Parameters
+        ----------
+        num_quarters : int
+            Number of quarters to go out in the past.
+        stacked_last_per_qtr : pd.DataFrame
+            A DataFrame with index of calendar dates, sid, and normalized
+            quarters with each row being the latest estimate for the row's
+            index values, sorted by event date.
+
+        Returns
+        -------
+        previous_releases_per_date_index : pd.MultiIndex
+            An index of calendar dates, sid, and normalized quarters, for only
+            the rows that have a previous event.
+        """
+
+        previous_releases_per_date = stacked_last_per_qtr.loc[
+            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
+            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+        ].groupby(
+            level=[SIMULTATION_DATES, SID_FIELD_NAME], as_index=False
+        ).nth(-1)
+        return previous_releases_per_date.index
diff --git a/zipline/pipeline/loaders/quarter_estimates.py b/zipline/pipeline/loaders/quarter_estimates.py
deleted file mode 100644
index 71ff3a32..00000000
--- a/zipline/pipeline/loaders/quarter_estimates.py
+++ /dev/null
@@ -1,529 +0,0 @@
-from collections import defaultdict
-from abc import abstractmethod
-import numpy as np
-import pandas as pd
-from six import viewvalues
-from toolz import groupby
-
-from zipline.lib.adjusted_array import AdjustedArray
-from zipline.lib.adjustment import (Datetime641DArrayOverwrite,
-                                    Float641DArrayOverwrite)
-
-from zipline.pipeline.common import (
-    EVENT_DATE_FIELD_NAME,
-    FISCAL_QUARTER_FIELD_NAME,
-    FISCAL_YEAR_FIELD_NAME,
-    SID_FIELD_NAME,
-    TS_FIELD_NAME,
-)
-from zipline.pipeline.loaders.base import PipelineLoader
-from zipline.utils.numpy_utils import datetime64ns_dtype
-from zipline.pipeline.loaders.utils import (
-    ffill_across_cols,
-    last_in_date_group
-)
-
-
-INVALID_NUM_QTRS_MESSAGE = "Passed invalid number of quarters %s; " \
-                           "must pass a number of quarters >= 0"
-NEXT_FISCAL_QUARTER = 'next_fiscal_quarter'
-NEXT_FISCAL_YEAR = 'next_fiscal_year'
-NORMALIZED_QUARTERS = 'normalized_quarters'
-PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
-PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
-SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
-SIMULTATION_DATES = 'dates'
-
-
-def normalize_quarters(years, quarters):
-    return years * 4 + quarters - 1
-
-
-def split_normalized_quarters(normalized_quarters):
-    years = normalized_quarters // 4
-    quarters = normalized_quarters % 4
-    return years, quarters + 1
-
-
-def required_estimates_fields(columns):
-    """
-    Compute the set of resource columns required to serve
-    `columns`.
-    """
-    # These metadata columns are used to align event indexers.
-    return {
-        TS_FIELD_NAME,
-        SID_FIELD_NAME,
-        EVENT_DATE_FIELD_NAME,
-        FISCAL_QUARTER_FIELD_NAME,
-        FISCAL_YEAR_FIELD_NAME
-    }.union(
-        # We also expect any of the field names that our loadable columns
-        # are mapped to.
-        viewvalues(columns),
-    )
-
-
-def validate_column_specs(events, columns):
-    """
-    Verify that the columns of ``events`` can be used by a
-    QuarterEstimatesLoader to serve the BoundColumns described by
-    `columns`.
-    """
-    required = required_estimates_fields(columns)
-    received = set(events.columns)
-    missing = required - received
-    if missing:
-        raise ValueError(
-            "QuarterEstimatesLoader missing required columns {missing}.\n"
-            "Got Columns: {received}\n"
-            "Expected Columns: {required}".format(
-                missing=sorted(missing),
-                received=sorted(received),
-                required=sorted(required),
-            )
-        )
-
-
-class QuarterEstimatesLoader(PipelineLoader):
-    def __init__(self,
-                 estimates,
-                 name_map):
-        validate_column_specs(
-            estimates,
-            name_map
-        )
-
-        self.estimates = estimates[
-            estimates[EVENT_DATE_FIELD_NAME].notnull() &
-            estimates[FISCAL_QUARTER_FIELD_NAME].notnull() &
-            estimates[FISCAL_YEAR_FIELD_NAME].notnull()
-        ]
-        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
-            self.estimates[FISCAL_YEAR_FIELD_NAME],
-            self.estimates[FISCAL_QUARTER_FIELD_NAME],
-        )
-
-        self.name_map = name_map
-
-    @abstractmethod
-    def load_quarters(self, num_quarters, last, dates):
-        raise NotImplementedError('load_quarters')
-
-    def get_requested_data_for_col(self, stacked_last_per_qtr, idx, dates):
-        """
-        Selects the requested data for each date.
-
-        Parameters
-        ----------
-        stacked_last_per_qtr : pd.DataFrame
-            The latest estimate known  with the dates, normalized quarter, and
-            sid as the index.
-        idx : pd.MultiIndex
-            The index of the row of the requested quarter from each date for
-            each sid.
-        dates : pd.DatetimeIndex
-            The calendar dates for which estimates data is requested.
-
-        Returns
-        --------
-        requested_qtr_data : pd.DataFrame
-            The DataFrame with the latest values for the requested quarter
-            for all columns; `dates` are the index and columns are a MultiIndex
-            with sids at the top level and the dataset columns on the bottom.
-        """
-        requested_qtr_data = stacked_last_per_qtr.loc[idx]
-        # We no longer need the shifted normalized quarters in the index, but
-        # we do need it as a column to calculate adjustments.
-        requested_qtr_data = requested_qtr_data.reset_index(
-            SHIFTED_NORMALIZED_QTRS
-        )
-        # Calculate the actual year/quarter being requested and add those in
-        # as columns.
-        (requested_qtr_data[FISCAL_YEAR_FIELD_NAME],
-         requested_qtr_data[FISCAL_QUARTER_FIELD_NAME]) = \
-            split_normalized_quarters(
-                requested_qtr_data[SHIFTED_NORMALIZED_QTRS]
-            )
-        # Move sids into the columns. Once we're left with just dates
-        # as the index, we can reindex by all dates so that we have a
-        # value for each calendar date.
-        requested_qtr_data = requested_qtr_data.unstack(
-            SID_FIELD_NAME
-        ).reindex(dates)
-        return requested_qtr_data
-
-    def get_adjustments(self,
-                        zero_qtr_data,
-                        requested_qtr_data,
-                        last_per_qtr,
-                        dates,
-                        assets,
-                        columns):
-        """
-        Creates an AdjustedArray from the given estimates data for the given
-        dates.
-
-        Parameters
-        ----------
-        zero_qtr_data : pd.DataFrame
-            The 'time zero' data for each date/sid.
-        zero_qtr_data : pd.DataFrame
-            The data for the requested quarter.
-        last_per_qtr : pd.DataFrame
-            The latest estimate known per sid per date per quarter with
-            dates as the index and normalized quarter and sid in the columns
-            MultiIndex; allows easy access to the timeline of estimates
-            across all dates for a sid for a particular quarter.
-        dates : pd.DatetimeIndex
-            The calendar dates for which estimates data is requested.
-        column_name : string
-            The name of the column for which the AdjustedArray is being
-            computed.
-        column : BoundColumn
-            The column for which the AdjustedArray is being computed.
-        mask : np.array
-            Mask array of dimensions len(dates) X len(assets).
-        assets : pd.Int64Index
-            An index of all the assets from the raw data.
-
-        Returns
-        -------
-        adjusted_array : AdjustedArray
-            The array of data and overwrites for the given column.
-        """
-        col_to_adjustments = defaultdict(dict)
-        # We no longer need this in the index, but we do need it as a column
-        # to calculate adjustments.
-        zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
-
-        for sid_idx, sid in enumerate(assets):
-            zero_qtr_sid_data = zero_qtr_data[
-                zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
-            ]
-            # Determine where quarters are changing for this sid.
-            qtr_shifts = zero_qtr_sid_data[
-                zero_qtr_sid_data[NORMALIZED_QUARTERS] !=
-                zero_qtr_sid_data[NORMALIZED_QUARTERS].shift(1)
-            ]
-            # On dates where we don't have any information about quarters,
-            # we will get nulls, and each of these will be interpreted as
-            # quarter shifts. We need to remove these here.
-            qtr_shifts = qtr_shifts[
-                qtr_shifts[NORMALIZED_QUARTERS].notnull()
-            ]
-            # For the given sid, determine which quarters we have estimates
-            # for.
-            qtrs_with_estimates_for_sid = last_per_qtr.xs(
-                sid, axis=1, level=SID_FIELD_NAME
-            ).groupby(axis=1, level=1).first().columns.values
-            for row_indexer in list(qtr_shifts.index):
-                # Find the starting index of the quarter that comes right
-                # after this row. This isn't the starting index of the
-                # requested quarter, but simply the date we cross over into a
-                # new quarter.
-                next_qtr_start_idx = dates.searchsorted(
-                    zero_qtr_data.loc[
-                        row_indexer
-                    ][EVENT_DATE_FIELD_NAME],
-                    side='left'
-                    if isinstance(self, PreviousQuartersEstimatesLoader)
-                    else 'right'
-                )
-                self.create_overwrite_for_quarter(
-                    col_to_adjustments,
-                    next_qtr_start_idx,
-                    dates,
-                    last_per_qtr,
-                    qtrs_with_estimates_for_sid,
-                    requested_qtr_data,
-                    sid,
-                    sid_idx,
-                    columns,
-                )
-        return col_to_adjustments
-
-    def create_overwrite_for_quarter(self,
-                                     col_to_adjustments,
-                                     next_qtr_start_idx,
-                                     dates,
-                                     last_per_qtr,
-                                     quarters_with_estimates_for_sid,
-                                     requested_qtr_data,
-                                     sid,
-                                     sid_idx,
-                                     columns):
-        overwrites_dict = {}
-        for col in columns:
-            if col.dtype == datetime64ns_dtype:
-                overwrites_dict[col] = Datetime641DArrayOverwrite
-            else:
-                overwrites_dict[col] = Float641DArrayOverwrite
-        # Only add adjustments if the next quarter starts somewhere in
-        # our date index for this sid. Our 'next' quarter can never
-        # start at index 0; a starting index of 0 means that the next
-        # quarter's event date was NaT.
-        if 0 < next_qtr_start_idx < len(dates):
-            # Find the quarter being requested in the quarter we're
-            # crossing into.
-            requested_quarter = requested_qtr_data[
-                SHIFTED_NORMALIZED_QTRS
-            ][sid].iloc[next_qtr_start_idx]
-            for col in columns:
-                column_name = self.name_map[col.name]
-                # If there are estimates for the requested quarter,
-                # overwrite all values going up to the starting index of
-                # that quarter with estimates for that quarter.
-                if requested_quarter in quarters_with_estimates_for_sid:
-                    col_to_adjustments[column_name][next_qtr_start_idx] = \
-                        self.create_overwrite_for_estimate(
-                            col,
-                            column_name,
-                            last_per_qtr,
-                            next_qtr_start_idx,
-                            overwrites_dict[col],
-                            requested_quarter,
-                            sid,
-                            sid_idx
-                        )
-                # There are no estimates for the quarter. Overwrite all
-                # values going up to the starting index of that quarter
-                # with the missing value for this column.
-                else:
-                    col_to_adjustments[column_name][next_qtr_start_idx] =\
-                        self.overwrite_with_null(
-                            col,
-                            last_per_qtr,
-                            next_qtr_start_idx,
-                            overwrites_dict[col],
-                            sid_idx
-                        )
-
-    def overwrite_with_null(self,
-                            column,
-                            last_per_qtr,
-                            next_qtr_start_idx,
-                            overwrite,
-                            sid_idx):
-        return [overwrite(
-                0,
-                next_qtr_start_idx - 1,
-                sid_idx,
-                sid_idx,
-                np.full(
-                    len(
-                        last_per_qtr.index[:next_qtr_start_idx]
-                    ),
-                    column.missing_value,
-                    dtype=column.dtype
-                ))]
-
-    def load_adjusted_array(self, columns, dates, assets, mask):
-        # Separate out getting the columns' datasets and the datasets'
-        # num_quarters attributes to ensure that we're catching the right
-        # AttributeError.
-        col_to_datasets = {col: col.dataset for col in columns}
-        try:
-            groups = groupby(lambda col: col_to_datasets[col].num_quarters,
-                             col_to_datasets)
-        except AttributeError:
-            raise AttributeError("Datasets loaded via the "
-                                 "QuarterEstimatesLoader must define a "
-                                 "`num_quarters` attribute that defines how "
-                                 "many quarters out the loader should load "
-                                 "the data relative to `dates`.")
-        if any(num_qtr < 0 for num_qtr in groups):
-            raise ValueError(
-                INVALID_NUM_QTRS_MESSAGE % ','.join(
-                    str(qtr) for qtr in groups if qtr < 0
-                )
-
-            )
-        out = {}
-        for num_quarters, columns in groups.items():
-            # Determine the last piece of information we know for each column
-            # on each date in the index for each sid and quarter.
-            last_per_qtr = last_in_date_group(
-                self.estimates, dates, assets, reindex=True,
-                extra_groupers=[NORMALIZED_QUARTERS]
-            )
-
-            # Forward fill values for each quarter/sid/dataset column.
-            ffill_across_cols(last_per_qtr, columns, self.name_map)
-            # Stack quarter and sid into the index.
-            stacked_last_per_qtr = last_per_qtr.stack([SID_FIELD_NAME,
-                                                       NORMALIZED_QUARTERS])
-            # Set date index name for ease of reference
-            stacked_last_per_qtr.index.set_names(SIMULTATION_DATES,
-                                                 level=0,
-                                                 inplace=True)
-            # We want to know the most recent/next event relative to each date.
-            stacked_last_per_qtr = stacked_last_per_qtr.sort(
-                EVENT_DATE_FIELD_NAME
-            )
-            # Determine which quarter is next/previous for each date.
-            shifted_qtr_data = self.load_quarters(num_quarters,
-                                                  stacked_last_per_qtr)
-            zero_qtr_idx = shifted_qtr_data.index
-            requested_qtr_idx = shifted_qtr_data.set_index([
-                shifted_qtr_data.index.get_level_values(
-                    SIMULTATION_DATES
-                ),
-                shifted_qtr_data.index.get_level_values(
-                    SID_FIELD_NAME
-                ),
-                shifted_qtr_data[SHIFTED_NORMALIZED_QTRS]
-            ]).index
-            requested_qtr_data = self.get_requested_data_for_col(
-                stacked_last_per_qtr, requested_qtr_idx, dates
-            )
-
-            zero_qtr_data = stacked_last_per_qtr.loc[zero_qtr_idx]
-
-            col_to_adjustments = self.get_adjustments(zero_qtr_data,
-                                                      requested_qtr_data,
-                                                      last_per_qtr,
-                                                      dates,
-                                                      assets,
-                                                      columns)
-            for col in columns:
-                column_name = self.name_map[col.name]
-                # We may have dropped assets if they never have any data for the
-                # requested quarter.
-                df = pd.DataFrame(data=requested_qtr_data[column_name],
-                                  index=dates,
-                                  columns=assets,
-                                  dtype=col.dtype)
-
-                out[col] = AdjustedArray(
-                    df.values.astype(col.dtype),
-                    mask,
-                    dict(col_to_adjustments[column_name]),
-                    col.missing_value,
-                )
-        return out
-
-
-class NextQuartersEstimatesLoader(QuarterEstimatesLoader):
-    def create_overwrite_for_estimate(self,
-                                      column,
-                                      column_name,
-                                      last_per_qtr,
-                                      next_qtr_start_idx,
-                                      overwrite,
-                                      requested_quarter,
-                                      sid,
-                                      sid_idx):
-        return [overwrite(
-            0,
-            # overwrite thru last qtr
-            next_qtr_start_idx - 1,
-            sid_idx,
-            sid_idx,
-            last_per_qtr[
-                column_name,
-                requested_quarter,
-                sid
-            ][0:next_qtr_start_idx].values)]
-
-    def load_quarters(self, num_quarters, stacked_last_per_qtr):
-        """
-        Filters for releases that are on or after each simulation date and
-        determines the next quarter by picking out the upcoming release for
-        each date in the index. Adda a SHIFTED_NORMALIZED_QTRS column which
-        contains the requested next quarter for each calendar date and sid.
-
-        Parameters
-        ----------
-        num_quarters : int
-            Number of quarters to go out in the future.
-        stacked_last_per_qtr : pd.DataFrame
-            A DataFrame with index of calendar dates, sid, and normalized
-            quarters with each row being the latest estimate for the row's
-            index values, sorted by event date.
-
-        Returns
-        -------
-        next_releases_per_date : pd.DataFrame
-            A DataFrame with index of calendar dates, sid, and normalized
-            quarters, keeping only rows with next event information relative to
-            the index values and with an added column for
-            SHIFTED_NORMALIZED_QTRS, which contains the requested quarter for
-            each row.
-        """
-
-        # We reset the index here because in pandas3, a groupby on the index
-        # will set the index to just the items in the groupby, so we will lose
-        # the normalized quarters.
-        next_releases_per_date = stacked_last_per_qtr.loc[
-            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
-            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
-        ].reset_index(NORMALIZED_QUARTERS).groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME]
-        ).nth(0).set_index(NORMALIZED_QUARTERS, append=True)
-        next_releases_per_date[
-            SHIFTED_NORMALIZED_QTRS
-        ] = next_releases_per_date.index.get_level_values(
-            NORMALIZED_QUARTERS
-        ) + (num_quarters - 1)
-        return next_releases_per_date
-
-
-class PreviousQuartersEstimatesLoader(QuarterEstimatesLoader):
-    def create_overwrite_for_estimate(self,
-                                      column,
-                                      column_name,
-                                      last_per_qtr,
-                                      next_qtr_start_idx,
-                                      overwrite,
-                                      requested_quarter,
-                                      sid,
-                                      sid_idx):
-        return self.overwrite_with_null(column,
-                                        last_per_qtr,
-                                        next_qtr_start_idx,
-                                        overwrite,
-                                        sid_idx)
-
-    def load_quarters(self, num_quarters, stacked_last_per_qtr):
-        """
-        Filters for releases that are on or after each simulation date and
-        determines the previous quarter by picking out the most recent
-        release relative to each date in the index. Adds a
-        SHIFTED_NORMALIZED_QTRS column which contains the requested previous
-         quarter for each calendar date and sid.
-
-        Parameters
-        ----------
-        num_quarters : int
-            Number of quarters to go out in the past.
-        stacked_last_per_qtr : pd.DataFrame
-            A DataFrame with index of calendar dates, sid, and normalized
-            quarters with each row being the latest estimate for the row's
-            index values, sorted by event date.
-
-        Returns
-        -------
-        next_releases_per_date : pd.DataFrame
-            A DataFrame with index of calendar dates, sid, and normalized
-            quarters, keeping only rows with have a previous event relative
-            to the index values and with an added column for
-            SHIFTED_NORMALIZED_QTRS, which contains the requested quarter for
-            each row.
-        """
-
-        # We reset the index here because in pandas3, a groupby on the index
-        # will set the index to just the items in the groupby, so we will lose
-        # the normalized quarters.
-        previous_releases_per_date = stacked_last_per_qtr.loc[
-            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
-            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
-        ].reset_index(NORMALIZED_QUARTERS).groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME]
-        ).nth(-1).set_index(NORMALIZED_QUARTERS, append=True)
-        previous_releases_per_date[
-            SHIFTED_NORMALIZED_QTRS
-        ] = previous_releases_per_date.index.get_level_values(
-            NORMALIZED_QUARTERS
-        ) - (num_quarters - 1)
-        return previous_releases_per_date
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index 125dacfb..a81df8fe 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -320,8 +320,11 @@ def last_in_date_group(df, dates, assets, reindex=True, have_sids=True,
     ).last()
 
     # For the number of things that we're grouping by (except TS), unstack
-    # the df
-    last_in_group = last_in_group.unstack(list(range(-1, -len(idx), -1)))
+    # the df. Done this way because of an unresolved pandas bug whereby
+    # passing a list of levels with mixed dtypes to unstack causes the
+    # resulting DataFrame to have all object-type columns.
+    for _ in range(len(idx) - 1):
+        last_in_group = last_in_group.unstack(-1)
 
     if reindex:
         if have_sids:
diff --git a/zipline/testing/fixtures.py b/zipline/testing/fixtures.py
index 1ce01627..f0e2aaa5 100644
--- a/zipline/testing/fixtures.py
+++ b/zipline/testing/fixtures.py
@@ -34,14 +34,13 @@ from ..finance.trading import TradingEnvironment
 from ..utils import factory
 from ..utils.classproperty import classproperty
 from ..utils.final import FinalMeta, final
-from .core import (tmp_asset_finder, make_simple_equity_info)
+from .core import tmp_asset_finder, make_simple_equity_info
 from zipline.assets import Equity, Future
 from zipline.pipeline import SimplePipelineEngine
 from zipline.pipeline.loaders.testing import make_seeded_random_loader
 from zipline.utils.calendars import (
     get_calendar,
-    register_calendar
-)
+    register_calendar)
 
 
 class ZiplineTestCase(with_metaclass(FinalMeta, TestCase)):

From bdce6ae4935bac02d0dd4e125c3677134621d4b4 Mon Sep 17 00:00:00 2001
From: Joe Jevnik <joe@quantopian.com>
Date: Mon, 19 Sep 2016 21:13:28 -0400
Subject: [PATCH 14/16] PERF: vectorize earnings estimates

---
 .../pipeline/loaders/earnings_estimates.py    | 275 ++++++++++--------
 zipline/pipeline/loaders/utils.py             |   6 +-
 2 files changed, 156 insertions(+), 125 deletions(-)

diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py
index 3397d105..6790e2c4 100644
--- a/zipline/pipeline/loaders/earnings_estimates.py
+++ b/zipline/pipeline/loaders/earnings_estimates.py
@@ -1,5 +1,7 @@
-from collections import defaultdict
 from abc import abstractmethod, abstractproperty
+from collections import defaultdict
+
+import numpy as np
 import pandas as pd
 from six import viewvalues
 from toolz import groupby
@@ -136,11 +138,15 @@ class EarningsEstimatesLoader(PipelineLoader):
             self.estimates[FISCAL_YEAR_FIELD_NAME],
             self.estimates[FISCAL_QUARTER_FIELD_NAME],
         )
-        self.array_overwrites_dict = {datetime64ns_dtype:
-                                      Datetime641DArrayOverwrite,
-                                      float64_dtype: Float641DArrayOverwrite}
-        self.scalar_overwrites_dict = {datetime64ns_dtype: Datetime64Overwrite,
-                                       float64_dtype: Float64Overwrite}
+
+        self.array_overwrites_dict = {
+            datetime64ns_dtype: Datetime641DArrayOverwrite,
+            float64_dtype: Float641DArrayOverwrite,
+        }
+        self.scalar_overwrites_dict = {
+            datetime64ns_dtype: Datetime64Overwrite,
+            float64_dtype: Float64Overwrite,
+        }
 
         self.name_map = name_map
 
@@ -167,18 +173,25 @@ class EarningsEstimatesLoader(PipelineLoader):
     def searchsorted_side(self):
         return NotImplementedError('searchsorted_side')
 
-    def get_requested_quarter_data(self, stacked_last_per_qtr, idx, dates):
+    def get_requested_quarter_data(self,
+                                   zero_qtr_data,
+                                   zeroth_quarter_idx,
+                                   stacked_last_per_qtr,
+                                   num_quarters,
+                                   dates):
         """
         Selects the requested data for each date.
 
         Parameters
         ----------
+        zero_qtr_data : pd.DataFrame
+            The 'time zero' data for each calendar date per sid.
+        zeroth_quarter_idx : pd.Index
+            An index of calendar dates, sid, and normalized quarters, for only
+            the rows that have a next or previous earnings estimate.
         stacked_last_per_qtr : pd.DataFrame
-            The latest estimate known  with the dates, normalized quarter, and
+            The latest estimate known with the dates, normalized quarter, and
             sid as the index.
-        idx : pd.MultiIndex
-            The index of the row of the requested quarter from each date for
-            each sid.
         dates : pd.DatetimeIndex
             The calendar dates for which estimates data is requested.
 
@@ -189,13 +202,27 @@ class EarningsEstimatesLoader(PipelineLoader):
             for all columns; `dates` are the index and columns are a MultiIndex
             with sids at the top level and the dataset columns on the bottom.
         """
-        requested_qtr_data = stacked_last_per_qtr.loc[idx]
-        # We've lost the index names when doing `loc`, so set them here.
-        requested_qtr_data.index = requested_qtr_data.index.set_names(
-            idx.names
+        zero_qtr_data_idx = zero_qtr_data.index
+        requested_qtr_idx = pd.MultiIndex.from_arrays(
+            [
+                zero_qtr_data_idx.get_level_values(0),
+                zero_qtr_data_idx.get_level_values(1),
+                self.get_shifted_qtrs(
+                    zeroth_quarter_idx.get_level_values(
+                        NORMALIZED_QUARTERS,
+                    ),
+                    num_quarters,
+                ),
+            ],
+            names=[
+                zero_qtr_data_idx.names[0],
+                zero_qtr_data_idx.names[1],
+                SHIFTED_NORMALIZED_QTRS,
+            ],
         )
+        requested_qtr_data = stacked_last_per_qtr.loc[requested_qtr_idx]
         requested_qtr_data = requested_qtr_data.reset_index(
-            SHIFTED_NORMALIZED_QTRS
+            SHIFTED_NORMALIZED_QTRS,
         )
         # Calculate the actual year/quarter being requested and add those in
         # as columns.
@@ -244,54 +271,44 @@ class EarningsEstimatesLoader(PipelineLoader):
         col_to_overwrites = defaultdict(dict)
         # We no longer need NORMALIZED_QUARTERS in the index, but we do need it
         # as a column to calculate adjustments.
-        zero_qtr_data = zero_qtr_data.reset_index(NORMALIZED_QUARTERS)
+        zero_qtr_data = zero_qtr_data.reset_index(level=NORMALIZED_QUARTERS)
+        zero_qtr_data.sort_index(inplace=True)
 
-        for sid_idx, sid in enumerate(assets):
-            zero_qtr_sid_data = zero_qtr_data[
-                zero_qtr_data.index.get_level_values(SID_FIELD_NAME) == sid
+        quarter_shifts = zero_qtr_data.loc[
+            zero_qtr_data.index[
+                zero_qtr_data.groupby(level=SID_FIELD_NAME)[
+                    NORMALIZED_QUARTERS
+                ].diff().nonzero()
             ]
-            # Determine where quarters are changing for this sid.
-            qtr_shifts = zero_qtr_sid_data[
-                zero_qtr_sid_data[NORMALIZED_QUARTERS] !=
-                zero_qtr_sid_data[NORMALIZED_QUARTERS].shift(1)
-            ]
-            # On dates where we don't have any information about quarters,
-            # we will get nulls, and each of these will be interpreted as
-            # quarter shifts. We need to remove these here.
-            qtr_shifts = qtr_shifts[
-                qtr_shifts[NORMALIZED_QUARTERS].notnull()
-            ]
-            # For the given sid, determine which quarters we have estimates
-            # for.
-            qtrs_with_estimates_for_sid = last_per_qtr.xs(
-                sid, axis=1, level=SID_FIELD_NAME
-            ).groupby(axis=1, level=1).first().columns.values
-            for row_indexer in list(qtr_shifts.index):
-                # Find the starting index of the quarter that comes right
-                # after this row. This isn't the starting index of the
-                # requested quarter, but simply the date we cross over into a
-                # new quarter.
-                next_qtr_start_idx = dates.searchsorted(
-                    zero_qtr_data.loc[
-                        row_indexer
-                    ][EVENT_DATE_FIELD_NAME],
-                    side=self.searchsorted_side
-                )
-                # Only add adjustments if the next quarter starts somewhere in
-                # our date index for this sid. Our 'next' quarter can never
-                # start at index 0; a starting index of 0 means that the next
-                # quarter's event date was NaT.
-                if 0 < next_qtr_start_idx < len(dates):
+        ]
+
+        sid_to_idx = dict(zip(assets, range(len(assets))))
+
+        def collect_adjustments(group):
+            next_qtr_start_indices = dates.searchsorted(
+                group[EVENT_DATE_FIELD_NAME].values,
+                side=self.searchsorted_side,
+            )
+            sid = int(group.name)
+            qtrs_with_estimates = group[NORMALIZED_QUARTERS].values
+            for idx in next_qtr_start_indices:
+                if 0 < idx < len(dates):
+                    # Only add adjustments if the next quarter starts somewhere
+                    # in our date index for this sid. Our 'next' quarter can
+                    # never start at index 0; a starting index of 0 means that
+                    # the next quarter's event date was NaT.
                     self.create_overwrite_for_quarter(
                         col_to_overwrites,
-                        next_qtr_start_idx,
+                        idx,
                         last_per_qtr,
-                        qtrs_with_estimates_for_sid,
+                        qtrs_with_estimates,
                         requested_qtr_data,
                         sid,
-                        sid_idx,
+                        sid_to_idx[sid],
                         columns,
                     )
+
+        quarter_shifts.groupby(level=SID_FIELD_NAME).apply(collect_adjustments)
         return col_to_overwrites
 
     def create_overwrite_for_quarter(self,
@@ -335,16 +352,16 @@ class EarningsEstimatesLoader(PipelineLoader):
         # Find the quarter being requested in the quarter we're
         # crossing into.
         requested_quarter = requested_qtr_data[
-            SHIFTED_NORMALIZED_QTRS
-        ][sid].iloc[next_qtr_start_idx]
+            SHIFTED_NORMALIZED_QTRS, sid,
+        ].iloc[next_qtr_start_idx]
         for col in columns:
             column_name = self.name_map[col.name]
             # If there are estimates for the requested quarter,
             # overwrite all values going up to the starting index of
             # that quarter with estimates for that quarter.
             if requested_quarter in quarters_with_estimates_for_sid:
-                col_to_overwrites[column_name][next_qtr_start_idx] = \
-                    [self.create_overwrite_for_estimate(
+                col_to_overwrites[column_name][next_qtr_start_idx] = [
+                    self.create_overwrite_for_estimate(
                         col,
                         column_name,
                         last_per_qtr,
@@ -352,18 +369,20 @@ class EarningsEstimatesLoader(PipelineLoader):
                         requested_quarter,
                         sid,
                         sid_idx
-                    )]
+                    ),
+                ]
             # There are no estimates for the quarter. Overwrite all
             # values going up to the starting index of that quarter
             # with the missing value for this column.
             else:
-                col_to_overwrites[column_name][next_qtr_start_idx] =\
-                    [self.overwrite_with_null(
+                col_to_overwrites[column_name][next_qtr_start_idx] = [
+                    self.overwrite_with_null(
                         col,
                         last_per_qtr.index,
                         next_qtr_start_idx,
                         sid_idx
-                    )]
+                    ),
+                ]
 
     def overwrite_with_null(self,
                             column,
@@ -403,34 +422,23 @@ class EarningsEstimatesLoader(PipelineLoader):
         # To optimize performance, only work below on assets that are
         # actually in the raw data.
         assets_with_data = set(assets) & set(self.estimates[SID_FIELD_NAME])
+        last_per_qtr, stacked_last_per_qtr = self.get_last_data_per_qtr(
+            assets_with_data,
+            columns,
+            dates
+        )
+        # Determine which quarter is immediately next/previous for each
+        # date.
+        zeroth_quarter_idx = self.get_zeroth_quarter_idx(stacked_last_per_qtr)
+        zero_qtr_data = stacked_last_per_qtr.loc[zeroth_quarter_idx]
+
         for num_quarters, columns in groups.items():
-            last_per_qtr, stacked_last_per_qtr = self.get_last_data_per_qtr(
-                assets_with_data, columns, dates
-            )
-            # Determine which quarter is immediately next/previous for each
-            # date.
-            zeroth_quarter_idx = self.get_zeroth_quarter_idx(
-                num_quarters, stacked_last_per_qtr
-            )
-            zero_qtr_data = stacked_last_per_qtr.loc[zeroth_quarter_idx]
-            # Doing it this way because creating a MultiIndex from scratch
-            # results in being unable to unstack sids because of duplicate
-            # values, even though the MultiIndex is created with the same
-            # exact values as below - possible pandas bug.
-            requested_qtr_idx = zero_qtr_data.reset_index(
-                NORMALIZED_QUARTERS
-            ).set_index(
-                self.get_shifted_qtrs(
-                    zeroth_quarter_idx.get_level_values(NORMALIZED_QUARTERS),
-                    num_quarters
-                ),
-                append=True
-            ).index
-            requested_qtr_idx = requested_qtr_idx.rename(
-                SHIFTED_NORMALIZED_QTRS, -1
-            )
             requested_qtr_data = self.get_requested_quarter_data(
-                stacked_last_per_qtr, requested_qtr_idx, dates
+                zero_qtr_data,
+                zeroth_quarter_idx,
+                stacked_last_per_qtr,
+                num_quarters,
+                dates,
             )
 
             # Calculate all adjustments for the given quarter and accumulate
@@ -441,17 +449,33 @@ class EarningsEstimatesLoader(PipelineLoader):
                                                       dates,
                                                       assets_with_data,
                                                       columns)
+
+            # Look up the asset indexer once so that we can reindex the
+            # assets returned into the assets requested for each column.
+            # This depends on the fact that our column multiindex has the same
+            # sids for each field. This allows us to do the lookup once on
+            # level 1 instead of doing the lookup each time per value in
+            # level 0.
+            asset_indexer = assets.get_indexer_for(
+                requested_qtr_data.columns.levels[1],
+            )
             for col in columns:
                 column_name = self.name_map[col.name]
-                # We may have dropped assets if they never have any data for
-                # the requested quarter.
-                df = pd.DataFrame(data=requested_qtr_data[column_name],
-                                  index=dates,
-                                  columns=assets,
-                                  dtype=col.dtype)
+                # allocate the empty output with the correct missing value
+                output_array = np.full(
+                    (len(dates), len(assets)),
+                    col.missing_value,
+                    dtype=col.dtype,
+                )
+                # overwrite the missing value with values from the computed
+                # data
+                output_array[
+                    :,
+                    asset_indexer,
+                ] = requested_qtr_data[column_name].values
 
                 out[col] = AdjustedArray(
-                    df.values.astype(col.dtype),
+                    output_array,
                     mask,
                     dict(col_to_adjustments[column_name]),
                     col.missing_value,
@@ -487,20 +511,26 @@ class EarningsEstimatesLoader(PipelineLoader):
         # self.estimates.columns, normalized_quarters, sid], where each cell
         # contains the latest data for that day.
         last_per_qtr = last_in_date_group(
-            self.estimates, dates, assets_with_data, reindex=True,
-            extra_groupers=[NORMALIZED_QUARTERS]
+            self.estimates,
+            dates,
+            assets_with_data,
+            reindex=True,
+            extra_groupers=[NORMALIZED_QUARTERS],
         )
         # Forward fill values for each quarter/sid/dataset column.
         ffill_across_cols(last_per_qtr, columns, self.name_map)
         # Stack quarter and sid into the index.
-        stacked_last_per_qtr = last_per_qtr.stack([SID_FIELD_NAME,
-                                                   NORMALIZED_QUARTERS])
+        stacked_last_per_qtr = last_per_qtr.stack(
+            [SID_FIELD_NAME, NORMALIZED_QUARTERS],
+        )
         # Set date index name for ease of reference
-        stacked_last_per_qtr.index.set_names(SIMULTATION_DATES,
-                                             level=0,
-                                             inplace=True)
+        stacked_last_per_qtr.index.set_names(
+            SIMULTATION_DATES,
+            level=0,
+            inplace=True,
+        )
         stacked_last_per_qtr = stacked_last_per_qtr.sort(
-            EVENT_DATE_FIELD_NAME
+            EVENT_DATE_FIELD_NAME,
         )
         stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] = pd.to_datetime(
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME]
@@ -509,9 +539,7 @@ class EarningsEstimatesLoader(PipelineLoader):
 
 
 class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
-    @property
-    def searchsorted_side(self):
-        return 'right'
+    searchsorted_side = 'right'
 
     def create_overwrite_for_estimate(self,
                                       column,
@@ -530,13 +558,14 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
             last_per_qtr[
                 column_name,
                 requested_quarter,
-                sid
-            ][:next_qtr_start_idx].values)
+                sid,
+            ].values[:next_qtr_start_idx],
+        )
 
     def get_shifted_qtrs(self, zero_qtrs, num_quarters):
         return zero_qtrs + (num_quarters - 1)
 
-    def get_zeroth_quarter_idx(self, num_quarters, stacked_last_per_qtr):
+    def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
         """
         Filters for releases that are on or after each simulation date and
         determines the next quarter by picking out the upcoming release for
@@ -544,8 +573,6 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
 
         Parameters
         ----------
-        num_quarters : int
-            Number of quarters to go out in the future.
         stacked_last_per_qtr : pd.DataFrame
             A DataFrame with index of calendar dates, sid, and normalized
             quarters with each row being the latest estimate for the row's
@@ -562,15 +589,14 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
             stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
         ].groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME], as_index=False
+            level=[SIMULTATION_DATES, SID_FIELD_NAME],
+            as_index=False,
         ).nth(0)
         return next_releases_per_date.index
 
 
 class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
-    @property
-    def searchsorted_side(self):
-        return 'left'
+    searchsorted_side = 'left'
 
     def create_overwrite_for_estimate(self,
                                       column,
@@ -580,15 +606,17 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
                                       requested_quarter,
                                       sid,
                                       sid_idx):
-        return self.overwrite_with_null(column,
-                                        dates,
-                                        next_qtr_start_idx,
-                                        sid_idx)
+        return self.overwrite_with_null(
+            column,
+            dates,
+            next_qtr_start_idx,
+            sid_idx,
+        )
 
     def get_shifted_qtrs(self, zero_qtrs, num_quarters):
         return zero_qtrs - (num_quarters - 1)
 
-    def get_zeroth_quarter_idx(self, num_quarters, stacked_last_per_qtr):
+    def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
         """
         Filters for releases that are on or after each simulation date and
         determines the previous quarter by picking out the most recent
@@ -596,8 +624,6 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
 
         Parameters
         ----------
-        num_quarters : int
-            Number of quarters to go out in the past.
         stacked_last_per_qtr : pd.DataFrame
             A DataFrame with index of calendar dates, sid, and normalized
             quarters with each row being the latest estimate for the row's
@@ -614,6 +640,7 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
             stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
         ].groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME], as_index=False
+            level=[SIMULTATION_DATES, SID_FIELD_NAME],
+            as_index=False,
         ).nth(-1)
         return previous_releases_per_date.index
diff --git a/zipline/pipeline/loaders/utils.py b/zipline/pipeline/loaders/utils.py
index a81df8fe..028da74c 100644
--- a/zipline/pipeline/loaders/utils.py
+++ b/zipline/pipeline/loaders/utils.py
@@ -276,7 +276,11 @@ def check_data_query_args(data_query_time, data_query_tz):
         )
 
 
-def last_in_date_group(df, dates, assets, reindex=True, have_sids=True,
+def last_in_date_group(df,
+                       dates,
+                       assets,
+                       reindex=True,
+                       have_sids=True,
                        extra_groupers=[]):
     """
     Determine the last piece of information known on each date in the date

From 8d2e6446ee0886766e5bb861ca2ddc5112c17bd8 Mon Sep 17 00:00:00 2001
From: Joe Jevnik <joe@quantopian.com>
Date: Tue, 20 Sep 2016 13:44:26 -0400
Subject: [PATCH 15/16] PERF: only query for the columns requested + metadata

BUG: choose last event date for quarter shift
---
 tests/pipeline/test_quarters_estimates.py     |  8 +--
 zipline/pipeline/loaders/blaze/estimates.py   | 20 ++++---
 zipline/pipeline/loaders/blaze/events.py      |  2 +-
 .../pipeline/loaders/earnings_estimates.py    | 52 +++++++++----------
 4 files changed, 43 insertions(+), 39 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 4f7b8366..8301201d 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -746,7 +746,7 @@ class WithEstimateWindows(WithEstimates):
                           columns=[SID_FIELD_NAME,
                                    'estimate',
                                    'knowledge_date'])
-        df = df.pivot_table(columns='sid',
+        df = df.pivot_table(columns=SID_FIELD_NAME,
                             values='estimate',
                             index='knowledge_date')
         df = df.reindex(
@@ -796,8 +796,8 @@ class WithEstimateWindows(WithEstimates):
         engine.run_pipeline(
             Pipeline({'est': SomeFactor()}),
             start_date=start_idx,
-            end_date=pd.Timestamp('2015-01-20', tz='utc'),  # last event date
-            # we have
+            # last event date we have
+            end_date=pd.Timestamp('2015-01-20', tz='utc'),
         )
 
 
@@ -938,7 +938,7 @@ class QuarterShiftTestCase(ZiplineTestCase):
     backwards/forwards from a starting point.
     """
     def test_quarter_normalization(self):
-        input_yrs = pd.Series([0] * 4, dtype=np.int64)
+        input_yrs = pd.Series(range(2011, 2015), dtype=np.int64)
         input_qtrs = pd.Series(range(1, 5), dtype=np.int64)
         result_years, result_quarters = split_normalized_quarters(
             normalize_quarters(input_yrs, input_qtrs)
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index b63c58a6..58a2fba0 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -16,6 +16,7 @@ from zipline.pipeline.loaders.earnings_estimates import (
     NextEarningsEstimatesLoader,
     PreviousEarningsEstimatesLoader,
     required_estimates_fields,
+    metadata_columns,
 )
 from zipline.pipeline.loaders.utils import (
     check_data_query_args,
@@ -104,17 +105,20 @@ class BlazeEstimatesLoader(PipelineLoader):
         self._checkpoints = checkpoints
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        raw = load_raw_data(assets,
-                            dates,
-                            self._data_query_time,
-                            self._data_query_tz,
-                            self._expr,
-                            self._odo_kwargs,
-                            checkpoints=self._checkpoints)
+        column_names = [column.name for column in columns]
+        raw = load_raw_data(
+            assets,
+            dates,
+            self._data_query_time,
+            self._data_query_tz,
+            self._expr[sorted(metadata_columns.union(column_names))],
+            self._odo_kwargs,
+            checkpoints=self._checkpoints,
+        )
 
         return self.loader(
             raw,
-            self._columns,
+            {k: self._columns[k] for k in column_names}
         ).load_adjusted_array(
             columns,
             dates,
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index 6ecf3c6f..0a6e56f3 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -57,7 +57,7 @@ class BlazeEventsLoader(PipelineLoader):
     start the backtest with knowledge of all announcements.
     """
 
-    __doc__ == __doc__.format(SID_FIELD_NAME=SID_FIELD_NAME,
+    __doc__ = __doc__.format(SID_FIELD_NAME=SID_FIELD_NAME,
                               TS_FIELD_NAME=TS_FIELD_NAME,
                               EVENT_DATE_FIELD_NAME=EVENT_DATE_FIELD_NAME)
 
diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py
index 6790e2c4..c7a141a8 100644
--- a/zipline/pipeline/loaders/earnings_estimates.py
+++ b/zipline/pipeline/loaders/earnings_estimates.py
@@ -50,23 +50,24 @@ def split_normalized_quarters(normalized_quarters):
     return years, quarters + 1
 
 
+# These metadata columns are used to align event indexers.
+metadata_columns = frozenset({
+    TS_FIELD_NAME,
+    SID_FIELD_NAME,
+    EVENT_DATE_FIELD_NAME,
+    FISCAL_QUARTER_FIELD_NAME,
+    FISCAL_YEAR_FIELD_NAME,
+})
+
+
 def required_estimates_fields(columns):
     """
     Compute the set of resource columns required to serve
     `columns`.
     """
-    # These metadata columns are used to align event indexers.
-    return {
-        TS_FIELD_NAME,
-        SID_FIELD_NAME,
-        EVENT_DATE_FIELD_NAME,
-        FISCAL_QUARTER_FIELD_NAME,
-        FISCAL_YEAR_FIELD_NAME
-    }.union(
-        # We also expect any of the field names that our loadable columns
-        # are mapped to.
-        viewvalues(columns),
-    )
+    # We also expect any of the field names that our loadable columns
+    # are mapped to.
+    return metadata_columns.union(viewvalues(columns))
 
 
 def validate_column_specs(events, columns):
@@ -269,18 +270,13 @@ class EarningsEstimatesLoader(PipelineLoader):
             The array of data and overwrites for the given column.
         """
         col_to_overwrites = defaultdict(dict)
-        # We no longer need NORMALIZED_QUARTERS in the index, but we do need it
-        # as a column to calculate adjustments.
-        zero_qtr_data = zero_qtr_data.reset_index(level=NORMALIZED_QUARTERS)
         zero_qtr_data.sort_index(inplace=True)
-
-        quarter_shifts = zero_qtr_data.loc[
-            zero_qtr_data.index[
-                zero_qtr_data.groupby(level=SID_FIELD_NAME)[
-                    NORMALIZED_QUARTERS
-                ].diff().nonzero()
-            ]
-        ]
+        # Here we want to get the LAST record from each group of records
+        # corresponding to a single quarter. This is to ensure that we select
+        # the most up-to-date event date in case the event date changes.
+        quarter_shifts = zero_qtr_data.groupby(
+            level=[SID_FIELD_NAME, NORMALIZED_QUARTERS]
+        ).nth(-1)
 
         sid_to_idx = dict(zip(assets, range(len(assets))))
 
@@ -290,7 +286,9 @@ class EarningsEstimatesLoader(PipelineLoader):
                 side=self.searchsorted_side,
             )
             sid = int(group.name)
-            qtrs_with_estimates = group[NORMALIZED_QUARTERS].values
+            qtrs_with_estimates = group.index.get_level_values(
+                NORMALIZED_QUARTERS
+            ).values
             for idx in next_qtr_start_indices:
                 if 0 < idx < len(dates):
                     # Only add adjustments if the next quarter starts somewhere
@@ -584,13 +582,14 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
             An index of calendar dates, sid, and normalized quarters, for only
             the rows that have a next event.
         """
-
         next_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
             stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
         ].groupby(
             level=[SIMULTATION_DATES, SID_FIELD_NAME],
             as_index=False,
+            # Here we take advantage of the fact that `stacked_last_per_qtr` is
+            # sorted by event date.
         ).nth(0)
         return next_releases_per_date.index
 
@@ -635,12 +634,13 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
             An index of calendar dates, sid, and normalized quarters, for only
             the rows that have a previous event.
         """
-
         previous_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
             stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
         ].groupby(
             level=[SIMULTATION_DATES, SID_FIELD_NAME],
             as_index=False,
+            # Here we take advantage of the fact that `stacked_last_per_qtr` is
+            # sorted by event date.
         ).nth(-1)
         return previous_releases_per_date.index

From f528c01ca9ac396135f7a89e58e3c046de1f352d Mon Sep 17 00:00:00 2001
From: Maya Tydykov <mtydykov@quantopian.com>
Date: Wed, 21 Sep 2016 15:35:21 -0400
Subject: [PATCH 16/16] TST: add test for changing event dates and adjustments

BUG: get column names from column dict

BUG: fix name map
---
 tests/pipeline/test_quarters_estimates.py     | 136 ++++++++++++++++--
 zipline/pipeline/loaders/blaze/core.py        |   2 +-
 zipline/pipeline/loaders/blaze/estimates.py   |   8 +-
 zipline/pipeline/loaders/blaze/events.py      |   4 +-
 .../pipeline/loaders/earnings_estimates.py    |  45 +++---
 zipline/testing/core.py                       |   6 -
 6 files changed, 156 insertions(+), 45 deletions(-)

diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 8301201d..8f0cf777 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -3,6 +3,7 @@ import itertools
 from nose.tools import assert_true
 from nose_parameterized import parameterized
 import numpy as np
+from numpy.testing import assert_array_equal
 import pandas as pd
 from toolz import merge
 
@@ -44,9 +45,9 @@ class Estimates(DataSet):
     estimate = Column(dtype=float64_dtype)
 
 
-def QuartersEstimates(num_qtr):
+def QuartersEstimates(announcements_out):
     class QtrEstimates(Estimates):
-        num_quarters = num_qtr
+        num_announcements = announcements_out
         name = Estimates
     return QtrEstimates
 
@@ -123,11 +124,11 @@ class WithWrongLoaderDefinition(WithEstimates):
 
     Tests
     ------
-    test_wrong_num_quarters_passed()
+    test_wrong_num_announcements_passed()
         Tests that loading with an incorrect quarter number raises an error.
-    test_no_num_quarters_attr()
+    test_no_num_announcements_attr()
         Tests that the loader throws an AssertionError if the dataset being
-        loaded has no `num_quarters` attribute.
+        loaded has no `num_announcements` attribute.
     """
 
     @classmethod
@@ -141,7 +142,7 @@ class WithWrongLoaderDefinition(WithEstimates):
                                      'estimate'],
                             index=[0])
 
-    def test_wrong_num_quarters_passed(self):
+    def test_wrong_num_announcements_passed(self):
         bad_dataset1 = QuartersEstimates(-1)
         bad_dataset2 = QuartersEstimates(-2)
         good_dataset = QuartersEstimates(1)
@@ -150,7 +151,7 @@ class WithWrongLoaderDefinition(WithEstimates):
             self.trading_days,
             self.asset_finder,
         )
-        columns = {c.name + str(dataset.num_quarters): c.latest
+        columns = {c.name + str(dataset.num_announcements): c.latest
                    for dataset in (bad_dataset1,
                                    bad_dataset2,
                                    good_dataset)
@@ -165,7 +166,7 @@ class WithWrongLoaderDefinition(WithEstimates):
             )
             assert_raises_regex(e, INVALID_NUM_QTRS_MESSAGE % "-1,-2")
 
-    def test_no_num_quarters_attr(self):
+    def test_no_num_announcements_attr(self):
         dataset = QuartersEstimatesNoNumQuartersAttr(1)
         engine = SimplePipelineEngine(
             lambda x: self.loader,
@@ -657,6 +658,119 @@ class PreviousEstimateMultipleQuarters(
         return expected
 
 
+class WithVaryingNumEstimates(WithEstimates):
+    """
+    ZiplineTestCase mixin providing fixtures and a test to ensure that we
+    have the correct overwrites when the event date changes. We want to make
+    sure that if we have a quarter with an event date that gets pushed back,
+    we don't start overwriting for the next quarter early. Likewise,
+    if we have a quarter with an event date that gets pushed forward, we want
+    to make sure that we start applying adjustments at the appropriate, earlier
+    date, rather than the later date.
+
+    Methods
+    -------
+    assert_compute()
+        Defines how to determine that results computed for the `SomeFactor`
+        factor are correct.
+
+    Tests
+    -----
+    test_windows_with_varying_num_estimates()
+        Tests that we create the correct overwrites from 2015-01-13 to
+        2015-01-14 regardless of how event dates were updated for each
+        quarter for each sid.
+    """
+
+    @classmethod
+    def make_events(cls):
+        return pd.DataFrame({
+            SID_FIELD_NAME: [0] * 3 + [1] * 3,
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-09'),
+                            pd.Timestamp('2015-01-12'),
+                            pd.Timestamp('2015-01-13')] * 2,
+            EVENT_DATE_FIELD_NAME: [pd.Timestamp('2015-01-12'),
+                                    pd.Timestamp('2015-01-13'),
+                                    pd.Timestamp('2015-01-20'),
+                                    pd.Timestamp('2015-01-13'),
+                                    pd.Timestamp('2015-01-12'),
+                                    pd.Timestamp('2015-01-20')],
+            'estimate': [11., 12., 21.] * 2,
+            FISCAL_QUARTER_FIELD_NAME: [1, 1, 2] * 2,
+            FISCAL_YEAR_FIELD_NAME: [2015] * 6
+        })
+
+    @classmethod
+    def assert_compute(cls, estimate, today):
+        raise NotImplementedError('assert_compute')
+
+    def test_windows_with_varying_num_estimates(self):
+        dataset = QuartersEstimates(1)
+        assert_compute = self.assert_compute
+
+        class SomeFactor(CustomFactor):
+            inputs = [dataset.estimate]
+            window_length = 3
+
+            def compute(self, today, assets, out, estimate):
+                assert_compute(estimate, today)
+
+        engine = SimplePipelineEngine(
+            lambda x: self.loader,
+            self.trading_days,
+            self.asset_finder,
+        )
+        engine.run_pipeline(
+            Pipeline({'est': SomeFactor()}),
+            start_date=pd.Timestamp('2015-01-13', tz='utc'),
+            # one day after start_date; assert_compute checks both days
+            end_date=pd.Timestamp('2015-01-14', tz='utc'),
+        )
+
+
+class PreviousVaryingNumEstimates(
+    WithVaryingNumEstimates,
+    ZiplineTestCase
+):
+    def assert_compute(self, estimate, today):
+        if today == pd.Timestamp('2015-01-13', tz='utc'):
+            assert_array_equal(estimate[:, 0],
+                               np.array([np.NaN, np.NaN, 12]))
+            assert_array_equal(estimate[:, 1],
+                               np.array([np.NaN, 12, 12]))
+        else:
+            assert_array_equal(estimate[:, 0],
+                               np.array([np.NaN, 12, 12]))
+            assert_array_equal(estimate[:, 1],
+                               np.array([12, 12, 12]))
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return PreviousEarningsEstimatesLoader(events, columns)
+
+
+class NextVaryingNumEstimates(
+    WithVaryingNumEstimates,
+    ZiplineTestCase
+):
+
+    def assert_compute(self, estimate, today):
+        if today == pd.Timestamp('2015-01-13', tz='utc'):
+            assert_array_equal(estimate[:, 0],
+                               np.array([11, 12, 12]))
+            assert_array_equal(estimate[:, 1],
+                               np.array([np.NaN, np.NaN, 21]))
+        else:
+            assert_array_equal(estimate[:, 0],
+                               np.array([np.NaN, 21, 21]))
+            assert_array_equal(estimate[:, 1],
+                               np.array([np.NaN, 21, 21]))
+
+    @classmethod
+    def make_loader(cls, events, columns):
+        return NextEarningsEstimatesLoader(events, columns)
+
+
 class WithEstimateWindows(WithEstimates):
     """
     ZiplineTestCase mixin providing fixures and a test to test running a
@@ -761,8 +875,8 @@ class WithEstimateWindows(WithEstimates):
     @parameterized.expand(window_test_cases)
     def test_estimate_windows_at_quarter_boundaries(self,
                                                     start_idx,
-                                                    num_quarters_out):
-        dataset = QuartersEstimates(num_quarters_out)
+                                                    num_announcements_out):
+        dataset = QuartersEstimates(num_announcements_out)
         trading_days = self.trading_days
         timelines = self.timelines
         # The window length should be from the starting index back to the first
@@ -781,7 +895,7 @@ class WithEstimateWindows(WithEstimates):
             def compute(self, today, assets, out, estimate):
                 today_idx = trading_days.get_loc(today)
                 today_timeline = timelines[
-                    num_quarters_out
+                    num_announcements_out
                 ].loc[today].reindex(
                     trading_days[:today_idx + 1]
                 ).values
diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index d5c3e40b..925faddf 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -188,7 +188,7 @@ from zipline.utils.input_validation import (
     ensure_timezone,
     optionally,
 )
-from zipline.utils.numpy_utils import bool_dtype, categorical_dtype
+from zipline.utils.numpy_utils import bool_dtype
 from zipline.utils.pool import SequentialPool
 from zipline.utils.preprocess import preprocess
 
diff --git a/zipline/pipeline/loaders/blaze/estimates.py b/zipline/pipeline/loaders/blaze/estimates.py
index 58a2fba0..3cb1064a 100644
--- a/zipline/pipeline/loaders/blaze/estimates.py
+++ b/zipline/pipeline/loaders/blaze/estimates.py
@@ -105,20 +105,22 @@ class BlazeEstimatesLoader(PipelineLoader):
         self._checkpoints = checkpoints
 
     def load_adjusted_array(self, columns, dates, assets, mask):
-        column_names = [column.name for column in columns]
+        # Only load requested columns.
+        requested_column_names = [self._columns[column.name]
+                                  for column in columns]
         raw = load_raw_data(
             assets,
             dates,
             self._data_query_time,
             self._data_query_tz,
-            self._expr[sorted(metadata_columns.union(column_names))],
+            self._expr[sorted(metadata_columns.union(requested_column_names))],
             self._odo_kwargs,
             checkpoints=self._checkpoints,
         )
 
         return self.loader(
             raw,
-            {k: self._columns[k] for k in column_names}
+            {column.name: self._columns[column.name] for column in columns}
         ).load_adjusted_array(
             columns,
             dates,
diff --git a/zipline/pipeline/loaders/blaze/events.py b/zipline/pipeline/loaders/blaze/events.py
index 0a6e56f3..cc778d5c 100644
--- a/zipline/pipeline/loaders/blaze/events.py
+++ b/zipline/pipeline/loaders/blaze/events.py
@@ -58,8 +58,8 @@ class BlazeEventsLoader(PipelineLoader):
     """
 
     __doc__ = __doc__.format(SID_FIELD_NAME=SID_FIELD_NAME,
-                              TS_FIELD_NAME=TS_FIELD_NAME,
-                              EVENT_DATE_FIELD_NAME=EVENT_DATE_FIELD_NAME)
+                             TS_FIELD_NAME=TS_FIELD_NAME,
+                             EVENT_DATE_FIELD_NAME=EVENT_DATE_FIELD_NAME)
 
     @preprocess(data_query_tz=optionally(ensure_timezone))
     def __init__(self,
diff --git a/zipline/pipeline/loaders/earnings_estimates.py b/zipline/pipeline/loaders/earnings_estimates.py
index c7a141a8..5a794683 100644
--- a/zipline/pipeline/loaders/earnings_estimates.py
+++ b/zipline/pipeline/loaders/earnings_estimates.py
@@ -37,7 +37,7 @@ NORMALIZED_QUARTERS = 'normalized_quarters'
 PREVIOUS_FISCAL_QUARTER = 'previous_fiscal_quarter'
 PREVIOUS_FISCAL_YEAR = 'previous_fiscal_year'
 SHIFTED_NORMALIZED_QTRS = 'shifted_normalized_quarters'
-SIMULTATION_DATES = 'dates'
+SIMULATION_DATES = 'dates'
 
 
 def normalize_quarters(years, quarters):
@@ -95,7 +95,7 @@ class EarningsEstimatesLoader(PipelineLoader):
     """
     An abstract pipeline loader for estimates data that can load data a
     variable number of quarters forwards/backwards from calendar dates
-    depending on the `num_quarters` attribute of the columns' dataset.
+    depending on the `num_announcements` attribute of the columns' dataset.
 
     Parameters
     ----------
@@ -152,11 +152,11 @@ class EarningsEstimatesLoader(PipelineLoader):
         self.name_map = name_map
 
     @abstractmethod
-    def get_zeroth_quarter_idx(self, num_quarters, last, dates):
+    def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
         raise NotImplementedError('get_zeroth_quarter_idx')
 
     @abstractmethod
-    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
+    def get_shifted_qtrs(self, zero_qtrs, num_announcements):
         raise NotImplementedError('get_shifted_qtrs')
 
     @abstractmethod
@@ -178,7 +178,7 @@ class EarningsEstimatesLoader(PipelineLoader):
                                    zero_qtr_data,
                                    zeroth_quarter_idx,
                                    stacked_last_per_qtr,
-                                   num_quarters,
+                                   num_announcements,
                                    dates):
         """
         Selects the requested data for each date.
@@ -212,7 +212,7 @@ class EarningsEstimatesLoader(PipelineLoader):
                     zeroth_quarter_idx.get_level_values(
                         NORMALIZED_QUARTERS,
                     ),
-                    num_quarters,
+                    num_announcements,
                 ),
             ],
             names=[
@@ -397,18 +397,19 @@ class EarningsEstimatesLoader(PipelineLoader):
 
     def load_adjusted_array(self, columns, dates, assets, mask):
         # Separate out getting the columns' datasets and the datasets'
-        # num_quarters attributes to ensure that we're catching the right
+        # num_announcements attributes to ensure that we're catching the right
         # AttributeError.
         col_to_datasets = {col: col.dataset for col in columns}
         try:
-            groups = groupby(lambda col: col_to_datasets[col].num_quarters,
+            groups = groupby(lambda col:
+                             col_to_datasets[col].num_announcements,
                              col_to_datasets)
         except AttributeError:
             raise AttributeError("Datasets loaded via the "
                                  "EarningsEstimatesLoader must define a "
-                                 "`num_quarters` attribute that defines how "
-                                 "many quarters out the loader should load "
-                                 "the data relative to `dates`.")
+                                 "`num_announcements` attribute that defines "
+                                 "how many quarters out the loader should load"
+                                 " the data relative to `dates`.")
         if any(num_qtr < 0 for num_qtr in groups):
             raise ValueError(
                 INVALID_NUM_QTRS_MESSAGE % ','.join(
@@ -430,12 +431,12 @@ class EarningsEstimatesLoader(PipelineLoader):
         zeroth_quarter_idx = self.get_zeroth_quarter_idx(stacked_last_per_qtr)
         zero_qtr_data = stacked_last_per_qtr.loc[zeroth_quarter_idx]
 
-        for num_quarters, columns in groups.items():
+        for num_announcements, columns in groups.items():
             requested_qtr_data = self.get_requested_quarter_data(
                 zero_qtr_data,
                 zeroth_quarter_idx,
                 stacked_last_per_qtr,
-                num_quarters,
+                num_announcements,
                 dates,
             )
 
@@ -523,7 +524,7 @@ class EarningsEstimatesLoader(PipelineLoader):
         )
         # Set date index name for ease of reference
         stacked_last_per_qtr.index.set_names(
-            SIMULTATION_DATES,
+            SIMULATION_DATES,
             level=0,
             inplace=True,
         )
@@ -560,8 +561,8 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
             ].values[:next_qtr_start_idx],
         )
 
-    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
-        return zero_qtrs + (num_quarters - 1)
+    def get_shifted_qtrs(self, zero_qtrs, num_announcements):
+        return zero_qtrs + (num_announcements - 1)
 
     def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
         """
@@ -584,9 +585,9 @@ class NextEarningsEstimatesLoader(EarningsEstimatesLoader):
         """
         next_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
-            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+            stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
         ].groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME],
+            level=[SIMULATION_DATES, SID_FIELD_NAME],
             as_index=False,
             # Here we take advantage of the fact that `stacked_last_per_qtr` is
             # sorted by event date.
@@ -612,8 +613,8 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
             sid_idx,
         )
 
-    def get_shifted_qtrs(self, zero_qtrs, num_quarters):
-        return zero_qtrs - (num_quarters - 1)
+    def get_shifted_qtrs(self, zero_qtrs, num_announcements):
+        return zero_qtrs - (num_announcements - 1)
 
     def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
         """
@@ -636,9 +637,9 @@ class PreviousEarningsEstimatesLoader(EarningsEstimatesLoader):
         """
         previous_releases_per_date = stacked_last_per_qtr.loc[
             stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
-            stacked_last_per_qtr.index.get_level_values(SIMULTATION_DATES)
+            stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
         ].groupby(
-            level=[SIMULTATION_DATES, SID_FIELD_NAME],
+            level=[SIMULATION_DATES, SID_FIELD_NAME],
             as_index=False,
             # Here we take advantage of the fact that `stacked_last_per_qtr` is
             # sorted by event date.
diff --git a/zipline/testing/core.py b/zipline/testing/core.py
index 19240a5b..f9832767 100644
--- a/zipline/testing/core.py
+++ b/zipline/testing/core.py
@@ -49,14 +49,8 @@ from zipline.pipeline.loaders.testing import make_seeded_random_loader
 from zipline.utils import security_list
 from zipline.utils.calendars import get_calendar
 from zipline.utils.input_validation import expect_dimensions
-<<<<<<< HEAD
 from zipline.utils.numpy_utils import as_column, isnat
 from zipline.utils.pandas_utils import timedelta_to_integral_seconds
-=======
-from zipline.utils.numpy_utils import (
-    as_column,
-)
->>>>>>> WIP
 from zipline.utils.sentinel import sentinel
 
 import numpy as np