ENH: Add builtin factors for correlation and regression

2026-06-28 00:58:26 +08:00 · 2016-04-15 13:00:53 -04:00
parent da77e88786
commit 1ec0bced6d
15 changed files with 636 additions and 56 deletions
@@ -215,6 +215,15 @@ Pipeline API
 .. autoclass:: zipline.pipeline.factors.BollingerBands
   :members:

+.. autoclass:: zipline.pipeline.factors.RollingPearsonOfReturns
+   :members:
+
+.. autoclass:: zipline.pipeline.factors.RollingSpearmanOfReturns
+   :members:
+
+.. autoclass:: zipline.pipeline.factors.RollingLinearRegressionOfReturns
+   :members:
+
 .. autoclass:: zipline.pipeline.filters.Filter
   :members: __and__, __or__
   :exclude-members: dtype
@@ -173,6 +173,11 @@ Enhancements
 * Fetcher has been moved from Quantopian internal code into Zipline
  (:issue:`1105`).

+* Added new built-in factors,
+  :class:`~zipline.pipeline.factors.RollingPearsonOfReturns`,
+  :class:`~zipline.pipeline.factors.RollingSpearmanOfReturns` and
+  :class:`~zipline.pipeline.factors.RollingLinearRegressionOfReturns`
+  (:issue:`1154`).

 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~
@@ -14,6 +14,7 @@ from numpy import (
    float32,
    float64,
    full,
+    full_like,
    log,
    nan,
    tile,
@@ -36,6 +37,7 @@ from pandas import (
 )
 from pandas.compat.chainmap import ChainMap
 from pandas.util.testing import assert_frame_equal
+from scipy.stats.stats import linregress, pearsonr, spearmanr
 from six import iteritems, itervalues
 from toolz import merge

@@ -53,6 +55,10 @@ from zipline.pipeline.factors import (
    ExponentialWeightedMovingAverage,
    ExponentialWeightedMovingStdDev,
    MaxDrawdown,
+    Returns,
+    RollingLinearRegressionOfReturns,
+    RollingPearsonOfReturns,
+    RollingSpearmanOfReturns,
    SimpleMovingAverage,
 )
 from zipline.pipeline.loaders.equity_pricing_loader import (
@@ -66,8 +72,9 @@ from zipline.pipeline.loaders.synthetic import (
 )
 from zipline.pipeline.term import NotSpecified
 from zipline.testing import (
-    product_upper_triangle,
    check_arrays,
+    parameter_space,
+    product_upper_triangle,
 )
 from zipline.testing.fixtures import (
    WithAdjustmentReader,
@@ -1242,6 +1249,188 @@ class ParameterizedFactorTestCase(WithTradingEnvironment, ZiplineTestCase):
        expected_5 = rolling_mean((self.raw_data ** 2) * 2, window=5)[5:]
        assert_frame_equal(results['dv5'].unstack(), expected_5)

+    @parameter_space(returns_length=[2, 3], correlation_length=[3, 4])
+    def test_correlation_factors(self, returns_length, correlation_length):
+        """
+        Tests for the built-in factors `RollingPearsonOfReturns` and
+        `RollingSpearmanOfReturns`.
+        """
+        my_asset_column = 0
+        start_date_index = 6
+        end_date_index = 10
+
+        assets = self.asset_finder.retrieve_all(self.sids)
+        my_asset = assets[my_asset_column]
+        my_asset_filter = (AssetID() != (my_asset_column + 1))
+        num_days = end_date_index - start_date_index + 1
+
+        # Our correlation factors require that their target asset is not
+        # filtered out, so make sure that masking out our target asset does not
+        # take effect. That is, a filter which filters out only our target
+        # asset should produce the same result as if no mask was passed at all.
+        for mask in (NotSpecified, my_asset_filter):
+            pearson_factor = RollingPearsonOfReturns(
+                target=my_asset,
+                returns_length=returns_length,
+                correlation_length=correlation_length,
+                mask=mask,
+            )
+            spearman_factor = RollingSpearmanOfReturns(
+                target=my_asset,
+                returns_length=returns_length,
+                correlation_length=correlation_length,
+                mask=mask,
+            )
+
+            results = self.engine.run_pipeline(
+                Pipeline(
+                    columns={
+                        'pearson_factor': pearson_factor,
+                        'spearman_factor': spearman_factor,
+                    },
+                ),
+                self.dates[start_date_index],
+                self.dates[end_date_index],
+            )
+            pearson_results = results['pearson_factor'].unstack()
+            spearman_results = results['spearman_factor'].unstack()
+
+            # Run a separate pipeline that calculates returns starting
+            # (correlation_length - 1) days prior to our start date. This is
+            # because we need (correlation_length - 1) extra days of returns to
+            # compute our expected correlations.
+            returns = Returns(window_length=returns_length)
+            results = self.engine.run_pipeline(
+                Pipeline(columns={'returns': returns}),
+                self.dates[start_date_index - (correlation_length - 1)],
+                self.dates[end_date_index],
+            )
+            returns_results = results['returns'].unstack()
+
+            # On each day, calculate the expected correlation coefficients
+            # between the asset we are interested in and each other asset. Each
+            # correlation is calculated over `correlation_length` days.
+            expected_pearson_results = full_like(pearson_results, nan)
+            expected_spearman_results = full_like(spearman_results, nan)
+            for day in range(num_days):
+                todays_returns = returns_results.iloc[
+                    day:day + correlation_length
+                ]
+                my_asset_returns = todays_returns.iloc[:, my_asset_column]
+                for asset, other_asset_returns in todays_returns.iteritems():
+                    asset_column = int(asset) - 1
+                    expected_pearson_results[day, asset_column] = pearsonr(
+                        my_asset_returns, other_asset_returns,
+                    )[0]
+                    expected_spearman_results[day, asset_column] = spearmanr(
+                        my_asset_returns, other_asset_returns,
+                    )[0]
+
+            assert_frame_equal(
+                pearson_results,
+                DataFrame(
+                    expected_pearson_results,
+                    index=self.dates[start_date_index:end_date_index + 1],
+                    columns=assets,
+                ),
+            )
+            assert_frame_equal(
+                spearman_results,
+                DataFrame(
+                    expected_spearman_results,
+                    index=self.dates[start_date_index:end_date_index + 1],
+                    columns=assets,
+                ),
+            )
+
+    @parameter_space(returns_length=[2, 3], regression_length=[3, 4])
+    def test_regression_of_returns_factor(self,
+                                          returns_length,
+                                          regression_length):
+        """
+        Tests for the built-in factor `RollingLinearRegressionOfReturns`.
+        """
+        my_asset_column = 0
+        start_date_index = 6
+        end_date_index = 10
+
+        assets = self.asset_finder.retrieve_all(self.sids)
+        my_asset = assets[my_asset_column]
+        my_asset_filter = (AssetID() != (my_asset_column + 1))
+        num_days = end_date_index - start_date_index + 1
+
+        # The order of these is meant to align with the output of `linregress`.
+        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']
+
+        # Our regression factor requires that its target asset is not filtered
+        # out, so make sure that masking out our target asset does not take
+        # effect. That is, a filter which filters out only our target asset
+        # should produce the same result as if no mask was passed at all.
+        for mask in (NotSpecified, my_asset_filter):
+            regression_factor = RollingLinearRegressionOfReturns(
+                target=my_asset,
+                returns_length=returns_length,
+                regression_length=regression_length,
+                mask=mask,
+            )
+            results = self.engine.run_pipeline(
+                Pipeline(
+                    columns={
+                        output: getattr(regression_factor, output)
+                        for output in outputs
+                    },
+                ),
+                self.dates[start_date_index],
+                self.dates[end_date_index],
+            )
+            output_results = {}
+            expected_output_results = {}
+            for output in outputs:
+                output_results[output] = results[output].unstack()
+                expected_output_results[output] = full_like(
+                    output_results[output], nan,
+                )
+
+            # Run a separate pipeline that calculates returns starting
+            # (regression_length - 1) days prior to our start date. This is
+            # because we need (regression_length - 1) extra days of returns to
+            # compute our expected regressions.
+            returns = Returns(window_length=returns_length)
+            results = self.engine.run_pipeline(
+                Pipeline(columns={'returns': returns}),
+                self.dates[start_date_index - (regression_length - 1)],
+                self.dates[end_date_index],
+            )
+            returns_results = results['returns'].unstack()
+
+            # On each day, calculate the expected regression results for Y ~ X
+            # where Y is the asset we are interested in and X is each other
+            # asset. Each regression is calculated over `regression_length`
+            # days of data.
+            for day in range(num_days):
+                todays_returns = returns_results.iloc[
+                    day:day + regression_length
+                ]
+                my_asset_returns = todays_returns.iloc[:, my_asset_column]
+                for asset, other_asset_returns in todays_returns.iteritems():
+                    asset_column = int(asset) - 1
+                    expected_regression_results = linregress(
+                        y=other_asset_returns, x=my_asset_returns,
+                    )
+                    for i, output in enumerate(outputs):
+                        expected_output_results[output][day, asset_column] = \
+                            expected_regression_results[i]
+
+            for output in outputs:
+                assert_frame_equal(
+                    output_results[output],
+                    DataFrame(
+                        expected_output_results[output],
+                        index=self.dates[start_date_index:end_date_index + 1],
+                        columns=assets,
+                    ),
+                )
+

 class StringColumnTestCase(WithSeededRandomPipelineEngine,
                           ZiplineTestCase):
@@ -7,7 +7,7 @@ from unittest import TestCase

 from zipline.errors import (
    DTypeNotSpecified,
-    WindowedInputToWindowedTerm,
+    NonWindowSafeInput,
    NotDType,
    TermInputsNotSpecified,
    TermOutputsEmpty,
@@ -198,7 +198,7 @@ class DependencyResolutionTestCase(TestCase):

    def test_disallow_recursive_lookback(self):

-        with self.assertRaises(WindowedInputToWindowedTerm):
+        with self.assertRaises(NonWindowSafeInput):
            SomeFactor(inputs=[SomeFactor(), SomeDataSet.foo])


@@ -418,10 +418,10 @@ class WindowLengthNotPositive(ZiplineError):
    ).strip()


-class WindowedInputToWindowedTerm(ZiplineError):
+class NonWindowSafeInput(ZiplineError):
    """
-    Raised when a windowed Pipeline API term is specified as an input to
-    another windowed term.
+    Raised when a Pipeline API term that is not deemed window safe is specified
+    as an input to another windowed term.

    This is an error because it's generally not safe to compose windowed
    functions on split/dividend adjusted data.
@@ -617,3 +617,10 @@ class HistoryWindowStartsBeforeData(ZiplineError):
        "History window extends before {first_trading_day}. To use this "
        "history window, start the backtest on or after {suggested_start_day}."
        )
+
+
+class NonExistentAssetInTimeFrame(ZiplineError):
+    """
+    Raised when a target asset is not present on every date of the period
+    being computed (for example by the ``SingleAsset`` filter).
+    """
+    msg = (
+        "The target asset '{asset}' does not exist for the entire timeframe "
+        "between {start_date} and {end_date}."
+    )
@@ -243,6 +243,20 @@ class AdjustedArray(object):
        )


+def ensure_adjusted_array(ndarray_or_adjusted_array, missing_value):
+    if isinstance(ndarray_or_adjusted_array, AdjustedArray):
+        return ndarray_or_adjusted_array
+    elif isinstance(ndarray_or_adjusted_array, ndarray):
+        return AdjustedArray(
+            ndarray_or_adjusted_array, NOMASK, {}, missing_value,
+        )
+    else:
+        raise TypeError(
+            "Can't convert %s to AdjustedArray" %
+            type(ndarray_or_adjusted_array).__name__
+        )
+
+
 def ensure_ndarray(ndarray_or_adjusted_array):
    """
    Return the input as a numpy ndarray.
@@ -113,6 +113,7 @@ class BoundColumn(LoadableTerm):
    """
    mask = AssetExists()
    inputs = ()
+    window_safe = True

    def __new__(cls, dtype, missing_value, dataset, name):
        return super(BoundColumn, cls).__new__(
@@ -20,7 +20,7 @@ from pandas import (
 from toolz import groupby, juxt
 from toolz.curried.operator import getitem

-from zipline.lib.adjusted_array import ensure_ndarray
+from zipline.lib.adjusted_array import ensure_adjusted_array, ensure_ndarray
 from zipline.errors import NoFurtherDataError
 from zipline.utils.numpy_utils import repeat_first_axis, repeat_last_axis
 from zipline.utils.pandas_utils import explode
@@ -265,28 +265,31 @@ class SimplePipelineEngine(object):
        that input.
        """
        offsets = graph.offset
+        out = []
        if term.windowed:
            # If term is windowed, then all input data should be instances of
            # AdjustedArray.
-            return [
-                workspace[input_].traverse(
-                    window_length=term.window_length,
-                    offset=offsets[term, input_]
+            for input_ in term.inputs:
+                adjusted_array = ensure_adjusted_array(
+                    workspace[input_], input_.missing_value,
                )
-                for input_ in term.inputs
-            ]
-
-        # If term is not windowed, input_data may be an AdjustedArray or
-        # np.ndarray.  Coerce the former to the latter.
-        out = []
-        for input_ in term.inputs:
-            input_data = ensure_ndarray(workspace[input_])
-            offset = offsets[term, input_]
-            # OPTIMIZATION: Don't make a copy by doing input_data[0:] if
-            # offset is zero.
-            if offset:
-                input_data = input_data[offset:]
-            out.append(input_data)
+                out.append(
+                    adjusted_array.traverse(
+                        window_length=term.window_length,
+                        offset=offsets[term, input_],
+                    )
+                )
+        else:
+            # If term is not windowed, input_data may be an AdjustedArray or
+            # np.ndarray.  Coerce the former to the latter.
+            for input_ in term.inputs:
+                input_data = ensure_ndarray(workspace[input_])
+                offset = offsets[term, input_]
+                # OPTIMIZATION: Don't make a copy by doing input_data[0:] if
+                # offset is zero.
+                if offset:
+                    input_data = input_data[offset:]
+                out.append(input_data)
        return out

    def get_loader(self, term):
@@ -8,10 +8,10 @@ from .events import (
    BusinessDaysSince13DFilingsDate,
    BusinessDaysSinceBuybackAuth,
    BusinessDaysSinceDividendAnnouncement,
-    BusinessDaysUntilNextExDate,
+    BusinessDaysSincePreviousEarnings,
    BusinessDaysSincePreviousExDate,
    BusinessDaysUntilNextEarnings,
-    BusinessDaysSincePreviousEarnings,
+    BusinessDaysUntilNextExDate,
 )
 from .technical import (
    AverageDollarVolume,
@@ -21,24 +21,27 @@ from .technical import (
    ExponentialWeightedMovingAverage,
    ExponentialWeightedMovingStdDev,
    MaxDrawdown,
-    RSI,
    Returns,
+    RollingLinearRegressionOfReturns,
+    RollingPearsonOfReturns,
+    RollingSpearmanOfReturns,
+    RSI,
    SimpleMovingAverage,
    VWAP,
    WeightedAverageValue,
 )

 __all__ = [
+    'AverageDollarVolume',
    'BollingerBands',
    'BusinessDaysSince13DFilingsDate',
    'BusinessDaysSinceBuybackAuth',
    'BusinessDaysSinceDividendAnnouncement',
-    'BusinessDaysUntilNextExDate',
+    'BusinessDaysSincePreviousEarnings',
    'BusinessDaysSincePreviousExDate',
    'BusinessDaysUntilNextEarnings',
-    'BusinessDaysSincePreviousEarnings',
+    'BusinessDaysUntilNextExDate',
    'CustomFactor',
-    'AverageDollarVolume',
    'EWMA',
    'EWMSTD',
    'ExponentialWeightedMovingAverage',
@@ -46,9 +49,12 @@ __all__ = [
    'Factor',
    'Latest',
    'MaxDrawdown',
-    'RSI',
    'RecarrayField',
    'Returns',
+    'RollingLinearRegressionOfReturns',
+    'RollingPearsonOfReturns',
+    'RollingSpearmanOfReturns',
+    'RSI',
    'SimpleMovingAverage',
    'VWAP',
    'WeightedAverageValue',
@@ -577,6 +577,7 @@ class Factor(RestrictedDTypeMixin, ComputableTerm):
            factor=self,
            mask=mask,
            groupby=groupby,
+            window_safe=True,
        )

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
@@ -908,7 +909,7 @@ class GroupedRowTransform(Factor):
    """
    window_length = 0

-    def __new__(cls, transform, factor, mask, groupby):
+    def __new__(cls, transform, factor, mask, groupby, **kwargs):

        if mask is NotSpecified:
            mask = factor.mask
@@ -925,6 +926,7 @@ class GroupedRowTransform(Factor):
            missing_value=factor.missing_value,
            mask=mask,
            dtype=factor.dtype,
+            **kwargs
        )

    def _init(self, transform, *args, **kwargs):
@@ -1001,6 +1003,7 @@ class Rank(SingleInputMixin, Factor):
    """
    window_length = 0
    dtype = float64_dtype
+    window_safe = True

    def __new__(cls, factor, method, ascending, mask):
        return super(Rank, cls).__new__(
@@ -8,6 +8,7 @@ from numpy import (
    arange,
    average,
    clip,
+    corrcoef,
    diff,
    exp,
    fmax,
@@ -16,13 +17,17 @@ from numpy import (
    isnan,
    log,
    NINF,
+    searchsorted,
    sqrt,
    sum as np_sum,
 )
 from numexpr import evaluate
+from scipy.stats import linregress, spearmanr

 from zipline.pipeline.data import USEquityPricing
+from zipline.pipeline.filters import SingleAsset
 from zipline.pipeline.mixins import SingleInputMixin
+from zipline.pipeline.term import NotSpecified
 from zipline.utils.numpy_utils import ignore_nanwarnings
 from zipline.utils.input_validation import expect_types
 from zipline.utils.math_utils import (
@@ -42,6 +47,16 @@ class Returns(CustomFactor):
    **Default Inputs**: [USEquityPricing.close]
    """
    inputs = [USEquityPricing.close]
+    window_safe = True
+
+    def _validate(self):
+        super(Returns, self)._validate()
+        if self.window_length < 2:
+            raise ValueError(
+                "'Returns' expected a window length of at least 2, but was "
+                "given {window_length}. For daily returns, use a window "
+                "length of 2.".format(window_length=self.window_length)
+            )

    def compute(self, today, assets, out, close):
        out[:] = (close[-1] - close[0]) / close[0]
@@ -145,6 +160,260 @@ class AverageDollarVolume(CustomFactor):
        out[:] = nanmean(close * volume, axis=0)


+class _RollingCorrelationOfReturns(CustomFactor, SingleInputMixin):
+    """
+    Base class for factors computing a rolling correlation over a window of
+    Returns.
+
+    Parameters
+    ----------
+    target : zipline.assets.Asset
+        The asset to correlate with all other assets.
+    returns_length : int >= 2
+        Length of the lookback window over which to compute returns. Daily
+        returns require a window length of 2.
+    correlation_length : int >= 1
+        Length of the lookback window over which to compute each correlation
+        coefficient.
+    """
+    params = ['target']
+
+    def __new__(cls,
+                target,
+                returns_length,
+                correlation_length,
+                mask=NotSpecified,
+                **kwargs):
+        if mask is not NotSpecified:
+            # Make sure we do not filter out the asset of interest.
+            mask = mask | SingleAsset(asset=target)
+        return super(_RollingCorrelationOfReturns, cls).__new__(
+            cls,
+            target=target,
+            inputs=[Returns(window_length=returns_length)],
+            window_length=correlation_length,
+            mask=mask,
+            **kwargs
+        )
+
+
+class RollingPearsonOfReturns(_RollingCorrelationOfReturns):
+    """
+    Calculates the Pearson product-moment correlation coefficient of the
+    returns of the given asset with the returns of all other assets.
+
+    Pearson correlation is what most people mean when they say "correlation
+    coefficient" or "R-value".
+
+    Parameters
+    ----------
+    target : zipline.assets.Asset
+        The asset to correlate with all other assets.
+    returns_length : int >= 2
+        Length of the lookback window over which to compute returns. Daily
+        returns require a window length of 2.
+    correlation_length : int >= 1
+        Length of the lookback window over which to compute each correlation
+        coefficient.
+
+    Example
+    -------
+    Let the following be example 10-day returns for three different assets::
+
+                       SPY    MSFT     FB
+        2017-03-13    -.03     .03    .04
+        2017-03-14    -.02    -.03    .02
+        2017-03-15    -.01     .02    .01
+        2017-03-16       0    -.02    .01
+        2017-03-17     .01     .04   -.01
+        2017-03-20     .02    -.03   -.02
+        2017-03-21     .03     .01   -.02
+        2017-03-22     .04    -.02   -.02
+
+    Suppose we are interested in SPY's rolling returns correlation with each
+    stock from 2017-03-17 to 2017-03-22, using a 5-day look back window (that
+    is, we calculate each correlation coefficient over 5 days of data). We can
+    achieve this by doing::
+
+        rolling_correlations = RollingPearsonOfReturns(
+            target=Equity(8554),
+            returns_length=10,
+            correlation_length=5,
+        )
+
+    The result of computing ``rolling_correlations`` from 2017-03-17 to
+    2017-03-22 gives::
+
+                       SPY   MSFT     FB
+        2017-03-17       1    .15   -.96
+        2017-03-20       1    .10   -.96
+        2017-03-21       1   -.16   -.94
+        2017-03-22       1   -.16   -.85
+
+    Note that the column for SPY is all 1's, as the correlation of any data
+    series with itself is always 1. To understand how each of the other values
+    were calculated, take for example the .15 in MSFT's column. This is the
+    correlation coefficient between SPY's returns looking back from 2017-03-17
+    (-.03, -.02, -.01, 0, .01) and MSFT's returns (.03, -.03, .02, -.02, .04).
+
+    See Also
+    --------
+    :class:`zipline.pipeline.factors.technical.RollingSpearmanOfReturns`
+    :class:`zipline.pipeline.factors.technical.RollingLinearRegressionOfReturns`
+    """
+    def compute(self, today, assets, out, data, target):
+        asset_col = searchsorted(assets.values, target.sid)
+        out[:] = corrcoef(data, rowvar=0)[asset_col]
+
+
+class RollingSpearmanOfReturns(_RollingCorrelationOfReturns):
+    """
+    Calculates the Spearman rank correlation coefficient of the returns of the
+    given asset with the returns of all other assets.
+
+    Parameters
+    ----------
+    target : zipline.assets.Asset
+        The asset to correlate with all other assets.
+    returns_length : int >= 2
+        Length of the lookback window over which to compute returns. Daily
+        returns require a window length of 2.
+    correlation_length : int >= 1
+        Length of the lookback window over which to compute each correlation
+        coefficient.
+
+    See Also
+    --------
+    :class:`zipline.pipeline.factors.technical.RollingPearsonOfReturns`
+    :class:`zipline.pipeline.factors.technical.RollingLinearRegressionOfReturns`
+    """
+    def compute(self, today, assets, out, data, target):
+        asset_col = searchsorted(assets.values, target.sid)
+        out[:] = spearmanr(data)[0][asset_col]
+
+
+class RollingLinearRegressionOfReturns(CustomFactor, SingleInputMixin):
+    """
+    Perform an ordinary least-squares regression predicting the returns of all
+    other assets on the given asset.
+
+    Parameters
+    ----------
+    target : zipline.assets.Asset
+        The asset to regress against all other assets.
+    returns_length : int >= 2
+        Length of the lookback window over which to compute returns. Daily
+        returns require a window length of 2.
+    regression_length : int >= 1
+        Length of the lookback window over which to compute each regression.
+
+    Note
+    ----
+    This factor is designed to return five outputs:
+        - alpha, a factor that computes the intercepts of each regression.
+        - beta, a factor that computes the slopes of each regression.
+        - r_value, a factor that computes the correlation coefficient of each
+          regression.
+        - p_value, a factor that computes, for each regression, the two-sided
+          p-value for a hypothesis test whose null hypothesis is that the slope
+          is zero.
+        - stderr, a factor that computes the standard error of the estimate of
+          each regression.
+
+    Example
+    -------
+    Let the following be example 10-day returns for three different assets::
+
+                       SPY    MSFT     FB
+        2017-03-13    -.03     .03    .04
+        2017-03-14    -.02    -.03    .02
+        2017-03-15    -.01     .02    .01
+        2017-03-16       0    -.02    .01
+        2017-03-17     .01     .04   -.01
+        2017-03-20     .02    -.03   -.02
+        2017-03-21     .03     .01   -.02
+        2017-03-22     .04    -.02   -.02
+
+    Suppose we are interested in predicting each stock's returns from SPY's
+    over rolling 5-day look back windows. We can compute rolling regression
+    coefficients (alpha and beta) from 2017-03-17 to 2017-03-22 by doing::
+
+        regression_factor = RollingRegressionOfReturns(
+            target=Equity(8554),
+            returns_length=10,
+            regression_length=5,
+        )
+        alpha = regression_factor.alpha
+        beta = regression_factor.beta
+
+    The result of computing ``alpha`` from 2017-03-17 to 2017-03-22 gives::
+
+                       SPY    MSFT     FB
+        2017-03-17       0    .011   .003
+        2017-03-20       0   -.004   .004
+        2017-03-21       0    .007   .006
+        2017-03-22       0    .002   .008
+
+    And the result of computing ``beta`` from 2017-03-17 to 2017-03-22 gives::
+
+                       SPY    MSFT     FB
+        2017-03-17       1      .3   -1.1
+        2017-03-20       1      .2     -1
+        2017-03-21       1     -.3     -1
+        2017-03-22       1     -.3    -.9
+
+    Note that SPY's column for alpha is all 0's and for beta is all 1's, as the
+    regression line of SPY with itself is simply the function y = x.
+
+    To understand how each of the other values were calculated, take for
+    example MSFT's ``alpha`` and ``beta`` values on 2017-03-17 (.011 and .3,
+    respectively). These values are the result of running a linear regression
+    predicting MSFT's returns from SPY's returns, using values starting at
+    2017-03-17 and looking back 5 days. That is, the regression was run with
+    x = [-.03, -.02, -.01, 0, .01] and y = [.03, -.03, .02, -.02, .04], and it
+    produced a slope of .3 and an intercept of .011.
+
+    See Also
+    --------
+    :class:`zipline.pipeline.factors.technical.RollingPearsonOfReturns`
+    :class:`zipline.pipeline.factors.technical.RollingSpearmanOfReturns`
+    """
+    outputs = ['alpha', 'beta', 'r_value', 'p_value', 'stderr']
+    params = ['target']
+
+    def __new__(cls,
+                target,
+                returns_length,
+                regression_length,
+                mask=NotSpecified,
+                **kwargs):
+        if mask is not NotSpecified:
+            # Make sure we do not filter out the asset of interest.
+            mask = mask | SingleAsset(asset=target)
+        return super(RollingLinearRegressionOfReturns, cls).__new__(
+            cls,
+            target=target,
+            inputs=[Returns(window_length=returns_length)],
+            window_length=regression_length,
+            mask=mask,
+            **kwargs
+        )
+
+    def compute(self, today, assets, out, returns, target):
+        asset_col = searchsorted(assets.values, target.sid)
+        my_asset = returns[:, asset_col]
+        for i in range(len(out)):
+            other_asset = returns[:, i]
+            regr_results = linregress(y=other_asset, x=my_asset)
+            # `linregress` returns its results in the following order:
+            # slope, intercept, r-value, p-value, stderr
+            out.alpha[i] = regr_results[1]
+            out.beta[i] = regr_results[0]
+            out.r_value[i] = regr_results[2]
+            out.p_value[i] = regr_results[3]
+            out.stderr[i] = regr_results[4]
+
+
 class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor):
    """
    Base class for factors implementing exponential-weighted operations.
@@ -6,6 +6,7 @@ from .filter import (
    NullFilter,
    NumExprFilter,
    PercentileFilter,
+    SingleAsset,
 )

 __all__ = [
@@ -16,4 +17,5 @@ __all__ = [
    'NullFilter',
    'NumExprFilter',
    'PercentileFilter',
+    'SingleAsset',
 ]
@@ -12,6 +12,7 @@ from numpy import (
 )
 from zipline.errors import (
    BadPercentileBounds,
+    NonExistentAssetInTimeFrame,
    UnsupportedDataType,
 )
 from zipline.lib.labelarray import LabelArray
@@ -31,7 +32,7 @@ from zipline.pipeline.expression import (
    NumericalExpression,
 )
 from zipline.utils.input_validation import expect_types
-from zipline.utils.numpy_utils import bool_dtype
+from zipline.utils.numpy_utils import bool_dtype, repeat_first_axis


 def concat_tuples(*tuples):
@@ -427,3 +428,35 @@ class Latest(LatestMixin, CustomFilter):
    Filter producing the most recently-known value of `inputs[0]` on each day.
    """
    pass
+
+
+class SingleAsset(Filter):
+    """
+    A Filter that computes to True only for the given asset.
+    """
+    inputs = []
+    window_length = 1
+
+    def __new__(cls, asset):
+        return super(SingleAsset, cls).__new__(cls, asset=asset)
+
+    def _init(self, asset, *args, **kwargs):
+        self._asset = asset
+        return super(SingleAsset, self)._init(*args, **kwargs)
+
+    @classmethod
+    def static_identity(cls, asset, *args, **kwargs):
+        return (
+            super(SingleAsset, cls).static_identity(*args, **kwargs), asset,
+        )
+
+    def _compute(self, arrays, dates, assets, mask):
+        is_my_asset = (assets == self._asset.sid)
+        out = repeat_first_axis(is_my_asset, len(mask))
+        # Raise an exception if `self._asset` does not exist for the entirety
+        # of the timeframe over which we are computing.
+        if (is_my_asset.sum() != 1) or ((out & mask).sum() != len(mask)):
+            raise NonExistentAssetInTimeFrame(
+                asset=self._asset, start_date=dates[0], end_date=dates[-1],
+            )
+        return out
@@ -62,30 +62,49 @@ class TermGraph(DiGraph):
    def offset(self):
        """
        For all pairs (term, input) such that `input` is an input to `term`,
-        compute a mapping:
+        compute a mapping::

            (term, input) -> offset(term, input)

-        where `offset(term, input)` is defined as
+        where ``offset(term, input)`` is the number of rows that ``term``
+        should truncate off the raw array produced for ``input`` before using
+        it. We compute this value as follows::

-            Max number of extra rows needed by any term depending on `input`
-            minus
-            Number of extra rows needed by `term`.
+            offset(term, input) = (extra_rows_computed(input) -
+                extra_rows_computed(term) - requested_extra_rows(term, input))
+
+        Examples
+        --------

-        Example
-        -------
+        Case 1
+        ~~~~~~

        Factor A needs 5 extra rows of USEquityPricing.close, and Factor B
        needs 3 extra rows of the same.  Factor A also requires 5 extra rows of
-        USEquityPricing.high, which no other Factor uses.
+        USEquityPricing.high, which no other Factor uses.  We don't require any
+        extra rows of Factor A or Factor B.

        We load 5 extra rows of both `price` and `high` to ensure we can
-        service Factor A, and the following offsets get computed:
+        service Factor A, and the following offsets get computed::

-        self.offset[Factor A, USEquityPricing.close] == 0
-        self.offset[Factor A, USEquityPricing.high] == 0
-        self.offset[Factor B, USEquityPricing.close] == 2
-        self.offset[Factor B, USEquityPricing.high] raises KeyError.
+            offset[Factor A, USEquityPricing.close] == (5 - 0) - 5 == 0
+            offset[Factor A, USEquityPricing.high]  == (5 - 0) - 5 == 0
+            offset[Factor B, USEquityPricing.close] == (5 - 0) - 3 == 2
+            offset[Factor B, USEquityPricing.high] raises KeyError.
+
+        Case 2
+        ~~~~~~
+
+        Factor A needs 5 extra rows of USEquityPricing.close, and Factor B
+        needs 3 extra rows of Factor A, and Factor B needs 2 extra rows of
+        USEquityPricing.close.
+
+        We load 8 extra rows of USEquityPricing.close (enough to compute 3
+        extra rows of Factor A), and the following offsets get computed::
+
+            offset[Factor A, USEquityPricing.close] == (8 - 3) - 5 == 0
+            offset[Factor B, USEquityPricing.close] == (8 - 0) - 2 == 6
+            offset[Factor B, Factor A]              == (3 - 0) - 3 == 0

        Notes
        -----
@@ -104,9 +123,15 @@ class TermGraph(DiGraph):
        zipline.pipeline.engine.SimplePipelineEngine._inputs_for_term
        zipline.pipeline.engine.SimplePipelineEngine._mask_and_dates_for_term
        """
-        return {(term, dep): self.extra_rows[dep] - additional_extra_rows
-                for term in self
-                for dep, additional_extra_rows in term.dependencies.items()}
+        extra = self.extra_rows
+        return {
+            # Another way of thinking about this is:
+            # How much bigger is the array for ``dep`` compared to ``term``?
+            # How much of that difference did I ask for?
+            (term, dep): (extra[dep] - extra[term]) - requested_extra_rows
+            for term in self
+            for dep, requested_extra_rows in term.dependencies.items()
+        }

    @lazyval
    def extra_rows(self):
@@ -8,7 +8,7 @@ from numpy import array, dtype as dtype_class, ndarray
 from six import with_metaclass
 from zipline.errors import (
    DTypeNotSpecified,
-    WindowedInputToWindowedTerm,
+    NonWindowSafeInput,
    NotDType,
    TermInputsNotSpecified,
    TermOutputsEmpty,
@@ -48,12 +48,16 @@ class Term(with_metaclass(ABCMeta, object)):
    # no params.
    params = ()

+    # Determines if a term is safe to be used as a windowed input.
+    window_safe = False
+
    _term_cache = WeakValueDictionary()

    def __new__(cls,
                domain=domain,
                dtype=dtype,
                missing_value=missing_value,
+                window_safe=NotSpecified,
                # params is explicitly not allowed to be passed to an instance.
                *args,
                **kwargs):
@@ -75,6 +79,8 @@ class Term(with_metaclass(ABCMeta, object)):
            dtype = cls.dtype
        if missing_value is NotSpecified:
            missing_value = cls.missing_value
+        if window_safe is NotSpecified:
+            window_safe = cls.window_safe

        dtype, missing_value = cls.validate_dtype(
            cls.__name__,
@@ -87,6 +93,7 @@ class Term(with_metaclass(ABCMeta, object)):
            domain=domain,
            dtype=dtype,
            missing_value=missing_value,
+            window_safe=window_safe,
            params=params,
            *args, **kwargs
        )
@@ -99,6 +106,7 @@ class Term(with_metaclass(ABCMeta, object)):
                    domain=domain,
                    dtype=dtype,
                    missing_value=missing_value,
+                    window_safe=window_safe,
                    params=params,
                    *args, **kwargs
                )
@@ -236,7 +244,12 @@ class Term(with_metaclass(ABCMeta, object)):
        pass

    @classmethod
-    def static_identity(cls, domain, dtype, missing_value, params):
+    def static_identity(cls,
+                        domain,
+                        dtype,
+                        missing_value,
+                        window_safe,
+                        params):
        """
        Return the identity of the Term that would be constructed from the
        given arguments.
@@ -248,9 +261,9 @@ class Term(with_metaclass(ABCMeta, object)):
        This is a classmethod so that it can be called from Term.__new__ to
        determine whether to produce a new instance.
        """
-        return (cls, domain, dtype, missing_value, params)
+        return (cls, domain, dtype, missing_value, window_safe, params)

-    def _init(self, domain, dtype, missing_value, params):
+    def _init(self, domain, dtype, missing_value, window_safe, params):
        """
        Parameters
        ----------
@@ -264,6 +277,7 @@ class Term(with_metaclass(ABCMeta, object)):
        self.domain = domain
        self.dtype = dtype
        self.missing_value = missing_value
+        self.window_safe = window_safe

        for name, value in params:
            if hasattr(self, name):
@@ -464,8 +478,8 @@ class ComputableTerm(Term):

        if self.window_length:
            for child in self.inputs:
-                if child.windowed:
-                    raise WindowedInputToWindowedTerm(parent=self, child=child)
+                if not child.window_safe:
+                    raise NonWindowSafeInput(parent=self, child=child)

    def _compute(self, inputs, dates, assets, mask):
        """