From 826ad061f66de1823c238e6994f1223ee386e27d Mon Sep 17 00:00:00 2001
From: Conner Fromknecht <conner@mit.edu>
Date: Tue, 11 Jul 2017 13:20:19 -0700
Subject: [PATCH] WIP: Five Minute bars and FiveMinuteSimulationClock

---
 .github/ISSUE_TEMPLATE.md                  |   4 +-
 catalyst/__main__.py                       |   2 +-
 catalyst/algorithm.py                      |  57 +++++--
 catalyst/data/bundles/bundle.py            | 180 ++++++++++++---------
 catalyst/data/five_minute_bars.py          |  77 +++++----
 catalyst/data/us_equity_pricing.py         |  10 +-
 catalyst/gens/sim_engine.pyx               |  23 +++
 catalyst/gens/tradesimulation.py           |   3 +-
 catalyst/utils/calendars/calendar_utils.py |   2 +-
 catalyst/utils/events.py                   |   1 +
 10 files changed, 231 insertions(+), 128 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index bc501c44..7d03b8fd 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,4 +1,4 @@
-Dear Zipline Maintainers,
+Dear Catalyst Maintainers,
 
 Before I tell you about my issue, let me describe my environment:
 
@@ -7,7 +7,7 @@ Before I tell you about my issue, let me describe my environment:
 * Operating System: (Windows Version or `$ uname --all`)
 * Python Version: `$ python --version`
 * Python Bitness: `$ python -c 'import math, sys;print(int(math.log(sys.maxsize + 1, 2) + 1))'`
-* How did you install Zipline: (`pip`, `conda`, or `other (please explain)`)
+* How did you install Catalyst: (`pip`, `conda`, or `other (please explain)`)
 * Python packages: `$ pip freeze` or `$ conda list`
 
 Now that you know a little about me, let me tell you about the issue I am
diff --git a/catalyst/__main__.py b/catalyst/__main__.py
index c006df68..7ee6edf6 100644
--- a/catalyst/__main__.py
+++ b/catalyst/__main__.py
@@ -123,7 +123,7 @@ def ipython_only(option):
 )
 @click.option(
     '--data-frequency',
-    type=click.Choice({'daily', 'minute'}),
+    type=click.Choice({'daily', '5-minute', 'minute'}),
     default='daily',
     show_default=True,
     help='The data frequency of the simulation.',
diff --git a/catalyst/algorithm.py b/catalyst/algorithm.py
index a6fdcebf..8fe12d34 100644
--- a/catalyst/algorithm.py
+++ b/catalyst/algorithm.py
@@ -133,7 +133,10 @@ from catalyst.utils.security_list import SecurityList
 import catalyst.protocol
 from catalyst.sources.requests_csv import PandasRequestsCSV
 
-from catalyst.gens.sim_engine import MinuteSimulationClock
+from catalyst.gens.sim_engine import (
+    MinuteSimulationClock,
+    FiveMinuteSimulationClock,
+)
 from catalyst.sources.benchmark_source import BenchmarkSource
 from catalyst.catalyst_warnings import ZiplineDeprecationWarning
 
@@ -170,7 +173,7 @@ class TradingAlgorithm(object):
     algo_filename : str, optional
         The filename for the algoscript. This will be used in exception
         tracebacks. default: '<string>'.
-    data_frequency : {'daily', 'minute'}, optional
+    data_frequency : {'daily', '5-minute', 'minute'}, optional
         The duration of the bars.
     instant_fill : bool, optional
         Whether to fill orders immediately or on next bar. default: False
@@ -223,7 +226,7 @@ class TradingAlgorithm(object):
             script : str
                 Algoscript that contains initialize and
                 handle_data function definition.
-            data_frequency : {'daily', 'minute'}
+            data_frequency : {'daily', '5-minute', 'minute'}
                The duration of the bars.
             capital_base : float <default: 1.0e5>
                How much capital to start with.
@@ -449,7 +452,7 @@ class TradingAlgorithm(object):
         self._in_before_trading_start = True
 
         with handle_non_market_minutes(data) if \
-                self.data_frequency == "minute" else ExitStack():
+                self.data_frequency in ('minute', '5-minute') else ExitStack():
             self._before_trading_start(self, data)
 
         self._in_before_trading_start = False
@@ -505,10 +508,11 @@ class TradingAlgorithm(object):
         market_closes = trading_o_and_c['market_close']
         minutely_emission = False
 
-        if self.sim_params.data_frequency == 'minute':
+        if self.sim_params.data_frequency in set(('minute', '5-minute')):
             market_opens = trading_o_and_c['market_open']
 
-            minutely_emission = self.sim_params.emission_rate == "minute"
+            minutely_emission = self.sim_params.emission_rate in \
+                set(('minute', '5-minute'))
         else:
             # in daily mode, we want to have one bar per session, timestamped
             # as the last minute of the session.
@@ -528,10 +532,19 @@ class TradingAlgorithm(object):
         # FIXME generalize these values
         before_trading_start_minutes = days_at_time(
             self.sim_params.sessions,
-            time(8, 45),
-            "US/Eastern"
+            time(0, 0),
+            'UTC',
         )
 
+        if self.sim_params.data_frequency == '5-minute':
+            return FiveMinuteSimulationClock(
+                self.sim_params.sessions,
+                execution_opens,
+                execution_closes,
+                before_trading_start_minutes,
+                minute_emission=minutely_emission,
+            )
+
         return MinuteSimulationClock(
             self.sim_params.sessions,
             execution_opens,
@@ -660,8 +673,11 @@ class TradingAlgorithm(object):
                     # Assume data is daily if timestamp times are
                     # standardized, otherwise assume minute bars.
                     times = data.major_axis.time
-                    if np.all(times == times[0]):
+                    time_count = times.nunique()
+                    if time_count == 1:
                         self.sim_params.data_frequency = 'daily'
+                    elif time_count == 288:
+                        self.sim_params.data_frequency = '5-minute'
                     else:
                         self.sim_params.data_frequency = 'minute'
 
@@ -683,6 +699,8 @@ class TradingAlgorithm(object):
 
                 if self.sim_params.data_frequency == 'daily':
                     equity_reader_arg = 'equity_daily_reader'
+                elif self.sim_params.data_frequency == '5-minute':
+                    equity_daily_reader = 'equity_5_minute_reader'
                 elif self.sim_params.data_frequency == 'minute':
                     equity_reader_arg = 'equity_minute_reader'
                 equity_reader = PanelBarReader(
@@ -926,9 +944,9 @@ class TradingAlgorithm(object):
                   The arena from the simulation parameters. This will normally
                   be ``'backtest'`` but some systems may use this distinguish
                   live trading from backtesting.
-              data_frequency : {'daily', 'minute'}
+              data_frequency : {'daily', '5-minute', 'minute'}
                   data_frequency tells the algorithm if it is running with
-                  daily data or minute data.
+                  daily, minute, or five-minute mode.
               start : datetime
                   The start date for the simulation.
               end : datetime
@@ -1102,12 +1120,19 @@ class TradingAlgorithm(object):
                           'date_rule. You should use keyword argument '
                           'time_rule= when calling schedule_function without '
                           'specifying a date_rule', stacklevel=3)
+        
+        freq = self.sim_params.data_frequency
 
         date_rule = date_rule or date_rules.every_day()
-        time_rule = ((time_rule or time_rules.every_minute())
-                     if self.sim_params.data_frequency == 'minute' else
-                     # If we are in daily mode the time_rule is ignored.
-                     time_rules.every_minute())
+        if freq is 'daily':
+            # ignore time rule in daily mode
+            time_rule = time_rules.every_minute()
+        else:
+            # use provided time rule or default to every minute or 5 minutes
+            # based on desired data frequency.
+            time_rule = time_rule or (time_rules.every_5_minutes()
+                                      if freq is '5-minute' else
+                                      time_rules.every_minute())
 
         # Check the type of the algorithm's schedule before pulling calendar
         # Note that the ExchangeTradingSchedule is currently the only
@@ -1782,7 +1807,7 @@ class TradingAlgorithm(object):
 
     @data_frequency.setter
     def data_frequency(self, value):
-        assert value in ('daily', 'minute')
+        assert value in ('daily', '5-minute', 'minute')
         self.sim_params.data_frequency = value
 
     @api_method
diff --git a/catalyst/data/bundles/bundle.py b/catalyst/data/bundles/bundle.py
index 0ab965b7..fb095f9e 100644
--- a/catalyst/data/bundles/bundle.py
+++ b/catalyst/data/bundles/bundle.py
@@ -117,24 +117,8 @@ class AbstractBundle(object):
 
             symbol_map = raw_metadata.symbol
 
-            daily_bar_writer.write(
-                self._fetch_symbol_iter(
-                    api_key,
-                    cache,
-                    symbol_map,
-                    calendar,
-                    start_session,
-                    end_session,
-                    'daily',
-                    retries,
-                ),
-                assets=raw_metadata.index,
-                show_progress=show_progress,
-            )
-
-            """
-            for data_frequency in self.frequencies:
-                self._write_symbol_for_freq(
+            if 'daily' in self.frequencies:
+                daily_bar_writer.write(
                     self._fetch_symbol_iter(
                         api_key,
                         cache,
@@ -142,22 +126,31 @@ class AbstractBundle(object):
                         calendar,
                         start_session,
                         end_session,
-                        data_frequency,
+                        'daily',
                         retries,
                     ),
-                    data_frequency,
-                    daily_bar_writer,
-                    minute_bar_writer,
                     assets=raw_metadata.index,
                     show_progress=show_progress,
                 )
-            """
-                
-                    
 
             metadata = self._post_process_metadata(raw_metadata, cache)
             asset_db_writer.write(metadata)
 
+            if '5-minute' in self.frequencies:
+                minute_bar_writer.write(
+                    self._fetch_symbol_iter(
+                        api_key,
+                        cache,
+                        symbol_map,
+                        calendar,
+                        start_session,
+                        end_session,
+                        '5-minute',
+                        retries,
+                    ),
+                    show_progress=show_progress,
+                )
+
             adjustment_writer.write()
         else:
             self._download(show_progress, output_dir)
@@ -188,7 +181,7 @@ class AbstractBundle(object):
         raw_iter = self._fetch_metadata_iter(api_key, cache, retries, environ)
         
         def item_show_func(_, _it=iter(count())):
-            return 'Downloading metadata page: {0}'.format(next(_it))
+            return 'Downloading metadata: {0}'.format('.' * next(_it))
 
         with maybe_show_progress(
             raw_iter,
@@ -263,65 +256,108 @@ class AbstractBundle(object):
                            calendar,
                            start_session,
                            end_session,
-                           frequency,
+                           data_frequency,
                            retries):
 
         for asset_id, symbol in symbol_map.iteritems():
+            # Record start time of iteration, compare at end of iteration to
+            # adhere to the datas source's rate limit policy.
             start_time = pd.Timestamp.utcnow()
-            try:
-                raw_data = cache[symbol]
-            except KeyError:
-                raw_data = None
-            
-            if raw_data is not None and not raw_data.empty:
-                last = raw_data.index[-1].tz_localize('UTC')
-            else:
-                last = start_session
 
-            next_start_time = last + pd.Timedelta(minutes=5)
-            if start_time > next_start_time:
-                raw_diff = self.fetch_raw_symbol_frame(
-                    api_key,
-                    symbol,
-                    last,
-                    end_session,
-                    frequency,
-                )
-                raw_diff = raw_diff[
-                    (raw_diff.index >= last) &
-                    (raw_diff.index <= end_session)
-                ]
+            # Fetch new data if cached data is absent or stale, otherwise
+            # returns the cached data unaltered.  The `should_sleep` flag
+            # indicates that an API call was attempted, and that we should be
+            # ensure aren't exceeding our rate limit before proceeding to the
+            # next symbol. If the raw_data is updated, it is cached before being
+            # returned.
+            raw_data, should_sleep = self._maybe_update_symbol_frame(
+                start_time,
+                api_key,
+                cache,
+                symbol,
+                start_session,
+                end_session,
+                data_frequency,
+            )
 
-                raw_data = cache[symbol] = (
-                    raw_data.append(raw_diff)
-                    if raw_data is not None else
-                    raw_diff
-                )
-
-                raw_data = raw_data[~raw_data.index.duplicated(keep='last')]
-
-                should_sleep = True
-            else:
-                should_sleep = False      
-
-            """
-            sessions = calendar.sessions_in_range(start_session, end_session)
-
-            print 'raw_data before:\n', raw_data.head()
-            raw_data = raw_data.reindex(
-                sessions,
-                copy=False,
-            ).fillna(0.0)
-            print 'raw_data after:\n', raw_data.head()
-            """
+            # TODO(cfromknecht) further data validation?
 
+            # Pass asset_id and symbol data to writer.
             yield asset_id, raw_data
 
+            # If an API call was made during this iteration and the time to
+            # reach this point was less than the inter-request `wait_time`,
+            # sleep until after enough time has elapsed to prevent getting rate
+            # limited.
             if should_sleep:
                 remaining = pd.Timestamp.utcnow() - start_time + self.wait_time
                 if remaining.value > 0:
                     sleep(remaining.value / 10**9)
 
+    def _maybe_update_symbol_frame(self,
+                                   start_time,
+                                   api_key,
+                                   cache,
+                                   symbol,
+                                   start_session,
+                                   end_session,
+                                   data_frequency):
+        try:
+            raw_data = cache[symbol]
+        except KeyError:
+            raw_data = None
+
+        # Select the most recent date in cached dataset if it exists,
+        # otherwise use the provided `start_session`.
+        last = (
+            raw_data.index[-1].tz_localize('UTC')
+            if raw_data is not None and not raw_data.empty else
+            start_session
+        )
+
+        # Determine time at which cached data will be considered stale.
+        cache_expiration = last + pd.Timedelta(minutes=5)
+        if start_time <= cache_expiration:
+            # Data is fresh enough to reuse, no need to update. Iterator can
+            # proceed to next symbol directly since no API call was required.
+            should_sleep = False
+        else:
+            # Data for symbol is old enough to attempt an update or is not
+            # present in the cache.  Fetch raw data for a single symbol 
+            # with requested intervals and frequency.
+            raw_diff = self.fetch_raw_symbol_frame(
+                api_key,
+                symbol,
+                last,
+                end_session,
+                data_frequency,
+            )
+
+            # Filter incoming data to minimize overlap.
+            raw_diff = raw_diff[
+                (raw_diff.index >= last) &
+                (raw_diff.index <= end_session)
+            ]
+
+            # Append incoming data to cached data if it exists,
+            # otherwise treat incoming data as the entire raw dataset.
+            raw_data = cache[symbol] = (
+                raw_data.append(raw_diff)
+                if raw_data is not None else
+                raw_diff
+            )
+
+            # Filter out any duplicates entries, keep last one as previous
+            # one was probably an incomplete frame.
+            raw_data = raw_data[~raw_data.index.duplicated(keep='last')]
+
+            # If we arrive here, we must have attempted an API call.
+            # This flag tells the iterator to pause before starting the next
+            # asset, that we don't exceed the data source's rate limit.
+            should_sleep = True
+
+        return raw_data, should_sleep
+
     def _write_symbol_for_freq(self,
                                pricing_iter,
                                data_frequency,
@@ -338,13 +374,11 @@ class AbstractBundle(object):
         elif data_frequency == '5-minute':
             minute_bar_writer.write(
                 pricing_iter,
-                assets=assets,
                 show_progress=show_progress,
             )
         elif data_frequency == 'minute':
             minute_bar_writer.write(
                 pricing_iter,
-                assets=assets,
                 show_progress=show_progress,
             )
         else:
diff --git a/catalyst/data/five_minute_bars.py b/catalyst/data/five_minute_bars.py
index b772c22b..aa592740 100644
--- a/catalyst/data/five_minute_bars.py
+++ b/catalyst/data/five_minute_bars.py
@@ -24,6 +24,10 @@ from bcolz import ctable
 from intervaltree import IntervalTree
 import logbook
 import numpy as np
+from numpy import (
+    iinfo,
+    uint64,
+)
 import pandas as pd
 from pandas import HDFStore
 import tables
@@ -39,23 +43,28 @@ from catalyst.data._minute_bar_internal import (
 from catalyst.gens.sim_engine import NANOS_IN_MINUTE
 
 from catalyst.data.bar_reader import BarReader, NoDataOnDate
-from catalyst.data.us_equity_pricing import check_uint32_safe
+from catalyst.data.us_equity_pricing import (
+    winsorise_uint64,
+    check_uint64_safe,
+)
 from catalyst.utils.calendars import get_calendar
 from catalyst.utils.cli import maybe_show_progress
 from catalyst.utils.memoize import lazyval
 
-
 logger = logbook.Logger('FiveMinuteBars')
 
-CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY = 288
+OPEN_FIVE_MINUTES_PER_DAY = 288
 US_EQUITIES_MINUTES_PER_DAY = 390
-FUTURES_MINUTES_PER_DAY = 1440
 
 DEFAULT_EXPECTEDLEN = US_EQUITIES_MINUTES_PER_DAY * 252 * 15
-DEFAULT_EXPECTED_CRYPTO_LEN = CRYPTO_ASSETS_FIVE_MINUTES_PER_DAY * 366 * 15
+DEFAULT_EXPECTEDLEN_CRYPTO = OPEN_FIVE_MINUTES_PER_DAY * 366 * 15
 
-OHLC_RATIO = 1000
+OHLC_RATIO = 1000000
 
+OHLC = frozenset(['open', 'high', 'low', 'close'])
+OHLCV = frozenset(['open', 'high', 'low', 'close', 'volume'])
+
+UINT64_MAX = iinfo(uint64).max
 
 class BcolzFiveMinuteOverlappingData(Exception):
     pass
@@ -68,7 +77,7 @@ class BcolzFiveMinuteWriterColumnMismatch(Exception):
 class FiveMinuteBarReader(BarReader):
     @property
     def data_frequency(self):
-        return "five-minute"
+        return "5-minute"
 
 
 def _calc_five_minute_index(market_opens, five_minutes_per_day):
@@ -116,19 +125,19 @@ def _sid_subdir_path(sid):
 
 
 def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
-    """Adapt OHLCV columns into uint32 columns.
+    """Adapt OHLCV columns into uint64 columns.
 
     Parameters
     ----------
     cols : dict
         A dict mapping each column name (open, high, low, close, volume)
-        to a float column to convert to uint32.
+        to a float column to convert to uint64.
     scale_factor : int
-        Factor to use to scale float values before converting to uint32.
+        Factor to use to scale float values before converting to uint64.
     sid : int
         Sid of the relevant asset, for logging.
     invalid_data_behavior : str
-        Specifies behavior when data cannot be converted to uint32.
+        Specifies behavior when data cannot be converted to uint64.
         If 'raise', raises an exception.
         If 'warn', logs a warning and filters out incompatible values.
         If 'ignore', silently filters out incompatible values.
@@ -137,6 +146,7 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
     scaled_highs = np.nan_to_num(cols['high']) * scale_factor
     scaled_lows = np.nan_to_num(cols['low']) * scale_factor
     scaled_closes = np.nan_to_num(cols['close']) * scale_factor
+    volumes = np.nan_to_num(cols['volume'])
 
     exclude_mask = np.zeros_like(scaled_opens, dtype=bool)
 
@@ -145,11 +155,12 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
         ('high', scaled_highs),
         ('low', scaled_lows),
         ('close', scaled_closes),
+        ('volume', volumes),
     ]:
         max_val = scaled_col.max()
 
         try:
-            check_uint32_safe(max_val, col_name)
+            check_uint64_safe(max_val, col_name)
         except ValueError:
             if invalid_data_behavior == 'raise':
                 raise
@@ -157,20 +168,20 @@ def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
             if invalid_data_behavior == 'warn':
                 logger.warn(
                     'Values for sid={}, col={} contain some too large for '
-                    'uint32 (max={}), filtering them out',
+                    'uint64 (max={}), filtering them out',
                     sid, col_name, max_val,
                 )
 
             # We want to exclude all rows that have an unsafe value in
             # this column.
-            exclude_mask &= (scaled_col >= np.iinfo(np.uint32).max)
+            exclude_mask &= (scaled_col >= iinfo(uint64).max)
 
-    # Convert all cols to uint32.
-    opens = scaled_opens.astype(np.uint32)
-    highs = scaled_highs.astype(np.uint32)
-    lows = scaled_lows.astype(np.uint32)
-    closes = scaled_closes.astype(np.uint32)
-    volumes = cols['volume'].astype(np.uint32)
+    # Convert all cols to uint64.
+    opens = scaled_opens.astype(uint64)
+    highs = scaled_highs.astype(uint64)
+    lows = scaled_lows.astype(uint64)
+    closes = scaled_closes.astype(uint64)
+    volumes = volumes.astype(uint64)
 
     # Exclude rows with unsafe values by setting to zero.
     opens[exclude_mask] = 0
@@ -290,7 +301,7 @@ class BcolzFiveMinuteBarMetadata(object):
         ohlc_ratio : int
             The default ratio by which to multiply the pricing data to
             convert the floats from floats to an integer to fit within
-            the np.uint32. If ohlc_ratios_per_sid is None or does not
+            the np.uint64. If ohlc_ratios_per_sid is None or does not
             contain a mapping for a given sid, this ratio is used.
         ohlc_ratios_per_sid : dict
              A dict mapping each sid in the output to the factor by
@@ -374,13 +385,13 @@ class BcolzFiveMinuteBarWriter(object):
         The last trading session in the data set.
     default_ohlc_ratio : int, optional
         The default ratio by which to multiply the pricing data to
-        convert from floats to integers that fit within np.uint32. If
+        convert from floats to integers that fit within np.uint64. If
         ohlc_ratios_per_sid is None or does not contain a mapping for a
         given sid, this ratio is used. Default is OHLC_RATIO (1000).
     ohlc_ratios_per_sid : dict, optional
         A dict mapping each sid in the output to the ratio by which to
         multiply the pricing data to convert the floats from floats to
-        an integer to fit within the np.uint32.
+        an integer to fit within the np.uint64.
     expectedlen : int, optional
         The expected length of the dataset, used when creating the initial
         bcolz ctable.
@@ -405,9 +416,9 @@ class BcolzFiveMinuteBarWriter(object):
 
     The open, high, low, and close columns are integers which are 1000 times
     the quoted price, so that the data can represented and stored as an
-    np.uint32, supporting market prices quoted up to the thousands place.
+    np.uint64, supporting market prices quoted up to the thousands place.
 
-    volume is a np.uint32 with no mutation of the tens place.
+    volume is a np.uint64 with no mutation of the tens place.
 
     The 'index' for each individual asset are a repeating period of minutes of
     length `minutes_per_day` starting from each market open.
@@ -575,7 +586,7 @@ class BcolzFiveMinuteBarWriter(object):
         if not os.path.exists(sid_containing_dirname):
             # Other sids may have already created the containing directory.
             os.makedirs(sid_containing_dirname)
-        initial_array = np.empty(0, np.uint32)
+        initial_array = np.empty(0, np.uint64)
         table = ctable(
             rootdir=path,
             columns=[
@@ -612,7 +623,7 @@ class BcolzFiveMinuteBarWriter(object):
         five_minute_offset = len(table) % self._five_minutes_per_day
         num_to_prepend = numdays * self._five_minutes_per_day - five_minute_offset
 
-        prepend_array = np.zeros(num_to_prepend, np.uint32)
+        prepend_array = np.zeros(num_to_prepend, np.uint64)
         # Fill all OHLCV with zeros.
         table.append([prepend_array] * 5)
         table.flush()
@@ -817,11 +828,11 @@ class BcolzFiveMinuteBarWriter(object):
 
         minutes_count = all_minutes_in_window.size
 
-        open_col = np.zeros(minutes_count, dtype=np.uint32)
-        high_col = np.zeros(minutes_count, dtype=np.uint32)
-        low_col = np.zeros(minutes_count, dtype=np.uint32)
-        close_col = np.zeros(minutes_count, dtype=np.uint32)
-        vol_col = np.zeros(minutes_count, dtype=np.uint32)
+        open_col = np.zeros(minutes_count, dtype=uint64)
+        high_col = np.zeros(minutes_count, dtype=uint64)
+        low_col = np.zeros(minutes_count, dtype=uint64)
+        close_col = np.zeros(minutes_count, dtype=uint64)
+        vol_col = np.zeros(minutes_count, dtype=uint64)
 
         dt_ixs = np.searchsorted(all_minutes_in_window.values,
                                  dts.astype('datetime64[ns]'))
@@ -1250,7 +1261,7 @@ class BcolzFiveMinuteBarReader(FiveMinuteBarReader):
             if field != 'volume':
                 out = np.full(shape, np.nan)
             else:
-                out = np.zeros(shape, dtype=np.uint32)
+                out = np.zeros(shape, dtype=int64)
 
             for i, sid in enumerate(sids):
                 carray = self._open_minute_file(field, sid)
diff --git a/catalyst/data/us_equity_pricing.py b/catalyst/data/us_equity_pricing.py
index f20ac003..04b3e225 100644
--- a/catalyst/data/us_equity_pricing.py
+++ b/catalyst/data/us_equity_pricing.py
@@ -117,7 +117,15 @@ UINT64_MAX = iinfo(uint64).max
 def check_uint32_safe(value, colname):
     if value >= UINT32_MAX:
         raise ValueError(
-            "Value %s from column '%s' is too large" % (value, colname)
+            "Value %s from column '%s' is too large "
+            "for uint32" % (value, colname)
+        )
+
+def check_uint64_safe(value, colname):
+    if value >= UINT64_MAX:
+        raise ValueError(
+            "Value %s from column '%s' is too large "
+            "for uint64" % (value, colname)
         )
 
 
diff --git a/catalyst/gens/sim_engine.pyx b/catalyst/gens/sim_engine.pyx
index aa3a9d51..3eddda84 100644
--- a/catalyst/gens/sim_engine.pyx
+++ b/catalyst/gens/sim_engine.pyx
@@ -20,7 +20,9 @@ cimport cython
 from cpython cimport bool
 
 cdef np.int64_t _nanos_in_minute = 60000000000
+cdef np.int64_t _nanos_in_five_minutes = 5 * _nanos_in_minute
 NANOS_IN_MINUTE = _nanos_in_minute
+NANOS_IN_FIVE_MINUTES = _nanos_in_five_minutes
 
 cpdef enum:
     BAR = 0
@@ -115,3 +117,24 @@ cdef class MinuteSimulationClock:
             yield minute, BAR
             if minute_emission:
                 yield minute, MINUTE_END
+
+cdef class FiveMinuteSimulationClock(MinuteSimulationClock):
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef dict calc_minutes_by_session(self):
+        cdef dict five_minutes_by_session
+        cdef int session_idx
+        cdef np.int64_t session_nano
+        cdef np.ndarray[np.int64_t, ndim=1] five_minutes_nanos
+
+        five_minutes_by_session = {}
+        for session_idx, session_nano in enumerate(self.sessions_nanos):
+            five_minutes_nanos = np.arange(
+                self.market_opens_nanos[session_idx],
+                self.market_closes_nanos[session_idx] + _nanos_in_five_minutes,
+                _nanos_in_five_minutes
+            )
+            five_minutes_by_session[session_nano] = pd.to_datetime(
+                five_minutes_nanos, utc=True, box=True
+            )
+        return five_minutes_by_session
diff --git a/catalyst/gens/tradesimulation.py b/catalyst/gens/tradesimulation.py
index f7daa233..1ef9dfbf 100644
--- a/catalyst/gens/tradesimulation.py
+++ b/catalyst/gens/tradesimulation.py
@@ -34,6 +34,7 @@ class AlgorithmSimulator(object):
 
     EMISSION_TO_PERF_KEY_MAP = {
         'minute': 'minute_perf',
+        '5-minute': '5_minute_perf',
         'daily': 'daily_perf'
     }
 
@@ -201,7 +202,7 @@ class AlgorithmSimulator(object):
             stack.enter_context(self.processor)
             stack.enter_context(ZiplineAPI(self.algo))
 
-            if algo.data_frequency == 'minute':
+            if algo.data_frequency in set(('minute', '5-minute')):
                 def execute_order_cancellation_policy():
                     algo.blotter.execute_cancel_policy(SESSION_END)
 
diff --git a/catalyst/utils/calendars/calendar_utils.py b/catalyst/utils/calendars/calendar_utils.py
index 0e4824a7..6ec335a5 100644
--- a/catalyst/utils/calendars/calendar_utils.py
+++ b/catalyst/utils/calendars/calendar_utils.py
@@ -25,7 +25,7 @@ _default_calendar_factories = {
     'us_futures': QuantopianUSFuturesCalendar,
 }
 _default_calendar_aliases = {
-    'CATX': 'OPEN',
+    'POLO': 'OPEN',
     'NASDAQ': 'NYSE',
     'BATS': 'NYSE',
     'CBOT': 'CME',
diff --git a/catalyst/utils/events.py b/catalyst/utils/events.py
index 10bb2f55..3fc83dd4 100644
--- a/catalyst/utils/events.py
+++ b/catalyst/utils/events.py
@@ -602,6 +602,7 @@ class date_rules(object):
 class time_rules(object):
     market_open = AfterOpen
     market_close = BeforeClose
+    every_5_minutes = Always
     every_minute = Always