From f0050f2e2fba69190d98659236f5f8fee8066bce Mon Sep 17 00:00:00 2001 From: Conner Fromknecht Date: Thu, 13 Jul 2017 03:15:25 -0700 Subject: [PATCH] CLI UI improvements * Customized progressbar format and display information * Added new bar to BaseBundle and DailyBarWriter --- catalyst/data/bundles/__init__.py | 2 +- catalyst/data/bundles/base.py | 61 +++++++++++++++++---------- catalyst/data/bundles/base_pricing.py | 2 +- catalyst/data/bundles/poloniex.py | 2 +- catalyst/data/us_equity_pricing.py | 16 ++++--- catalyst/utils/cli.py | 28 +++++++++++- 6 files changed, 77 insertions(+), 34 deletions(-) diff --git a/catalyst/data/bundles/__init__.py b/catalyst/data/bundles/__init__.py index 9ba8e11c..17343b9c 100644 --- a/catalyst/data/bundles/__init__.py +++ b/catalyst/data/bundles/__init__.py @@ -1,6 +1,6 @@ # These imports are necessary to force module-scope register calls to happen. from . import quandl # noqa -from . import poloniex_bundle +from . import poloniex from .core import ( UnknownBundle, bundles, diff --git a/catalyst/data/bundles/base.py b/catalyst/data/bundles/base.py index 3e16c16d..b72cc7a6 100644 --- a/catalyst/data/bundles/base.py +++ b/catalyst/data/bundles/base.py @@ -24,7 +24,10 @@ import pandas as pd from . 
import core as bundles -from catalyst.utils.cli import maybe_show_progress +from catalyst.utils.cli import ( + item_show_count, + maybe_show_progress +) from catalyst.utils.memoize import lazyval logbook.StderrHandler().push_application() @@ -130,7 +133,11 @@ class BaseBundle(object): show_progress=show_progress, ) - metadata = self._post_process_metadata(raw_metadata, cache) + metadata = self._post_process_metadata( + raw_metadata, + cache, + show_progress=show_progress, + ) asset_db_writer.write(metadata) if '5-minute' in self.frequencies: @@ -176,42 +183,49 @@ class BaseBundle(object): show_progress=False): raw_iter = self._fetch_metadata_iter(api_key, cache, retries, environ) - - def item_show_func(_, _it=iter(count())): - return 'Downloading metadata: {0}'.format('.' * next(_it)) with maybe_show_progress( raw_iter, show_progress, - item_show_func=item_show_func, - label='Fetching {bundle} metadata:'.format(bundle=self.name), + label='Fetching symbol metadata', + item_show_func=item_show_count(), + length=3, + show_percent=False, ) as blocks: metadata = pd.concat(blocks, ignore_index=True) return metadata - def _post_process_metadata(self, metadata, cache): + def _post_process_metadata(self, metadata, cache, show_progress=False): final_metadata = pd.DataFrame( columns=self.md_column_names, index=metadata.index, ) + + with maybe_show_progress( + metadata.symbol.iteritems(), + show_progress, + label='Post-processing symbol metadata', + item_show_func=item_show_count(len(metadata)), + length=len(metadata), + show_percent=False, + ) as symbols_map: + for asset_id, symbol in symbols_map: + try: + raw_data = cache[symbol] + except KeyError: + raise ValueError( + 'Unable to find cached data for symbol: {0}'.format(symbol) + ) - for asset_id, symbol in metadata.symbol.iteritems(): - try: - raw_data = cache[symbol] - except KeyError: - raise ValueError( - 'Unable to find cached data for symbol: {0}'.format(symbol) + final_symbol_metadata = 
self.post_process_symbol_metadata( + metadata.iloc[asset_id], + raw_data, ) - final_symbol_metadata = self.post_process_symbol_metadata( - metadata.iloc[asset_id], - raw_data, - ) + final_metadata.iloc[asset_id] = final_symbol_metadata - final_metadata.iloc[asset_id] = final_symbol_metadata - - final_metadata['exchange'] = self.exchange + final_metadata['exchange'] = self.exchange return final_metadata @@ -237,13 +251,14 @@ class BaseBundle(object): 'attempts.'.format(page_number, retries), ) - # update cached value for key - cache[key] = raw if raw.empty: # empty DataFrame signals completion break + # update cached value for key + cache[key] = raw + yield raw def _fetch_symbol_iter(self, diff --git a/catalyst/data/bundles/base_pricing.py b/catalyst/data/bundles/base_pricing.py index 81ca12f3..8ddc95da 100644 --- a/catalyst/data/bundles/base_pricing.py +++ b/catalyst/data/bundles/base_pricing.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from catalyst.data.bundles.bundle import BaseBundle +from catalyst.data.bundles.base import BaseBundle from catalyst.utils.memoize import lazyval class BasePricingBundle(BaseBundle): diff --git a/catalyst/data/bundles/poloniex.py b/catalyst/data/bundles/poloniex.py index 1ed113f8..2d6b9ebd 100644 --- a/catalyst/data/bundles/poloniex.py +++ b/catalyst/data/bundles/poloniex.py @@ -20,7 +20,7 @@ import pandas as pd from six.moves.urllib.parse import urlencode from catalyst.data.bundles.core import register_bundle -from catalyst.data.bundles.pricing_bundle import BaseCryptoPricingBundle +from catalyst.data.bundles.base_pricing import BaseCryptoPricingBundle from catalyst.utils.memoize import lazyval class PoloniexBundle(BaseCryptoPricingBundle): diff --git a/catalyst/data/us_equity_pricing.py b/catalyst/data/us_equity_pricing.py index 04b3e225..b2244218 100644 --- a/catalyst/data/us_equity_pricing.py +++ b/catalyst/data/us_equity_pricing.py @@ -73,7 +73,10 @@ from catalyst.utils.sqlite_utils import ( coerce_string_to_conn, ) from catalyst.utils.memoize import lazyval -from catalyst.utils.cli import maybe_show_progress +from catalyst.utils.cli import ( + item_show_count, + maybe_show_progress, +) from ._equities import _compute_row_slices, _read_bcolz_data from ._adjustments import load_adjustments_from_sqlite @@ -226,10 +229,7 @@ class BcolzDailyBarWriter(object): @property def progress_bar_message(self): - return "Merging daily equity files:" - - def progress_bar_item_show_func(self, value): - return value if value is None else str(value[0]) + return 'Compiling daily pricing data' def write(self, data, @@ -257,15 +257,17 @@ class BcolzDailyBarWriter(object): table : bcolz.ctable The newly-written table. 
""" + total = None if assets is None else len(assets) ctx = maybe_show_progress( ( (sid, self.to_ctable(df, invalid_data_behavior)) for sid, df in data ), show_progress=show_progress, - item_show_func=self.progress_bar_item_show_func, label=self.progress_bar_message, - length=len(assets) if assets is not None else None, + item_show_func=item_show_count(total), + length=total, + show_percent=False, ) with ctx as it: return self._write_internal(it, assets) diff --git a/catalyst/utils/cli.py b/catalyst/utils/cli.py index 70e15d35..6e604838 100644 --- a/catalyst/utils/cli.py +++ b/catalyst/utils/cli.py @@ -1,10 +1,33 @@ +from itertools import count + import click import pandas as pd from .context_tricks import CallbackManager +DEFAULT_BAR_TEMPLATE = ' [%(bar)s] %(label)s: %(info)s' +DEFAULT_EMPTY_CHAR = ' ' +DEFAULT_FILL_CHAR = '=' -def maybe_show_progress(it, show_progress, **kwargs): +def item_show_count(total=None): + def maybe_show_total(index): + if total is not None: + return '{0}/{1}'.format(index, total) + return str(index) + + def item_show_func(item, _it=iter(count())): + if item is not None: + return maybe_show_total(next(_it)) + return 'DONE' + + return item_show_func + +def maybe_show_progress(it, + show_progress, + empty_char=DEFAULT_EMPTY_CHAR, + fill_char=DEFAULT_FILL_CHAR, + bar_template=DEFAULT_BAR_TEMPLATE, + **kwargs): """Optionally show a progress bar for the given iterator. Parameters @@ -30,6 +53,9 @@ def maybe_show_progress(it, show_progress, **kwargs): ... """ if show_progress: + kwargs['bar_template'] = bar_template + kwargs['empty_char'] = empty_char + kwargs['fill_char'] = fill_char return click.progressbar(it, **kwargs) # context manager that just return `it` when we enter it