From 3b5c2118f0d01bad3f4967bcc8bbb9e0688694bf Mon Sep 17 00:00:00 2001 From: Stephen Mildenhall Date: Fri, 19 Dec 2025 10:51:12 +0000 Subject: [PATCH] Added pyarrow support in GT (new and old versions) and in Fabricator --- .gitignore | 3 +- docs/versions.rst | 7 + greater_tables/__init__.py | 2 +- greater_tables/core.py | 1241 +++------- greater_tables/core_old.py | 2062 ++++++++++++++++ greater_tables/fabrications.py | 19 +- gt-testers.ipynb | 4006 +------------------------------- 7 files changed, 2476 insertions(+), 4864 deletions(-) create mode 100644 greater_tables/core_old.py diff --git a/.gitignore b/.gitignore index a76be03..2192fd7 100644 --- a/.gitignore +++ b/.gitignore @@ -168,4 +168,5 @@ docs/books.bib docs/library.bib # rust -gtfont/target \ No newline at end of file +gtfont/target +/.virtual_documents diff --git a/docs/versions.rst b/docs/versions.rst index 1c96eab..54340ae 100644 --- a/docs/versions.rst +++ b/docs/versions.rst @@ -11,9 +11,16 @@ Versions and Change Log .. TODO * self.padl and r / 12 in make html width adj s/b elsewhere +6.0.0 +----- + +* Support for pyarrow data types. +* Added pyarrow option to Fabricator to create pyarrow dataframes. + 5.4.1 and 5.4.1 ------------------ * Minor bug fixes; tikz method no works for empty dataframe. +* Non-integer width bug. 5.4 ---- diff --git a/greater_tables/__init__.py b/greater_tables/__init__.py index 8ab7541..62a3cb7 100644 --- a/greater_tables/__init__.py +++ b/greater_tables/__init__.py @@ -1,6 +1,6 @@ __project__ = 'greater_tables' __author__ = 'Stephen J Mildenhall' -__version__ = '5.4.2' +__version__ = '6.0.0' from . core import GT from . fabrications import * diff --git a/greater_tables/core.py b/greater_tables/core.py index 8200f9c..a7938df 100644 --- a/greater_tables/core.py +++ b/greater_tables/core.py @@ -1,10 +1,10 @@ """ -Core rendering logic for GreaterTables. +Core rendering logic for GreaterTables (PyArrow Enhanced Edition). -Defines the `GreaterTables` class, which formats and renders pandas DataFrames +Defines the `GT` class, which formats and renders pandas DataFrames to HTML, plain text, or LaTeX output using a validated configuration model. -This is the main entry point for rendering logic. See `gtconfig.py` for configuration schema. +Refactored [2025-03-06] to support PyArrow-backed dataframes natively. """ from collections import namedtuple @@ -25,8 +25,14 @@ from cachetools import LRUCache import numpy as np import pandas as pd from pandas.errors import IntCastingNaNError -from pandas.api.types import is_datetime64_any_dtype, is_integer_dtype, \ - is_float_dtype # , is_numeric_dtype +from pandas.api.types import ( + is_datetime64_any_dtype, + is_integer_dtype, + is_float_dtype, + is_string_dtype, + is_extension_array_dtype, # Crucial for PyArrow detection + is_numeric_dtype +) from pydantic import ValidationError from rich import box from IPython.display import display, SVG @@ -37,13 +43,10 @@ from . hasher import df_short_hash from . etcher import Etcher from . utilities import * -# turn off this fuck-fest +# Modern pandas settings pd.set_option('future.no_silent_downcasting', True) -# pandas complaining about casting columns eg putting object in float column warnings.simplefilter(action='ignore', category=FutureWarning) - -# GPT recommended approach logger = logging.getLogger(__name__) @@ -54,55 +57,10 @@ class GT(object): Provides html and latex output in quarto/Jupyter accessible manner. Wraps AND COPIES the dataframe df. WILL NOT REFLECT CHANGES TO DF. - Recommended usage is to subclass GT (or use functools.partial) and set - defaults suitable to your particular - application. In that way you can maintain a "house-style" - - Process - -------- - - **Input transformation** - - * ``namedtuple`` converted to ``DataFrame`` - * ``pd.Series`` converted to ``DataFrame`` - * ``list`` converted to ``DataFrame``, optionally using row 0 as - ``config.header_row`` - * A string is assumed to be a pipe-separated markdown table which is - converted to a ``DataFrame`` setting aligners per the alignment row - * All other input types are an error - - The input ``df`` must have unique column names. It is then copied into - ``self.df`` which will be changed and ``self.raw_df`` for reference. - The copy is hashed for the table name. - - **Mangling** - - * If show_index, the index is reset and kept, so that all columns are on an - config.equal footing - * The index change levels are computed to determine LaTeX hrules - * ratio year, and raw columns converted to a list (can be input as a single - string name) - * Columns, except raw columns, are cast to floats - * Column types by index determined - * default formatter function set (wrapping input, if any) - * Aligner column input decoded into aligner values - (``grt-left,grt-right,grt-center``); index aligners separated - * Formatters decoded, strings mapped to lambda functions as f-string - formatters, integers as number of decimals - * Tab values expanded into an iterable - * Dataframe at this point (index reset, cast) saved to - ``df_pre_applying_formatters`` - * Determine formatters (``df_formatters`` property, a list of column index - formatting functions: - * Make the default float formatter if entered (callable, string, number; - wrapped in try/except) - * Determine each column's format type and add function - * Run ``apply_formatters`` to apply all format choices to ``df``. This - function handles index columns slightly differently, but results in the - formatters being applied to each column. - * Sparsify if requested and if multiindex - * Result is a dataframe with all object column types and values that - reflect the formatting choices. + **PyArrow / 2.0 Architecture Note:** + This class now uses an "Inspect-then-Dispatch" philosophy. It does not + coerce inputs to floats. It respects int64[pyarrow] and string[pyarrow] + types natively, handling nulls via validity masks rather than NaN-casting. """ def __init__( @@ -120,112 +78,13 @@ class GT(object): date_cols=None, raw_cols=None, show_index=True, - # config: Configurator | None = None, config_path: Path | None = None, **overrides, ): - """ - Parameters: - - :param df: target DataFrame or list of lists or markdown table string - :param caption: table caption, optional (GT will look for gt_caption - attribute of df and use that) - :param label: TeX label (used in \\label{} command). For markdown - tables with #tbl:... in the caption it is extracted automatically. - :param aligners: None or dict (type or colname) -> left | center | - right - :param formatters: None or dict (type or colname) or callable -> - format function for the column; formatters trump ratio_cols; if - callable passed, it is applied to **all columns**. - :param tabs: None or list of column widths in characters or a common - int or float width. (It is converted into em; one character is about - 0.5em on average; digits are exactly 0.5em.) If None, will be calculated. - Default None. - :param unbreakable: None or list of columns to be considered unbreakable - :param ratio_cols: None, or "all" or list of column names treated as - ratios. Set defaults in derived class suitable to application. - :param year_cols: None, or "all" or list of column names treated as - years (no commas, no decimals). Set defaults in derived class suitable - to application. - :param date_cols: None, or "all" or list of column names treated as - dates. Set defaults in derived class suitable to application. - :param raw_cols: None, or "all" or list of column names that are NOT - cast to floats. Set defaults in derived class suitable to application. - :param show_index: if True, show the index columns, default True - - Available keyword *overrides: - - :param config.default_integer_str: format f-string for integers, default - value '{x:,d}' - :param config.default_float_str: format f-string for floats, default - value '{x:,.3f}' - :param config.default_date_str: format f-string for dates, default '%Y-%m-%d'. - NOTE: no braces or x! - :param config.default_ratio_str: format f-string for ratios, default '{x:.1%}' - :param config.table_float_format: None or format string for floats in the - table format function, applied to entire table, default None - :param config.table_hrule_width: width of the table top, botton and header - hrule, default 1 - :param config.table_vrule_width: width of the table vrule, separating the - index from the body, default 1 - :param config.hrule_widths: None or tuple of three ints for hrule widths - (for use with multiindexes) - :param config.vrule_widths: None or tuple of three ints for vrule widths - (for use when columns have multiindexes) - :param config.sparsify: if True, config.sparsify the index columns, you almost always - want this to be true! - :param config.sparsify_columns: if True, config.sparsify the columns, default True, - generally a better look, headings centered in colspans - :param config.spacing: 'tight', 'medium', 'wide' to quickly set cell padding. - Medium is default (2, 10, 2, 10). - :param config.padding_trbl: None or tuple of four ints for padding, in order - top, right, bottom, left. - :param config.tikz_scale: scale factor applied to tikz LaTeX tables. - :param config.font_body: font size for body text, default 0.9. Units in em. - :param config.font_head: font size for header text, default 1.0. Units in em. - :param config.font_caption: font size for caption text, default 1.1. - Units in em. - :param config.font_bold_index: if True, make the index columns bold, - default False. - :param config.pef_precision: precision (digits after period) for pandas - engineering format, default 3. - :param config.pef_lower: apply engineering format to floats with absolute - value < 10**config.pef_lower; default -3. - :param config.pef_upper: apply engineering format to floats with absolute - value > 10**config.pef_upper; default 6. - :param config.cast_to_floats: if True, try to cast all non-integer, non-date - columns to floats - :param config.header_row: True: use first row as headers; False no headings. - Default True - :param config.equal: if True, set all column widths config.equal. Default False. Maybe - ignored, depending on computed ideal column widths. - :param config.caption_align: for the caption - :param config.large_ok: signal that you are intentionally applying to a large - dataframe. Sub-classes may restrict or apply .head() to df. - :param config.max_str_length: maximum displayed length of object types, that - are cast to strings. Eg if you have nested DataFrames! - :param str_table_fmt: table border format used for string output - (markdown), default mixed_grid DEPRECATED?? - :param config.table_width_mode: - 'explicit': set using config.max_table_width_em - 'natural': each cell on one line (can be very wide with long strings) - 'breakable': wrap breakable cells (text strings) at word boundaries - to fit longest word - 'minimum': wrap breakable and ok-to-break (dates) cells - :param config.table_width_header_adjust: additional proportion of table width - used to balance header columns. - :param config.table_width_header_relax: extra spaces allowed per column heading - to facilitate better column header wrapping. - :param config.max_table_width_em: max table width used for markdown string output, - default 200; width is never less than minimum width. Padding (3 chars - per row plus 1) consumed out of config.max_table_width_em in string output mode. - :param config.debug: if True, add id to caption and use colored lines in table, - default False. - """ + # --- Config Loading --- if config and config_path: - raise ValueError( - "Pass either 'config' or 'config_path', not both.") + raise ValueError("Pass either 'config' or 'config_path', not both.") if config: base_config = config @@ -235,36 +94,25 @@ class GT(object): raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) base_config = Configurator.model_validate(raw) except (ValidationError, OSError) as e: - raise ValueError( - f"Failed to load config from {config_path}") from e + raise ValueError(f"Failed to load config from {config_path}") from e else: base_config = Configurator() - # access through config - # update and validate; need to merge to avoid repeated args - # merged = dict(base_config.model_dump(), **overrides) merged = base_config.model_dump() | overrides self.config = Configurator(**merged) - # no validation - # self.config = base_config.model_copy(update=overrides) - # deal with alternative input modes for df: None, DataFrame, Series, markdown text table + # --- Data Ingestion --- if df is None: - # don't want None to fail df = pd.DataFrame([]) if isinstance(df, pd.DataFrame): - # usual use case pass elif isinstance(df, pd.Series): df = df.to_frame() elif isinstance(df, list): df = pd.DataFrame(df) - # override this selection come what may show_index = False if self.config.header_row: - # Set first row as column names df.columns = df.iloc[0] - # Drop first row and reset index df = df[1:].reset_index(drop=True) elif isinstance(df, str): df = df.strip() @@ -276,287 +124,179 @@ class GT(object): elif GT._is_namedtuple_instance(df): df = GT._ntdf(df) else: - raise ValueError( - 'df must be a DataFrame, a list of lists, or a markdown table string') + raise ValueError('df must be a DataFrame, a list of lists, or a markdown table string') if len(df) > self.config.large_warning and not self.config.large_ok: raise ValueError( - f'Large dataframe (>{self.config.large_warning} rows) and config.large_ok not set to true. ' - 'Set large_ok=True or increase value of large_warning.') + f'Large dataframe (>{self.config.large_warning} rows). Set large_ok=True.') if not df.columns.is_unique: raise ValueError('df column names are not unique') - # extract value BEFORE copying, copying does not carry these attributes over if caption != '': self.caption = caption else: - # used by querex etc. self.caption = getattr(df, 'gt_caption', '') self.label = label - self.df = df.copy(deep=True) # the object being formatted - self.raw_df = df.copy(deep=True) - # if not column_names: - # get rid of column names - # self.df.columns.names = [None] * self.df.columns.nlevels + + # --- PHASE 1: Data Preparation (Modified for Arrow) --- + self.df = df.copy() + self.raw_df = df.copy() self.df_id = df_short_hash(self.df) if self.caption != '' and self.config.debug: self.caption += f' (id: {self.df_id})' - # self.max_str_length = max_str_length - # before messing + self.show_index = show_index self.nindex = self.df.index.nlevels if self.show_index else 0 self.ncolumns = self.df.columns.nlevels self.ncols = self.df.shape[1] self.dt = self.df.dtypes - # reset index to put all columns on an config.equal footing, but note number ofindex cols + # Handle Index with warnings.catch_warnings(): if self.show_index: - warnings.simplefilter( - "ignore", category=pd.errors.PerformanceWarning) - self.df = self.df.reset_index( - drop=False, col_level=self.df.columns.nlevels - 1) - # want the new index to be ints - that is not default if old was multiindex + warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning) + self.df = self.df.reset_index(drop=False, col_level=self.df.columns.nlevels - 1) + # Ensure index is essentially a row number for internal tracking self.df.index = np.arange(self.df.shape[0], dtype=int) - self.index_change_level = Indexing.changed_column( - self.df.iloc[:, :self.nindex]) + + self.index_change_level = Indexing.changed_column(self.df.iloc[:, :self.nindex]) if self.ncolumns > 1: - # will be empty rows above the index headers - self.index_change_level = pd.Series( - [i[-1] for i in self.index_change_level]) + self.index_change_level = pd.Series([i[-1] for i in self.index_change_level]) self.column_change_level = Indexing.changed_level(self.raw_df.columns) - # determine ratio columns - if ratio_cols is not None and not self.df.columns.is_unique: - logger.warning( - 'Ratio cols specified with non-unique column names: ignoring request.') - self.ratio_cols = [] + # --- Column Tagging --- + # Helper to process column lists/regex + def resolve_cols(cols): + if cols is None: return [] + if cols == 'all': return list(self.df.columns) + if not isinstance(cols, (tuple, list)): return self.cols_from_regex(cols) + return cols + + # Check for non-unique collision first + if not self.df.columns.is_unique: + logger.warning('Cols specified with non-unique column names: ignoring request.') + self.ratio_cols, self.year_cols, self.date_cols, self.raw_cols = [], [], [], [] else: - if ratio_cols is None: - self.ratio_cols = [] - elif ratio_cols == 'all': - self.ratio_cols = [i for i in self.df.columns] - elif ratio_cols is not None and not isinstance(ratio_cols, (tuple, list)): - self.ratio_cols = self.cols_from_regex( - ratio_cols) # [ratio_cols] - else: - self.ratio_cols = ratio_cols + self.ratio_cols = resolve_cols(ratio_cols) + self.year_cols = resolve_cols(year_cols) + self.date_cols = resolve_cols(date_cols) + self.raw_cols = resolve_cols(raw_cols) - # determine year columns - if year_cols is not None and not self.df.columns.is_unique: - logger.warning( - 'Year cols specified with non-unique column names: ignoring request.') - self.year_cols = [] - else: - if year_cols is None: - self.year_cols = [] - elif year_cols is not None and not isinstance(year_cols, (tuple, list)): - self.year_cols = self.cols_from_regex(year_cols) # [year_cols] - else: - self.year_cols = year_cols + # --- REMOVED: Aggressive Float Coercion Loop --- + # The previous version looped through all columns and tried to cast them + # to floats to detect types. This broke PyArrow strings/timestamps. + # We now trust the dtypes provided by the dataframe. - # determine date columns - if date_cols is not None and not self.df.columns.is_unique: - logger.warning( - 'Year cols specified with non-unique column names: ignoring request.') - self.date_cols = [] - else: - if date_cols is None: - self.date_cols = [] - elif date_cols is not None and not isinstance(date_cols, (tuple, list)): - self.date_cols = self.cols_from_regex(date_cols) # [date_cols] - else: - self.date_cols = date_cols + # --- Type Detection for Breakability --- + if unbreakable is None: unbreakable = [] + elif isinstance(unbreakable, str): unbreakable = [unbreakable] - # determine columns NOT to cast to floats - if raw_cols is not None and not self.df.columns.is_unique: - logger.warning( - 'Year cols specified with non-unique column names: ignoring request.') - self.raw_cols = [] - else: - if raw_cols is None: - self.raw_cols = [] - elif raw_cols is not None and not isinstance(raw_cols, (tuple, list)): - self.raw_cols = self.cols_from_regex(raw_cols) # [raw_cols] - else: - self.raw_cols = raw_cols - - # figure the default formatter (used in conjunction with raw columns) - if self.config.default_formatter is None: - self.default_formatter = self._default_formatter - else: - assert callable( - self.config.default_formatter), 'config.default_formatter must be callable' - - def wrapped_default_formatter(x): - try: - return self.config.default_formatter(x) - except ValueError: - return str(x) - self.default_formatter = wrapped_default_formatter - - # cast as much as possible to floats - with warnings.catch_warnings(): - warnings.simplefilter( - "ignore", category=pd.errors.PerformanceWarning) - if self.config.cast_to_floats: - for i, c in enumerate(self.df.columns): - if c in self.raw_cols or c in self.date_cols: - continue - old_type = self.df.dtypes[c] - if not np.any((is_integer_dtype(self.df.iloc[:, i]), - is_datetime64_any_dtype(self.df.iloc[:, i]))): - try: - self.df.iloc[:, i] = self.df.iloc[: ,i].astype(float) - logger.debug( - f'coerced column {i}={c} from {old_type} to float success') - except (ValueError, TypeError): - logger.debug( - f'coercing {i}={c} from {old_type} to float FAILED') - # massage unbreakable - if unbreakable is None: - unbreakable = [] - elif isinstance(unbreakable, str): - unbreakable = [unbreakable] - - # now can determine types and infer the break penalties (for column sizes) self.float_col_indices = [] self.integer_col_indices = [] self.date_col_indices = [] - self.object_col_indices = [] # not actually used, but for neatness + self.object_col_indices = [] self.break_penalties = [] - # manage non-unique col names here - logger.debug('FIGURING TYPES') - for i, cn in enumerate(self.df.columns): # range(self.df.shape[1]): + + logger.debug('FIGURING TYPES (Arrow-Aware)') + for i, cn in enumerate(self.df.columns): + # We look at the actual series to determine type ser = self.df.iloc[:, i] - if cn in self.date_cols: - logger.debug(f'col {i}/{cn} specified as date col') + dtype = ser.dtype + + # Use pandas.api.types for robust checking across backends + is_date = (cn in self.date_cols) or is_datetime64_any_dtype(dtype) + is_int = is_integer_dtype(dtype) + is_flt = is_float_dtype(dtype) + + if is_date: self.date_col_indices.append(i) self.break_penalties.append( Breakability.NEVER if cn in unbreakable else Breakability.DATE) - elif is_datetime64_any_dtype(ser): - logger.debug(f'col {i} = {self.df.columns[i]} is DATE') - self.date_col_indices.append(i) - self.break_penalties.append( - Breakability.NEVER if cn in unbreakable else Breakability.DATE) - elif is_integer_dtype(ser): - logger.debug(f'col {i} = {self.df.columns[i]} is INTEGER') + elif is_int: self.integer_col_indices.append(i) - self.break_penalties.append( - Breakability.NEVER if cn in unbreakable else Breakability.NEVER) - elif is_float_dtype(ser): - logger.debug(f'col {i} = {self.df.columns[i]} is FLOAT') + self.break_penalties.append(Breakability.NEVER) + elif is_flt: self.float_col_indices.append(i) - self.break_penalties.append( - Breakability.NEVER if cn in unbreakable else Breakability.NEVER) + self.break_penalties.append(Breakability.NEVER) else: - logger.debug(f'col {i} = {self.df.columns[i]} is OBJECT') + # String / Object / Boolean self.object_col_indices.append(i) - c = ser.name - if c in self.year_cols or c in self.ratio_cols: - self.break_penalties.append( - Breakability.NEVER if cn in unbreakable else Breakability.NEVER) + if cn in self.year_cols or cn in self.ratio_cols: + self.break_penalties.append(Breakability.NEVER) else: self.break_penalties.append( Breakability.NEVER if cn in unbreakable else Breakability.ACCEPTABLE) - # figure out column and index alignment + # --- Alignment Logic --- if aligners is not None and np.any(self.df.columns.duplicated()): - logger.warning( - 'aligners specified with non-unique column names: ignoring request.') + logger.warning('aligners specified with non-unique column names: ignoring request.') aligners = None - if aligners is None: - # not using - aligners = [] - elif isinstance(aligners, str): - # lrc for each column - aligners = {c: a for c, a in zip(self.df.columns, aligners)} - self.df_aligners = [] + if aligners is None: aligners = [] + elif isinstance(aligners, str): + aligners = {c: a for c, a in zip(self.df.columns, aligners)} + + self.df_aligners = [] lrc = {'l': 'grt-left', 'r': 'grt-right', 'c': 'grt-center'} - # TODO: index aligners + for i, c in enumerate(self.df.columns): - # test aligners BEFORE index! if c in aligners: self.df_aligners.append(lrc.get(aligners[c], 'grt-center')) elif i < self.nindex: - # index -> left self.df_aligners.append('grt-left') elif c in self.year_cols: self.df_aligners.append('grt-center') elif c in self.raw_cols: - # these are strings self.df_aligners.append('grt-left') elif i in self.date_col_indices: - # center dates, why not! self.df_aligners.append('grt-center') elif c in self.ratio_cols or i in self.float_col_indices or i in self.integer_col_indices: - # number -> right self.df_aligners.append('grt-right') else: - # all else, left self.df_aligners.append('grt-left') self.df_idx_aligners = self.df_aligners[:self.nindex] - self.default_formatters = {} - if formatters is None: - pass - elif callable(formatters): - # apply to all columns - for k in self.df.columns: - self.default_formatters[k] = formatters - else: - for k, v in formatters.items(): - if callable(v): - self.default_formatters[k] = v - elif isinstance(v, str): - self.default_formatters[k] = lambda x: v.format(x=x) - elif isinstance(v, int): - fmt = f'{{x:.{v}f}}' - self.default_formatters[k] = lambda x: fmt.format(x=x) - else: - raise ValueError( - 'formatters must be dict of callables or ints or format strings {x:...}') + # --- Formatter Setup --- + self.user_formatters_override = {} + if formatters: + if callable(formatters): + for k in self.df.columns: + self.user_formatters_override[k] = formatters + else: + for k, v in formatters.items(): + if callable(v): self.user_formatters_override[k] = v + elif isinstance(v, str): self.user_formatters_override[k] = lambda x: v.format(x=x) + elif isinstance(v, int): + fmt = f'{{x:.{v}f}}' + self.user_formatters_override[k] = lambda x: fmt.format(x=x) + else: + raise ValueError('Formatters must be dict of callables, ints, or strings') + # --- Tabs --- if tabs is None: self.tabs = None elif isinstance(tabs, (int, float)): self.tabs = (tabs,) * (self.nindex + self.ncols) elif isinstance(tabs, (np.ndarray, pd.Series, list, tuple)): - if len(tabs) == self.nindex + self.ncols: - self.tabs = tabs # Already iterable and right length, self.tabs = as is - else: - logger.error( - f'{self.tabs=} has wrong length. Ignoring.') - self.tabs = None + self.tabs = tabs if len(tabs) == self.nindex + self.ncols else None else: - logger.error( - f'{self.tabs=} must be None, a single number, or a list of ' - 'numbers of the correct length. Ignoring.') self.tabs = None + # --- Padding / Config --- if self.config.padding_trbl is not None: padding_trbl = self.config.padding_trbl - elif self.config.padding_trbl is None: - if self.config.spacing == 'tight': - padding_trbl = (0, 5, 0, 5) - elif self.config.spacing == 'medium': - padding_trbl = (2, 10, 2, 10) - elif self.config.spacing == 'wide': - padding_trbl = (4, 15, 4, 15) - else: - raise ValueError( - 'config.spacing must be tight, medium, or wide or tuple of four ints.') - # pydantic will see to it this is OK + else: + spacing_map = {'tight': (0,5,0,5), 'medium': (2,10,2,10), 'wide': (4,15,4,15)} + padding_trbl = spacing_map.get(self.config.spacing, (2,10,2,10)) self.padt, self.padr, self.padb, self.padl = padding_trbl - # because of the problem of non-unique indexes use a list and - # not a dict to pass the formatters to to_html self.max_table_width_em = self.config.max_table_inch_width * 72 / self.config.table_font_pt_size + + # --- Initialization State --- self._pef = None self._df_formatters = None self.df_style = '' @@ -572,47 +312,40 @@ class GT(object): self._html_knowledge_df = None self._tex_knowledge_df = None self._knowledge_dfs = None - # finally config.sparsify and then apply formaters - # this radically alters the df, so keep a copy for now... + + # --- Apply Formatters (THE BIG CHANGE) --- + # We keep the raw dataframe mostly untouched. + # self.df becomes the Formatted DataFrame (Strings). self.df_pre_applying_formatters = self.df.copy() + + # Apply formatters modifies self.df to contain strings self.df = self.apply_formatters(self.df) - # config.sparsify + + # Sparsify if self.config.sparsify and self.nindex > 1: self.df = Sparsify.sparsify(self.df, self.df.columns[:self.nindex]) - # for c in self.df.columns[:self.nindex]: - # # config.sparsify returns some other stuff... - # self.df[c], _ = GT.config.sparsify(self.df[c]) - # make final tex and html versions + + # LaTeX / HTML mapping if self.config.tex_to_html is not None: - # NEED TO WORK ON INDEXES TOO self.df_html = self.df.map(self.config.tex_to_html) else: self.df_html = self.df + if self.config.tikz_escape_tex: self.df_tex = Escaping.escape_df_tex(self.df) else: self.df_tex = self.df def __repr__(self): - """Basic representation.""" return f"GT(df_id={self.df_id})" def __str__(self): - """String representation, for print().""" return self.make_string() def _repr_html_(self): - """ - Apply format to self.df. - - ratio cols like in constructor - """ return self.html def _repr_latex_(self): - """Generate a LaTeX tabular representation.""" - # return '' - # latex = self.df.to_latex(caption=self.caption, formatters=self._df_formatters) if self._clean_tex == '': self._clean_tex = self.make_tikz() logger.info('CREATED LATEX') @@ -627,13 +360,6 @@ class GT(object): self._cache[key] = value def cols_from_regex(self, regex): - """ - Return columns matching a regex. - - For Index and MultiIndex. Operates on ``self.df`` and includes - index (if ``show_index``) and columns of input dataframe. Search - applies to any level of the index. Case sensitive. - """ pattern = re.compile(regex) matching_cols = [ col for col in self.df.columns @@ -641,273 +367,216 @@ class GT(object): for level in (col if isinstance(col, tuple) else (col,))) ] return matching_cols - # return [col for col in self.df.columns if isinstance(col, str) and re.search(regex, col)] - # define the default and easy formatters =================================================== - def default_ratio_formatter(self, x): - """Ratio formatter.""" + # --- New Robust Formatters (Arrow-Safe) --- + + def _fmt_int_safe(self, x): + """Handle Integers (PyArrow or NumPy) safely with nulls.""" + if pd.isna(x): return "" try: - return self.config.default_ratio_str.format(x=x) - except ValueError: + return self.config.default_integer_str.format(x=int(x)) + except (ValueError, TypeError): return str(x) - def default_date_formatter(self, x): - """Date formatter that works for strings too.""" - if pd.isna(x): - return "" - try: - dt = pd.to_datetime(x, errors='coerce') - if pd.isna(dt): - return str(x) - return dt.strftime(self.config.default_date_str) - except Exception: - logger.error("date error with %s", x) - return str(x) - - def default_integer_formatter(self, x): - """Integer formatter.""" - try: - return self.config.default_integer_str.format(x=x) - except ValueError: - return str(x) - - def default_year_formatter(self, x): - """Year formatter.""" - try: - return f'{int(x):d}' - except (TypeError, ValueError): - return str(x) - - def default_raw_formatter(self, x): - """Formatter for columns flagged as raw.""" + def _fmt_date_iso(self, x): + """Handle Dates/Timestamps safely.""" + if pd.isna(x): return "" + # If it's a Timestamp object (Arrow or Pandas), it has strftime + if hasattr(x, "strftime"): + return x.strftime(self.config.default_date_str) return str(x) - @staticmethod - def default_float_format(x, neng=3): - """ - the endless quest for the perfect float formatter... - NOT USED AT THE MINUTE. - - tester:: - - for x in 1.123123982398324723947 * 10.**np.arange(-23, 23): - print(default_float_format(x)) - - :param x: - :return: - """ - ef = pd.io.formats.format.EngFormatter(neng, True) # noqa + def _fmt_float_smart(self, x): + """Smart float formatting that handles 'Year-like' floats.""" + if pd.isna(x): return "" try: - if x == 0: - ans = '0' - elif 1e-3 <= abs(x) < 1e6: - if abs(x) <= 10: - ans = f'{x:.3g}' - elif abs(x) < 100: - ans = f'{x:,.2f}' - elif abs(x) < 1000: - ans = f'{x:,.1f}' - else: - ans = f'{x:,.0f}' - else: - ans = ef(x) - return ans - except ValueError as e: - logger.debug(f'ValueError {e}') - return str(x) - except TypeError as e: - logger.debug(f'TypeError {e}') - return str(x) - except AttributeError as e: - logger.debug(f'AttributeError {e}') + # Check if it's effectively an integer (e.g. 2021.0) + # This preserves the "Float as Int" behavior for legacy data + if isinstance(x, float) and x.is_integer(): + return self.config.default_integer_str.format(x=int(x)) + return self.config.default_float_str.format(x=x) + except (ValueError, TypeError): return str(x) - def _default_formatter(self, x): - """Default universal formatter for other types.""" - try: - f = float(x) - except (TypeError, ValueError): - s = str(x) - return s if self.config.max_str_length < 0 else s[:self.config.max_str_length] + def _fmt_ratio(self, x): + if pd.isna(x): return "" + try: return self.config.default_ratio_str.format(x=x) + except: return str(x) - if self.default_float_formatter: - return self.default_float_formatter(f) + def _fmt_year(self, x): + if pd.isna(x): return "" + try: return f'{int(x):d}' + except: return str(x) - if np.isinf(f) or np.isnan(f): # clearer handling of weird float cases - return str(x) - - if f.is_integer(): - return self.config.default_integer_str.format(x=int(f)) - else: - return self.config.default_float_str.format(x=f) + def _fmt_raw(self, x): + if pd.isna(x): return "" + return str(x) + # --- Legacy Float Formatter Factory (Preserved but safe) --- def pef(self, x): - """Pandas engineering format.""" if self._pef is None: - self._pef = pd.io.formats.format.EngFormatter(accuracy=self.config.pef_precision, use_eng_prefix=True) # noqa + self._pef = pd.io.formats.format.EngFormatter( + accuracy=self.config.pef_precision, use_eng_prefix=True) return self._pef(x) def make_float_formatter(self, ser): """ - Make a float formatter suitable for the Series ser. - - Obeys these rules: - * All elements in the column are formatted consistently - * ... - - TODO flesh out... at some point shd use pef?! - + Create a customized float formatter based on column statistics. + Works with Arrow columns as .mean(), .abs() dispatch correctly. """ - amean = ser.abs().mean() - # mean = ser.mean() - amn = ser.abs().min() - amx = ser.abs().max() - # smallest = ser.abs().min() - # sd = ser.sd() - # p10, p50, p90 = np.quantile(ser, [0.1, .5, 0.9], method='inverted_cdf') - # pl = 10. ** self.config.pef_lower - # pu = 10. ** self.config.pef_upper - pl, pu = 10. ** self.config.pef_lower, 10. ** self.config.pef_upper - if amean < 1: - precision = 5 - elif amean < 10: - precision = 3 - elif amean < 20000: - precision = 2 - else: - precision = 0 - fmt = f'{{x:,.{precision}f}}' - logger.debug(f'{ser.name=}, {amean=}, {fmt=}') - if amean < pl or amean > pu or amx / max(1, amn) > pu: - # go with eng + try: + # Drop nulls for stats calculation to avoid issues + ser_valid = ser.dropna() + if len(ser_valid) == 0: + return self._fmt_float_smart + + amean = ser_valid.abs().mean() + # amn = ser_valid.abs().min() + # amx = ser_valid.abs().max() + + pl, pu = 10. ** self.config.pef_lower, 10. ** self.config.pef_upper + + if amean < 1: precision = 5 + elif amean < 10: precision = 3 + elif amean < 20000: precision = 2 + else: precision = 0 + + fmt = f'{{x:,.{precision}f}}' + def ff(x): + if pd.isna(x): return "" try: - return self.pef(x) + # Check for Engineering Format conditions + val_abs = abs(x) + if (val_abs > 0) and (val_abs < pl or val_abs > pu): + return self.pef(x) + return fmt.format(x=x) except (ValueError, TypeError, InvalidOperation): return str(x) - else: - def ff(x): - try: - return fmt.format(x=x) - # well and good but results in ugly differences - # by entries in a column - # if x == int(x) and np.abs(x) < pu: - # return f'{x:,.0f}.' - # else: - # return fmt.format(x=x) - except (ValueError, TypeError): - return str(x) - return ff + return ff + except Exception as e: + logger.debug(f"Float formatter factory failed: {e}. Using default.") + return self._fmt_float_smart - @ property + @property def df_formatters(self): """ - Make and return the list of formatters. - - Created one per column. Int, date, objects use defaults, but - for float cols the formatter is created custom to the details of - each column. + Dispatcher: Inspects types and assigns formatters. """ if self._df_formatters is None: - # because of non-unique indexes, index by position not name - if self.config.table_float_format is not None: + self._df_formatters = [] + + # Pre-calc custom table-wide float format if it exists + custom_float = None + if self.config.table_float_format: if callable(self.config.table_float_format): - # wrap in error protections - def ff(x): + # Wrap to handle safe calls + def safe_custom_float(x): try: return self.config.table_float_format(x=x) except ValueError: return str(x) except Exception as e: logger.error(f'Custom float function raised {e=}') - self.default_float_formatter = ff + return str(x) + custom_float = safe_custom_float else: - if type(self.config.table_float_format) != str: - raise ValueError( - 'config.table_float_format must be a string or a function') fmt = self.config.table_float_format - - def ff(x): + def safe_custom_float_str(x): try: return fmt.format(x=x) except ValueError: return str(x) except Exception as e: - logger.error( - f'Custom float format string raised {e=}') - self.default_float_formatter = ff - else: - self.default_float_formatter = False + logger.error(f'Custom float format string raised {e=}') + return str(x) + custom_float = safe_custom_float_str - self._df_formatters = [] - for i, c in enumerate(self.df.columns): - # set a default, note here can have - # non-unique index so work with position i - if c in self.default_formatters: - self._df_formatters.append(self.default_formatters[c]) - elif c in self.ratio_cols: - # print(f'{i} ratio') - self._df_formatters.append(self.default_ratio_formatter) - elif c in self.year_cols: - self._df_formatters.append(self.default_year_formatter) - elif c in self.raw_cols: - self._df_formatters.append(self.default_raw_formatter) - elif i in self.date_col_indices: - self._df_formatters.append(self.default_date_formatter) - elif i in self.integer_col_indices: - # print(f'{i} int') - self._df_formatters.append(self.default_integer_formatter) - elif i in self.float_col_indices: - # trickier approach... - self._df_formatters.append( - self.default_float_formatter or self.make_float_formatter(self.df.iloc[:, i])) + for i, col_name in enumerate(self.df.columns): + # 1. User overrides (highest priority) + if col_name in self.user_formatters_override: + self._df_formatters.append(self.user_formatters_override[col_name]) + continue + + # 2. Semantic Tags + if col_name in self.ratio_cols: + self._df_formatters.append(self._fmt_ratio) + continue + if col_name in self.year_cols: + self._df_formatters.append(self._fmt_year) + continue + if col_name in self.raw_cols: + self._df_formatters.append(self._fmt_raw) + continue + + # 3. Type-Based Dispatch (The "PyArrow" logic) + dtype = self.df[col_name].dtype + + # Date/Time + if (i in self.date_col_indices) or is_datetime64_any_dtype(dtype): + self._df_formatters.append(self._fmt_date_iso) + + # Integer (NumPy or Arrow) + elif is_integer_dtype(dtype): + self._df_formatters.append(self._fmt_int_safe) + + # Float (NumPy or Arrow) + elif is_float_dtype(dtype): + if custom_float: + self._df_formatters.append(custom_float) + else: + # Use the smart factory + self._df_formatters.append(self.make_float_formatter(self.df.iloc[:, i])) + + # Default / String / Object else: - # print(f'{i} default') - self._df_formatters.append(self.default_formatter) - # self._df_formatters is now a list of length config.equal to cols in df + self._df_formatters.append(self._fmt_raw) + if len(self._df_formatters) != self.df.shape[1]: - raise ValueError( - f'Something wrong: {len(self._df_formatters)=} != {self.df.shape=}') + raise ValueError(f'Formatter count mismatch: {len(self._df_formatters)} != {self.df.shape[1]}') + return self._df_formatters @staticmethod def apply_formatters_work(df, formatters): """Apply formatters to a DataFrame.""" try: - # very surprising bug: if df is empty new_df will have type float!! - new_df = pd.DataFrame({i: map(f, df.iloc[:, i]) - for i, f in enumerate(formatters)}, dtype=object) + # This applies the lambda functions to every cell + new_df = pd.DataFrame({ + i: map(f, df.iloc[:, i]) + for i, f in enumerate(formatters) + }, index=df.index) # Preserve index! except TypeError: print('NASTY TYPE ERROR') raise + new_df.columns = df.columns - return new_df + + # OPTIMIZATION: Convert to PyArrow strings immediately. + # This enables vectorized width calculations later. + try: + return new_df.astype("string[pyarrow]") + except ImportError: + # Fallback if pyarrow not installed (unlikely given context) + return new_df.astype(str) def apply_formatters(self, df, mode='adjusted'): """ - Replace df (the raw df) with formatted df, including the index. - - If mode is 'adjusted' operates on columns only, does not touch the - index. Otherwise, called from tikz and operating on raw_df + Replace df (the raw df) with formatted string df. """ if mode == 'adjusted': - # apply to df where the index has been reset - # number of columns = len(self.df_formatters) return GT.apply_formatters_work(df, self.df_formatters) elif mode == 'raw': - # work on raw_df where the index has not been reset - # because of non-unique indexes, index by position not name - # create the df and the index separately data_formatters = self.df_formatters[self.nindex:] new_body = GT.apply_formatters_work(df, data_formatters) if not self.show_index: return new_body - # else have to handle the index + index_formatters = self.df_formatters[:self.nindex] df_index = df.reset_index( drop=False, col_level=self.df.columns.nlevels - 1).iloc[:, :self.nindex] new_index = GT.apply_formatters_work(df_index, index_formatters) - # put them back together + new_df = pd.concat([new_index, new_body], axis=1) new_df = new_df.set_index(list(df_index.columns)) new_df.index.names = df.index.names @@ -915,32 +584,23 @@ class GT(object): else: raise ValueError(f'unknown mode {mode}') + # --- Knowledge DFs --- @property def text_knowledge_df(self): - """Uber source of information for text formatting.""" if self._text_knowledge_df is None: self._text_knowledge_df = self.estimate_column_widths_by_mode('text') return self._text_knowledge_df @property def html_knowledge_df(self): - """Uber source of information for html formatting.""" if self._html_knowledge_df is None: self._html_knowledge_df = self.estimate_column_widths_by_mode('html') return self._html_knowledge_df @property def tex_knowledge_df(self): - """Uber source of information for tex formatting.""" if self._tex_knowledge_df is None: - # seems this is unlikely to be a good idea! - # if (all(self.df_tex.index == self.df_html.index) - # and all(self.df_tex.columns == self.df_html.columns) - # and all(self.df_tex == self.df_html)): - # self._tex_knowledge_df = self.html_knowledge_df - # else: if not self.config.tikz: - # just repeat html so you have something self._tex_knowledge_df = self.html_knowledge_df else: self._tex_knowledge_df = self.estimate_column_widths_by_mode('tex') @@ -997,87 +657,65 @@ class GT(object): return bit def estimate_column_widths_by_mode(self, mode): - r""" - Return dataframe of width information: three modes for text, html, and tex. - - Mode adjusts which df is used and how widths are estimated - - * text -> self.df and len = str.len - * html -> self.df_html and len = - * tex -> self.df_tex and len = - - Returned dataframe has columns named mode_xxx, where xxx can be - - * natural: max len by col - * minimum width = max length given breaks - * acceptable = allowing for break type by column - - * head_natural, head_min, head_acceptable for the heading - - * raw_recommended - * header_adjustment - * recommended - - pat and iso_date_split regex explanation: - - # re.split(r'(?<=[\s.,:;!?()\[\]{}\-\\/|])\s*', text) - # (?<=...) is a lookbehind to preserve the break character with the left-hand fragment. - # [\s.,:;!?()\[\]{}\-\\/|] matches common punctuation and separators: - # \s = whitespace - # . , : ; ! ? = terminal punctuation - # () [] {} = brackets - # \- = dash - # \\/| = slash, backslash, pipe - + """ + Return dataframe of width information. + OPTIMIZED: Uses vectorized string operations for 'text' mode. """ assert mode in ('text', 'html', 'tex'), 'Only html, text and tex modes valid.' + if mode == 'text': - df = self.df + df = self.df # This is now string[pyarrow] thanks to apply_formatters + # Vectorized length calculation (Much faster than map(len)) + # We assume df is already string[pyarrow] + try: + # Ideally: df.apply(lambda x: x.str.len().max()) + # Since df is objects/strings, .str accessor works if dtype is string + if is_string_dtype(df.iloc[:,0]): + natural_width = df.apply(lambda x: x.str.len().max()).to_dict() + else: + natural_width = df.map(len).max(axis=0).to_dict() + except: + natural_width = df.map(len).max(axis=0).to_dict() + len_function = len - # no bold in text mode bold_adjustment = 1.0 elif mode == 'html': df = self.df_html len_function = TextLength.text_display_len bold_adjustment = 1.1 - else: # mode == 'tex': + natural_width = df.map(lambda x: len_function(x.strip())).max(axis=0).to_dict() + else: df = self.df_tex len_function = TextLength.text_display_len bold_adjustment = 1.1 + natural_width = df.map(lambda x: len_function(x.strip())).max(axis=0).to_dict() n_row, n_col = df.shape - # The width if content didn't wrap (single line) - # Series=dict colname->max width of cells in column - natural_width = df.map(lambda x: len_function(x.strip())).max(axis=0).to_dict() - # in text mode: figure out where you can break; pat breaks after punctuation or at - pat = r'(?<=[.,;:!?)\]}\u2014\u2013])\s+|--*\s+|\s+' iso_date_split = r'(?<=\b\d{4})-(?=\d{2}-\d{2})' pat = f'{pat}|{iso_date_split}' - # Calculate ideal (no wrap) and minimum possible widths for all columns - # The absolute minimum width each column can take (e.g., longest word for text) minimum_width = {} header_natural = {} header_minimum = {} for col_name in df.columns: + # For minimum width, we still need splitting minimum_width[col_name] = ( - df[col_name].str + df[col_name].astype(str).str # Ensure str accessor .split(pat=pat, regex=True, expand=True) .fillna('') .map(len_function) .max(axis=1) .max() ) - # ensure is a tuple + ctuple = col_name if isinstance(col_name, tuple) else (col_name, ) header_natural[col_name] = bold_adjustment * max(map(len_function, ctuple)) header_minimum[col_name] = bold_adjustment * min(len_function(part) for i in ctuple for part in re.split(pat, str(i))) - # begin to assemble the parts - # ans will be the col_width_df; break_penalties needed by all methods ans = pd.DataFrame({ 'alignment': [i[4:] for i in self.df_aligners], 'break_penalties': self.break_penalties, @@ -1085,31 +723,29 @@ class GT(object): 'natural_width': natural_width.values(), 'minimum_width': minimum_width.values(), }, index=df.columns) + ans['acceptable_width'] = np.where( ans.break_penalties == Breakability.ACCEPTABLE, ans.minimum_width, ans.natural_width) ans['header_natural'] = header_natural ans['header_minimum'] = header_minimum if mode in ('html', 'tex'): - # put in some padding TODO KLUDGE ans['natural_width'] += 1 ans['minimum_width'] += 1 ans['header_natural'] += 1 ans['header_minimum'] += 1 - # adjustments and recommendations - these are keyed to text output with padding natural, acceptable, minimum = ans.iloc[:, 3:6].sum() head_natural, head_minimum = ans.iloc[:, 6:8].sum() if mode == 'text': - # +1 for the pipe | symbol - PADDING = 2 # per column TODO enhance + PADDING = 2 pad_adjustment = (PADDING + 1) * n_col - 1 else: - PADDING = 1 # per column TODO enhance + PADDING = 1 pad_adjustment = PADDING * n_col + if self.config.table_width_mode == 'explicit': - # target width INCLUDES padding and column marks | target_width = self.max_table_width_em - pad_adjustment elif self.config.table_width_mode == 'natural': target_width = natural + pad_adjustment @@ -1121,62 +757,41 @@ class GT(object): logger.info('config self.max_table_width_em %s', self.max_table_width_em) logger.info('target width after column spacer adjustment %s', target_width) - # extra space for the headers to relax, if useful if self.config.table_width_header_adjust > 0: max_extra = int(self.config.table_width_header_adjust * target_width) else: max_extra = 0 + if target_width > natural: - # everything gets its natural width ans['recommended'] = ans['natural_width'] space = target_width - natural logger.info('Space for NATURAL! Spare space = %s', space) elif target_width > acceptable: - # strings wrap ans['recommended'] = ans['acceptable_width'] - # use up extra on the ACCEPTABLE cols space = target_width - acceptable - logger.info( - 'Using "breaks acceptable" (dates not wrapped), spare space = %s', space) + logger.info('Using "breaks acceptable" (dates not wrapped), spare space = %s', space) elif target_width > minimum: - # strings and dates wrap ans['recommended'] = ans['minimum_width'] - # use up extra on dates first, then strings space = target_width - minimum - logger.info( - 'Using "minimum" (all breakable incl dates), spare space = %s', space) + logger.info('Using "minimum" (all breakable incl dates), spare space = %s', space) else: - # OK severely too small ans['recommended'] = ans['minimum_width'] space = target_width - minimum - # hard to shut this up... - logger.info( - 'Mode %s, desired width too small for pleasant formatting, table will be too wide by spare space %s em < 0.', - mode, space) + logger.info('Mode %s, desired width too small, table too wide by %s em.', mode, space) + logger.info(f'{mode=} {target_width=}, {natural=}, {acceptable=}, {minimum=}, {max_extra=}, {space=}') - # this section tweaks the widths for column headers -> text output only. - # trust tex and html output to naturally make better decisions about line breaks in the heading. if mode == "text" and space > 0 and df.columns.nlevels == 1: - # text mode only: see if some header tweaks are in order (Index only for now, TODO) - # Step 1: baseline comes in from code above ans['raw_recommended'] = ans['recommended'] - - # Step 2: optimize to get rid of intra-line breaks if max_extra > 0: adj = Width.header_adjustment(df, ans['recommended'], space, max_extra) - # create new col and populate per GPT ans['header_tweak'] = pd.Series(adj) else: ans['header_tweak'] = 0 ans['recommended'] = ans['recommended'] + ans['header_tweak'] - # in this case zero out impact of header_natural and header_minimum cos don't want to use them below ans['header_natural'] = ans['recommended'] ans['header_minimum'] = ans['recommended'] - # Step 3 (all modes): distribute remaining shortfall proportionally - # account for - # obvs remaining == space if mode is not text remaining = target_width - ans['recommended'].sum() ans['pre_shortfall_recommended'] = ans['recommended'] if remaining > 0: @@ -1192,82 +807,45 @@ class GT(object): logger.info('no shortfall to allocate after header adjustments') if mode == 'tex': - # tex mode only need tikz raw size for tex code layout tikz_colw = dict.fromkeys(df.columns, 0) tikz_headw = dict.fromkeys(df.columns, 0) for i, c in enumerate(df.columns): - # figure width of the column labels - c0 = c # before we mess around with it, for setting dict values - if not isinstance(c, tuple): - # make it one: now index and multi index on same footing - c = (c,) - # convert to strings + c0 = c + if not isinstance(c, tuple): c = (c,) c = [str(i) for i in c] tikz_headw[c0] = max(map(len, c)) - - # now figure the width of the elements in the column tikz_colw[c0] = df.iloc[:, i].map(lambda x: len(str(x))).max() - # needed tikz width is greater of two for c in df.columns: tikz_colw[c] = max(tikz_colw[c], tikz_headw[c]) - # distribute any overage using the measures already done ans['tikz_colw'] = tikz_colw - ans['tikz_colw'] += 2 # for \I + ans['tikz_colw'] += 2 - # in all cases...assemble the answer with relevant information return_columns = [ - 'alignment', - 'break_penalties', - 'breakability', - 'natural_width', - 'acceptable_width', - 'minimum_width', - 'header_natural', - 'header_minimum', - 'raw_recommended', - 'header_tweak', - 'pre_space_share_recommended', - 'proto_recommended', - 'recommended', - 'tikz_colw', + 'alignment', 'break_penalties', 'breakability', 'natural_width', + 'acceptable_width', 'minimum_width', 'header_natural', 'header_minimum', + 'raw_recommended', 'header_tweak', 'pre_space_share_recommended', + 'proto_recommended', 'recommended', 'tikz_colw', ] ans = ans[[i for i in return_columns if i in ans.columns]] - # need recommended to be > 0 ans['recommended'] = np.maximum(ans['recommended'], 1) return ans def make_style(self, tabs): """Write out custom CSS for the table.""" if self.config.debug: - head_tb = '#0ff' - body_b = '#f0f' - h0 = '#f00' - h1 = '#b00' - h2 = '#900' - bh0 = '#f00' - bh1 = '#b00' - v0 = '#0f0' - v1 = '#0a0' - v2 = '#090' + head_tb, body_b = '#0ff', '#f0f' + h0, h1, h2 = '#f00', '#b00', '#900' + bh0, bh1, v0, v1, v2 = '#f00', '#b00', '#0f0', '#0a0', '#090' else: - head_tb = '#000' - body_b = '#000' - h0 = '#000' - h1 = '#000' - h2 = '#000' - bh0 = '#000' - bh1 = '#000' - v0 = '#000' - v1 = '#000' - v2 = '#000' + head_tb = body_b = h0 = h1 = h2 = bh0 = bh1 = v0 = v1 = v2 = '#000' + table_hrule = self.config.table_hrule_width table_vrule = self.config.table_vrule_width - # for local use padt, padr, padb, padl = self.padt, self.padr, self.padb, self.padl style = [f''' ') logger.info('CREATED CSS') return '\n'.join(style) @@ -1382,76 +958,44 @@ class GT(object): index_name_to_level = dict( zip(self.raw_df.index.names, range(self.nindex))) index_change_level = self.index_change_level.map(index_name_to_level) - # this is easier and computed in the init column_change_level = self.column_change_level - # Start table html = [f''] - if self.label != "": - pass - # TODO put in achor tag somehow!! if self.caption != '': html.append(f'') - # Process header: allow_duplicates=True means can create cols with the same name bit = self.df_html.T.reset_index(drop=False, allow_duplicates=True) idx_header = bit.iloc[:self.nindex, :self.ncolumns] columns = bit.iloc[self.nindex:, :self.ncolumns] - # figure appropriate widths tabs = self.html_knowledge_df['recommended'].map(lambda x: np.round(x, 3)) - - # set column widths; tabs returns lengths of strings in each column tabs = np.array(tabs) + (self.padl + self.padr) / 12 - # this gets stripped out by quarto, so make part of style + html.append('') for w in tabs: html.append(f'') html.append('') - # TODO Add header aligners - # this is TRANSPOSED!! if self.config.sparsify_columns: html.append("") for i in range(self.ncolumns): - # one per row of columns m index, usually only 1 html.append("") if self.show_index: for j, r in enumerate(idx_header.iloc[:, i]): - # columns one per level of index html.append(f'') - # if not for col span issue you could just to this: - # for j in range(self.ncols): - # hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' - # if j == 0: - # # start with the first column come what may - # vrule = f'grt-vrule-index' - # elif j >= self.column_change_level[i]: - # vrule = f'grt-vrule-{column_change_level[cum_col]}' - # else: - # vrule = '' - # html.append(f'') - # here, the groupby needs to consider all levels at and above i - # this concats all the levels - # need :i+1 to get down to the ith level - cum_col = 0 # keep track of where we are up to + cum_col = 0 for j, (nm, g) in enumerate(groupby(columns.iloc[:, :i + 1]. apply(lambda x: ':::'.join(str(i) for i in x), axis=1))): - # ::: needs to be something that does not appear in the col names - # need to combine for groupby but be able to split off the last level - # picks off the name of the bottom level nm = nm.split(':::')[-1] hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' colspan = sum(1 for _ in g) if 0 < j: vrule = f'grt-vrule-{column_change_level[cum_col]}' elif j == 0 and self.show_index: - # start with the first column if showing index vrule = f'grt-vrule-index' else: vrule = '' if j == 0 and not self.show_index: - # first column, no index, left align label html.append( f'') else: @@ -1463,21 +1007,15 @@ class GT(object): else: html.append("") for i in range(self.ncolumns): - # one per row of columns m index, usually only 1 html.append("") if self.show_index: for j, r in enumerate(idx_header.iloc[:, i]): - # columns one per level of index html.append(f'') for j, r in enumerate(columns.iloc[:, i]): - # one per column of dataframe - # figure how high up mindex the vrules go - # all headings get hrules, it's the vrules that are tricky hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' if 0 < j < self.ncols and i >= column_change_level[j]: vrule = f'grt-vrule-{column_change_level[j]}' elif j == 0 and self.show_index: - # start with the first column come what may vrule = f'grt-vrule-index' else: vrule = '' @@ -1489,37 +1027,28 @@ class GT(object): bold_idx = 'grt-bold' if self.config.font_bold_index else '' html.append("") for i, (n, r) in enumerate(self.df_html.iterrows()): - # one per row of dataframe html.append("") hrule = '' if self.show_index: for j, c in enumerate(r.iloc[:self.nindex]): - # dx = data in index - # if this is the level that changes for this row - # will use a top rule hence omit i = 0 which already has an hrule - # appears in the index change level. But if it DOES NOT appear then - # it isn't a change level so no rule required if i > 0 and hrule == '' and i in index_change_level and j == index_change_level[i]: hrule = f'grt-hrule-{j}' - # html.append(f'') col_id = f'grt-c-{j}' html.append( f'') for j, c in enumerate(r.iloc[self.nindex:]): - # first col left handled by index/body divider if 0 < j < self.ncols: vrule = f'grt-vrule-{column_change_level[j]}' elif j == 0 and self.show_index: - # start with the first column come what may vrule = f'grt-vrule-index' else: vrule = '' - # html.append(f'') col_id = f'grt-c-{j+self.nindex}' html.append( f'') html.append("") html.append("") + text = '\n'.join(html) self._df_html_text = Escaping.clean_html_tex(text) logger.info('CREATED HTML') @@ -1530,11 +1059,7 @@ class GT(object): if not self.config.debug: for style_tag in soup.find_all("style"): if style_tag.string: - # Remove CSS comments - cleaned_css = re.sub( - r'/\*.*?\*/', '', style_tag.string, flags=re.DOTALL) - # Minify whitespace - # cleaned_css = re.sub(r'\s+', ' ', cleaned_css).strip() + cleaned_css = re.sub(r'/\*.*?\*/', '', style_tag.string, flags=re.DOTALL) style_tag.string.replace_with(cleaned_css) return soup @@ -1542,99 +1067,20 @@ class GT(object): def html(self): if self._clean_html == '': if self._df_html_text == '': - # makes style and html (need tabs) self.make_html() - code = ["
", - self._df_style_text, - self._df_html_text, - "
"] + code = ["
", self._df_style_text, self._df_html_text, "
"] soup = BeautifulSoup('\n'.join(code), 'html.parser') soup = self.clean_style(soup) - self._clean_html = str(soup) # .prettify() -> too many newlines + self._clean_html = str(soup) logger.info('CREATED COMBINED HTML and STYLE') return self._clean_html def make_tikz(self): """ Write DataFrame to custom tikz matrix. - - Updated version that uses self.df and does not need to - reapply formatters or sparsify. Various HTML->TeX replacements - are still needed, e.g., dealing with % and _ outside formulas. - - Write DataFrame to custom tikz matrix to allow greater control of - formatting and insertion of horizontal and vertical divider lines - - Estimates tabs from text width of fields (not so great if includes - a lot of TeX macros) with a manual override available. Tabs gives - the widths of each field in em (width of M) - - Standard row height = 1.5em seems to work - set in meta. - - first and last thick rules - others below (Python, zero-based) row number, excluding title row - - keyword arguments : value (no newlines in value) escape back slashes! - ``#keyword...`` rows ignored - passed in as a string to facilitate using them with %%pmt? - - **Rules** - - * hrule at i means below row i of the table. (1-based) Top, bottom and - below index lines are inserted automatically. Top and bottom lines - are thicker. - * vrule at i means to the left of table column i (1-based); there will - never be a rule to the far right...it looks plebby; remember you must - include the index columns! - - Issue: column with floats and spaces or missing causes problems (VaR, - TVaR, EPD, mean and CV table) - - From great.pres_maker.df_to_tikz - - Arguments moved into config: - - column_sep=4 / 8, # was 3/8 - row_sep=1 / 8, - container_env='table', - extra_defs='', - hrule=None, - vrule=None, - post_process='', - latex=None, - - - keyword args: - - scale picks up self.config.tikz_scale; scale applied to whole - table - default 0.717 - height row height, rec. 1 (em) - column_sep col sep in em - row_sep row sep in em - container_env table, figure or sidewaysfigure - color color for text boxes (helps config.debugging) - extra_defs TeX defintions and commands put at top of table, - e.g., \\centering - lines lines below these rows, -1 for next to last row - etc.; list of ints - post_process e.g., non-line commands put at bottom of table - latex arguments after \\begin{table}[latex] - caption text for caption - - Previous version see great.pres_maker - Original version see: C:\\S\\TELOS\\CAS\\AR_Min_Bias\\cvs_to_md.py - - :param column_sep: - :param row_sep: - :param figure: - :param extra_defs: - :param post_process: - :param label: - :return: """ if not self.config.tikz: return '' - # pull out arguments (convert to local vars - these used to be arguments) column_sep = self.config.tikz_column_sep row_sep = self.config.tikz_row_sep container_env = self.config.tikz_container_env @@ -1643,11 +1089,9 @@ class GT(object): post_process = self.config.tikz_post_process latex = self.config.tikz_latex - # TODO: really should just work with this not a copy? df = self.df_tex.copy() caption = self.caption label = self.label - # prepare label and caption if label == '': lt = '' label = '' @@ -1663,9 +1107,8 @@ class GT(object): caption = f'\\caption{{{self.caption}}}\n{label}' if not df.columns.is_unique: - # possible index/body column interaction raise ValueError('tikz routine requires unique column names') - # centering handled by quarto + header = """ \\begin{{{container_env}}}{latex} {caption} @@ -1681,8 +1124,6 @@ class GT(object): nodes in empty cells, nodes={{rectangle, scale={scale}, text badly ragged {debug}}}, """ - # put draw=blue!10 or so in nodes to see the node - footer = """ {post_process} @@ -1698,26 +1139,17 @@ class GT(object): vrule = set() else: vrule = set(vrule) - # to the left of... +1 vrule.add(nc_index + 1) logger.info( f'rows in columns {nr_columns}, columns in index {nc_index}') - # internal TeX code (same as HTML code) matrix_name = self.df_id - - # column and tikz display widths colw = self.tex_knowledge_df['tikz_colw'].fillna(0).round(3) tabs = self.tex_knowledge_df['recommended'].map(lambda x: np.round(x, 3)) - # these are indexed with pre-TeX mangling names - # colw.index = df.columns - # tabs.index = df.columns - # alignment dictionaries - these are still used below ad = {'l': 'left', 'r': 'right', 'c': 'center'} ad2 = {'l': '<', 'r': '>', 'c': '^'} - # use df_aligners, at this point the index has been reset align = [] for n, i in zip(df.columns, self.df_aligners): if i == 'grt-left': @@ -1729,14 +1161,12 @@ class GT(object): else: align.append('l') - # start writing sio = StringIO() if latex is None: latex = '' else: latex = f'[{latex}]' if self.config.debug: - # color all boxes debug = ', draw=blue!10' else: debug = '' @@ -1748,8 +1178,6 @@ class GT(object): latex=latex, debug=debug)) - # table header - # title rows, start with the empty spacer row i = 1 sio.write( f'\trow {i}/.style={{nodes={{text=black, anchor=north, inner ysep=0, text height=0, text depth=0}}}},\n') @@ -1757,20 +1185,14 @@ class GT(object): sio.write( f'\trow {i}/.style={{nodes={{text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries, align=center}}}},\n') - # override for index columns headers - # probably ony need for the bottom row with a multiindex? for i in range(2, nr_columns + 2): for j in range(1, 1+nc_index): sio.write( f'\trow {i} column {j}/.style=' '{nodes={font=\\bfseries\\itshape, align=left}},\n' ) - # write column spec for i, w, al in zip(range(1, len(align) + 1), tabs, align): - # average char is only 0.48 of M - # https://en.wikipedia.org/wiki/Em_(gtypography) if i == 1: - # first column sets row height for entire row sio.write(f'\tcolumn {i:>2d}/.style={{' f'nodes={{align={ad[al]:<6s}}}, ' 'text height=0.9em, text depth=0.2em, ' @@ -1779,7 +1201,6 @@ class GT(object): else: sio.write(f'\tcolumn {i:>2d}/.style={{' f'nodes={{align={ad[al]:<6s}}}, nosep, text width={max(2, w):.2f}em}},\n') - # extra col to right which enforces row height sio.write( f'\tcolumn {i+1:>2d}/.style={{text height=0.9em, text depth=0.2em, nosep, text width=0em}}\n') sio.write('\t}]\n') @@ -1787,16 +1208,13 @@ class GT(object): sio.write("\\matrix ({matrix_name}) [table, ampersand replacement=\\&]{{\n".format( matrix_name=matrix_name)) - # body of table, starting with the column headers - # spacer row nl = '' for cn, al in zip(df.columns, align): s = f'{nl} {{cell:{ad2[al]}{colw[cn]}s}} ' nl = '\\&' sio.write(s.format(cell=' ')) - # include the blank extra last column sio.write('\\& \\\\\n') - # write header rows (again, issues with multi index) + mi_vrules = {} sparse_columns = {} if isinstance(df.columns, pd.MultiIndex): @@ -1805,44 +1223,29 @@ class GT(object): sparse_columns[lvl], mi_vrules[lvl] = Sparsify.sparsify_mi(df.columns.get_level_values(lvl), lvl == len(df.columns.levels) - 1) for cn, c, al in zip(df.columns, sparse_columns[lvl], align): - # c = wfloat_format(c) s = f'{nl} {{cell:{ad2[al]}{colw[cn]}s}} ' nl = '\\&' sio.write(s.format(cell=c + '\\I')) - # include the blank extra last column sio.write('\\& \\\\\n') else: nl = '' for c, al in zip(df.columns, align): - # c = wfloat_format(c) s = f'{nl} {{cell:{ad2[al]}{colw[c]}s}} ' nl = '\\&' - # cols may not be strings... sio.write(s.format(cell=str(c) + '\\I')) sio.write('\\& \\\\\n') - # write table entries for idx, row in df.iterrows(): nl = '' for c, cell, al in zip(df.columns, row, align): - # cell = wfloat_format(cell) s = f'{nl} {{cell:{ad2[al]}{colw[c]}s}} ' nl = '\\&' sio.write(s.format(cell=cell)) - # if c=='p': - # print('COLp', cell, type(cell), s, s.format(cell=cell)) sio.write('\\& \\\\\n') sio.write(f'}};\n\n') - # decorations and post processing - horizontal and vertical lines nr, nc = df.shape - # add for the index and the last row plus 1 for the added spacer row at the top nr += nr_columns + 1 - # always include top and bottom - # you input a table row number and get a line below it; it is implemented as a line ABOVE the next row - # function to convert row numbers to TeX table format (edge case on last row -1 is nr and is caught, -2 - # is below second to last row = above last row) - # shift down extra 1 for the spacer row at the top def python_2_tex(x): return x + nr_columns + 2 if x >= 0 else nr + x + 3 @@ -1854,12 +1257,10 @@ class GT(object): hrule = list(tb_rules) logger.debug(f'hlines: {hrule}') - # why yshift = row_sep / 2 xshift = -column_sep / 2 descender_proportion = 0.25 - # top rule is special ls = 'thick' ln = 1 sio.write( @@ -1869,17 +1270,13 @@ class GT(object): ls = 'thick' if ln == nr + nr_columns + \ 1 else ('semithick' if ln == 1 + nr_columns else 'very thin') if ln < nr: - # line above TeX row ln+1 that exists sio.write(f'\\path[draw, {ls}] ([yshift={-yshift}em]{matrix_name}-{ln}-1.south west) -- ' f'([yshift={-yshift}em]{matrix_name}-{ln}-{nc+1}.south east);\n') else: - # line above row below bottom = line below last row - # descenders are 200 to 300 below baseline ln = nr sio.write(f'\\path[draw, thick] ([yshift={-descender_proportion-yshift}em]{matrix_name}-{ln}-1.base west) -- ' f'([yshift={-descender_proportion-yshift}em]{matrix_name}-{ln}-{nc+1}.base east);\n') - # if multi index put in lines within the index TODO make this better! if nr_columns > 1: for ln in range(2, nr_columns + 1): sio.write(f'\\path[draw, very thin] ([xshift={xshift}em, yshift={-yshift}em]' @@ -1888,9 +1285,6 @@ class GT(object): written = set(range(1, nc_index + 1)) if vrule and self.show_index: - # to left of col, 1 based, includes index - # write these first - # TODO fix madness vrule is to the left, mi_vrules are to the right... ls = 'very thin' for cn in vrule: if cn not in written: @@ -1901,13 +1295,8 @@ class GT(object): if len(mi_vrules) > 0: logger.debug( f'Generated vlines {mi_vrules}; already written {written}') - # vertical rules for the multi index - # these go to the RIGHT of the relevant column and reflect the index columns already - # mi_vrules = {level of index: [list of vrule columns] - # written keeps track of which vrules have been done already; start by cutting out the index columns ls = 'ultra thin' for k, cols in mi_vrules.items(): - # don't write the lowest level if k == len(mi_vrules) - 1: break for cn in cols: @@ -1931,7 +1320,6 @@ class GT(object): def make_rich(self, console, box_style=box.SQUARE): """Render to a rich table using Console object console.""" - # figure col widths and aligners cw = self.text_knowledge_df['recommended'] aligners = self.text_knowledge_df['alignment'] show_lines = self.config.hrule_widths[0] > 0 @@ -1979,10 +1367,8 @@ class GT(object): Greater Table - - ') + logger.info('CREATED CSS') + return '\n'.join(style) + + def make_html(self): + """Convert a pandas DataFrame to an HTML table.""" + index_name_to_level = dict( + zip(self.raw_df.index.names, range(self.nindex))) + index_change_level = self.index_change_level.map(index_name_to_level) + # this is easier and computed in the init + column_change_level = self.column_change_level + + # Start table + html = [f'
{self.caption}
{r}{nm}{nm}
{r}
{c}{c}{c}{c}
'] + if self.label != "": + pass + # TODO put in achor tag somehow!! + if self.caption != '': + html.append(f'') + + # Process header: allow_duplicates=True means can create cols with the same name + bit = self.df_html.T.reset_index(drop=False, allow_duplicates=True) + idx_header = bit.iloc[:self.nindex, :self.ncolumns] + columns = bit.iloc[self.nindex:, :self.ncolumns] + + # figure appropriate widths + tabs = self.html_knowledge_df['recommended'].map(lambda x: np.round(x, 3)) + + # set column widths; tabs returns lengths of strings in each column + tabs = np.array(tabs) + (self.padl + self.padr) / 12 + # this gets stripped out by quarto, so make part of style + html.append('') + for w in tabs: + html.append(f'') + html.append('') + + # TODO Add header aligners + # this is TRANSPOSED!! + if self.config.sparsify_columns: + html.append("") + for i in range(self.ncolumns): + # one per row of columns m index, usually only 1 + html.append("") + if self.show_index: + for j, r in enumerate(idx_header.iloc[:, i]): + # columns one per level of index + html.append(f'') + # if not for col span issue you could just to this: + # for j in range(self.ncols): + # hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' + # if j == 0: + # # start with the first column come what may + # vrule = f'grt-vrule-index' + # elif j >= self.column_change_level[i]: + # vrule = f'grt-vrule-{column_change_level[cum_col]}' + # else: + # vrule = '' + # html.append(f'') + # here, the groupby needs to consider all levels at and above i + # this concats all the levels + # need :i+1 to get down to the ith level + cum_col = 0 # keep track of where we are up to + for j, (nm, g) in enumerate(groupby(columns.iloc[:, :i + 1]. + apply(lambda x: ':::'.join(str(i) for i in x), axis=1))): + # ::: needs to be something that does not appear in the col names + # need to combine for groupby but be able to split off the last level + # picks off the name of the bottom level + nm = nm.split(':::')[-1] + hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' + colspan = sum(1 for _ in g) + if 0 < j: + vrule = f'grt-vrule-{column_change_level[cum_col]}' + elif j == 0 and self.show_index: + # start with the first column if showing index + vrule = f'grt-vrule-index' + else: + vrule = '' + if j == 0 and not self.show_index: + # first column, no index, left align label + html.append( + f'') + else: + html.append( + f'') + cum_col += colspan + html.append("") + html.append("") + else: + html.append("") + for i in range(self.ncolumns): + # one per row of columns m index, usually only 1 + html.append("") + if self.show_index: + for j, r in enumerate(idx_header.iloc[:, i]): + # columns one per level of index + html.append(f'') + for j, r in enumerate(columns.iloc[:, i]): + # one per column of dataframe + # figure how high up mindex the vrules go + # all headings get hrules, it's the vrules that are tricky + hrule = f'grt-bhrule-{i}' if i < self.ncolumns - 1 else '' + if 0 < j < self.ncols and i >= column_change_level[j]: + vrule = f'grt-vrule-{column_change_level[j]}' + elif j == 0 and self.show_index: + # start with the first column come what may + vrule = f'grt-vrule-index' + else: + vrule = '' + html.append( + f'') + html.append("") + html.append("") + + bold_idx = 'grt-bold' if self.config.font_bold_index else '' + html.append("") + for i, (n, r) in enumerate(self.df_html.iterrows()): + # one per row of dataframe + html.append("") + hrule = '' + if self.show_index: + for j, c in enumerate(r.iloc[:self.nindex]): + # dx = data in index + # if this is the level that changes for this row + # will use a top rule hence omit i = 0 which already has an hrule + # appears in the index change level. But if it DOES NOT appear then + # it isn't a change level so no rule required + if i > 0 and hrule == '' and i in index_change_level and j == index_change_level[i]: + hrule = f'grt-hrule-{j}' + # html.append(f'') + col_id = f'grt-c-{j}' + html.append( + f'') + for j, c in enumerate(r.iloc[self.nindex:]): + # first col left handled by index/body divider + if 0 < j < self.ncols: + vrule = f'grt-vrule-{column_change_level[j]}' + elif j == 0 and self.show_index: + # start with the first column come what may + vrule = f'grt-vrule-index' + else: + vrule = '' + # html.append(f'') + col_id = f'grt-c-{j+self.nindex}' + html.append( + f'') + html.append("") + html.append("") + text = '\n'.join(html) + self._df_html_text = Escaping.clean_html_tex(text) + logger.info('CREATED HTML') + self._df_style_text = self.make_style(tabs) + + def clean_style(self, soup): + """Minify CSS inside + + + +

Rendered Table

+ +{table_html} + + + +''' + p = Path(fn) + p.parent.mkdir(parents=True, exist_ok=True) + p = p.with_suffix('.html') + print(p) + html = html_boiler_plate.replace('{table_html}', self.html) + soup = BeautifulSoup(html, 'html.parser') + p.write_text(soup.prettify(), encoding='utf-8') + logger.info(f'Saved to {p}') + + def show_svg(self): + """Display svg in Jupyter.""" + svg = self.make_svg() + if svg != 'no svg output': + display(SVG(svg)) + else: + print('No SVG file available (TeX compile error).') + + def show_html(self, fn=''): + if fn == '': + with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp: + tmp_path = Path(tmp.name) + else: + tmp_path = Path(fn) + self.save_html(fn=tmp_path) + os.startfile(tmp_path) # native Windows way to open in default browser + return tmp_path + + @staticmethod + def uber_test(df, show_html=False, **kwargs): + """ + Print various diagnostics and all the formats. + + show_html -> run show_html to display in new browser tab. + """ + f = GT(df, **kwargs) + display(f) + if show_html: + f.show_html() + print(f) + f.show_svg() + display(df) + display(f.width_report()) + print(f.make_tikz()) + return f + + @staticmethod + def _is_namedtuple_instance(x) -> bool: + """Heuristic: namedtuple instances are tuples whose class defines _fields.""" + return isinstance(x, tuple) and isinstance(getattr(type(x), "_fields", None), tuple) + + @staticmethod + def _ntdf(t): + """Convert named tuple to pandas dataframe to display.""" + return pd.Series(t, index=pd.Index(t._fields, name="Item")).to_frame('Value') + diff --git a/greater_tables/fabrications.py b/greater_tables/fabrications.py index 57351df..c460be8 100644 --- a/greater_tables/fabrications.py +++ b/greater_tables/fabrications.py @@ -11,7 +11,6 @@ from math import prod from pathlib import Path from typing import Optional, Union, Literal import hashlib -import random import re import numpy as np @@ -93,7 +92,7 @@ class Fabricator: metric_suffix = ["", "rate", "score", "amount", "index", "ratio", "factor", "value"] - def __init__(self, decorate=False, seed: Optional[int] = None): + def __init__(self, decorate=False, pyarrow: bool = False, seed: Optional[int] = None): """ Fabricate small synthetic pandas DataFrames for testing. @@ -105,6 +104,7 @@ class Fabricator: self.seed = int( seed if seed is not None else np.random.SeedSequence().entropy) self.decorate = decorate + self.pyarrow = pyarrow # rng self.rng = np.random.default_rng(self.seed) @@ -295,6 +295,9 @@ class Fabricator: if simplify: df = self.drop_singleton_levels(df) + if self.pyarrow: + df = df.convert_dtypes(dtype_backend='pyarrow') + self.cache.appendleft(df) return df @@ -366,7 +369,7 @@ class Fabricator: if dtype == 'd': return random_datetime_series(n, rng=self.rng) if dtype == 'y': - return pd.Series(random.sample(range(1990, 2031), n)) + return pd.Series(self.rng.integers(1990, 2031, n)) if dtype == 't': start_dt = datetime.now() - timedelta(days=365 * 2) return pd.Series([ @@ -411,7 +414,8 @@ class Fabricator: @staticmethod def random_date_within_last_n_years(n: int) -> pd.Timestamp: today = datetime.today() - days = random.randint(0, n * 365) + # days = random.randint(0, n * 365) + days = self.rng.integers(0, n * 365, endpoint=True) return pd.Timestamp(today - timedelta(days=days)) def _insert_missing(self, df: pd.DataFrame, prop: float) -> pd.DataFrame: @@ -473,10 +477,11 @@ class Fabricator: return primes -def quick_fab(rows: int = 10, data_spec: str = 's3sfid', **kwargs): +def quick_fab(rows: int = 10, data_spec: str = 's3sfid', pyarrow=False, **kwargs): """One-stop quick fabrication of a random dataframe.""" - fab = Fabricator() - return fab.make(rows, data_spec, **kwargs) + fab = Fabricator(pyarrow=pyarrow) + df = fab.make(rows, data_spec, **kwargs) + return df rand_df = make_df = quick_df = quick_fab diff --git a/gt-testers.ipynb b/gt-testers.ipynb index d45062f..3043de5 100644 --- a/gt-testers.ipynb +++ b/gt-testers.ipynb @@ -6,2027 +6,108 @@ "metadata": {}, "source": [ "# Greater Tables Testers\n", + "\n", + "pyarrow stuff...\n", + "\n", "## Provenance\n", "* Created new: 2025-06-06" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f733e11c-9165-4bb8-9dc5-a4ba0c6b556a", "metadata": {}, "outputs": [], "source": [ - "from greater_tables import GT\n", + "from greater_tables.core_old import GT\n", "import greater_tables.utilities as gtu\n", - "import greater_tables.test_tables as gtt\n", - "import scratch as sc" + "import greater_tables.fabrications as gtf\n", + "from pathlib import Path\n", + "import pandas as pd" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "dfa32d92-e4c3-430f-b5be-9f8ab3cb74b5", "metadata": {}, "outputs": [], "source": [ - "tdf = gtu.TestDFGenerator()\n", - "ts = tdf.test_suite()" + "# make some random dataframes\n", + "fab = gtf.Fabricator(pyarrow=True)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, + "id": "aa6fd1eb-1a20-4400-845f-8aa7567ad25e", + "metadata": {}, + "outputs": [], + "source": [ + "for _ in range(10):\n", + " display(GT(fab.random().head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46147a8a-5c8a-47ad-a5af-36733fadf64b", + "metadata": {}, + "outputs": [], + "source": [ + "dfs = dict(zip('ABCDE', [fab.random().head(10) for _ in range(5)]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ae6e5af-ee7d-4917-851e-2174c772ea6d", + "metadata": {}, + "outputs": [], + "source": [ + "p = Path('\\\\s\\\\appdata\\\\archivum\\\\libraries\\\\silly-library')\n", + "ps = list(p.glob('*.feather'))\n", + "ps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdb7f70c-e90a-45db-b26b-93a624d7ef2c", + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {f.name: pd.read_feather(f, dtype_backend='pyarrow') for f in ps}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "547176b4-ad8d-419a-baa9-caa725e746a5", + "metadata": {}, + "outputs": [], + "source": [ + "for v in dfs.values(): print(v.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "1224b554-c80e-4ca4-a34a-b51bdf41ec07", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "
{self.caption}
{r}{nm}{nm}{nm}
{r}{r}
{c}{c}{c}{c}
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
floats dates filenames
currency estimatereference timestamp transaction supplierestimatecategory duration user expense ratio
8,859.5372000-12-125,597.0442,045
6,390.1632021-09-223,810
3,536.1532020-08-112,825.8544,862
6,812.9862015-03-169,982.1223,642
9,730.5742011-07-055,179.8223,102
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{floats dates filenames}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=5.40em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=5.40em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TSXEYJZS4F67Y) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " currency estimate\\grtspacer \\& reference timestamp transaction supplier\\grtspacer \\& estimate\\grtspacer \\& category duration user expense ratio\\grtspacer \\& \\\\\n", - " 8,859.537 \\& 2000-12-12 \\& 5,597.044 \\& 2,045 \\& \\\\\n", - " 6,390.163 \\& 2021-09-22 \\& \\& 3,810 \\& \\\\\n", - " 3,536.153 \\& 2020-08-11 \\& 2,825.854 \\& 4,862 \\& \\\\\n", - " 6,812.986 \\& 2015-03-16 \\& 9,982.122 \\& 3,642 \\& \\\\\n", - " 9,730.574 \\& 2011-07-05 \\& 5,179.822 \\& 3,102 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TSXEYJZS4F67Y-1-1.south west) -- (TSXEYJZS4F67Y-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TSXEYJZS4F67Y-2-1.south west) -- ([yshift=-0.0625em]TSXEYJZS4F67Y-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TSXEYJZS4F67Y-7-1.base west) -- ([yshift=-0.3125em]TSXEYJZS4F67Y-7-5.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TSXEYJZS4F67Y)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
dense text and numbers
location reference transactionemail loss date operationaccount type extension transaction combined ratioprocessing addresstransaction loss date failure duration locationexpense ratio identifier filename
transaction operation status filename timestamp location duration extension loss ratio remark account balance note project supplier currency user combined ratio client failure category4,857.9201,050comment status supplier type filename extension processing combined ratio identifier amount premium estimate transaction address location balance account loss date note duration2027-02-14
loss ratio amount timestamp loss date transaction note category extension remark type failure estimate location email premium supplier balance combined ratio account322.0393,135category loss ratio entry estimate operation type description premium timestamp failure duration2019-11-238,801.455
type currency address remark transaction identifier processing email filename supplier loss ratio location comment failure timestamp7,236.9053,228supplier account project amount balance description processing reference note remark status currency loss ratio identifier entry2006-03-223,062.313
operation status processing expense ratio premium address identifier supplier timestamp duration currency balance location project category remark amount3,552.6012,771filename estimate extension description supplier amount identifier project expense ratio processing account2005-10-062,792.562
transaction extension expense ratio user reference operation note amount timestamp combined ratio estimate project premium description filename location4,779.972178note premium reference user amount category location entry extension duration description loss date timestamp address failure loss ratio client identifier estimate filename project account type currency2001-09-248,642.682
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{dense text and numbers}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=110.40em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=5.40em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=121.20em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 6/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TEV43N4TJRUB4) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " location reference transaction\\grtspacer \\& email loss date operation\\grtspacer \\& account type extension transaction combined ratio\\grtspacer \\& processing address\\grtspacer \\& transaction loss date failure duration location\\grtspacer \\& expense ratio identifier filename\\grtspacer \\& \\\\\n", - " transaction operation status filename timestamp location duration extension loss ratio remark account balance note project supplier currency user combined ratio client failure category \\& 4,857.920 \\& 1,050 \\& comment status supplier type filename extension processing combined ratio identifier amount premium estimate transaction address location balance account loss date note duration \\& 2027-02-14 \\& \\& \\\\\n", - " loss ratio amount timestamp loss date transaction note category extension remark type failure estimate location email premium supplier balance combined ratio account \\& 322.039 \\& 3,135 \\& category loss ratio entry estimate operation type description premium timestamp failure duration \\& 2019-11-23 \\& 8,801.455 \\& \\\\\n", - " type currency address remark transaction identifier processing email filename supplier loss ratio location comment failure timestamp \\& 7,236.905 \\& 3,228 \\& supplier account project amount balance description processing reference note remark status currency loss ratio identifier entry \\& 2006-03-22 \\& 3,062.313 \\& \\\\\n", - " operation status processing expense ratio premium address identifier supplier timestamp duration currency balance location project category remark amount \\& 3,552.601 \\& 2,771 \\& filename estimate extension description supplier amount identifier project expense ratio processing account \\& 2005-10-06 \\& 2,792.562 \\& \\\\\n", - " transaction extension expense ratio user reference operation note amount timestamp combined ratio estimate project premium description filename location \\& 4,779.972 \\& 178 \\& note premium reference user amount category location entry extension duration description loss date timestamp address failure loss ratio client identifier estimate filename project account type currency \\& 2001-09-24 \\& 8,642.682 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TEV43N4TJRUB4-1-1.south west) -- (TEV43N4TJRUB4-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TEV43N4TJRUB4-2-1.south west) -- ([yshift=-0.0625em]TEV43N4TJRUB4-2-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TEV43N4TJRUB4-7-1.base west) -- ([yshift=-0.3125em]TEV43N4TJRUB4-7-7.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TEV43N4TJRUB4)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
mixed data with missing
note type estimate statusoperation descriptionlocationaddress currency durationamount address account estimate client
2,0262006-07-04amount_estimate_type_loss ratio_reference.pdf
client supplier balance loss date expense ratio identifier operation email entry loss ratio address comment extension project processing3,057type_note_entry_extension.pdf
3,128.995note timestamp supplier email address status transaction loss ratio extension operation description filename client comment loss date expense ratio premium remark categorypremium_amount_entry.pdf
type estimate note balance email loss ratio description location identifier amount currency category extension entry project combined ratio timestamp2007-05-21extension_identifier_account.pdf
6,976.716amount category user failure status entry estimate balance type email comment processing location operation currency extension2,6542004-09-13
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{mixed data with missing}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=5.40em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=102.60em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=27.00em},\n", - "\tcolumn 6/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TV5PU4PMJQ5AL) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\\\\n", - " note type estimate status\\grtspacer \\& operation description\\grtspacer \\& location\\grtspacer \\& address currency duration\\grtspacer \\& amount address account estimate client\\grtspacer \\& \\\\\n", - " \\& \\& 2,026 \\& 2006-07-04 \\& amount_estimate_type_loss ratio_reference.pdf \\& \\\\\n", - " \\& client supplier balance loss date expense ratio identifier operation email entry loss ratio address comment extension project processing \\& 3,057 \\& \\& type_note_entry_extension.pdf \\& \\\\\n", - " 3,128.995 \\& note timestamp supplier email address status transaction loss ratio extension operation description filename client comment loss date expense ratio premium remark category \\& \\& \\& premium_amount_entry.pdf \\& \\\\\n", - " \\& type estimate note balance email loss ratio description location identifier amount currency category extension entry project combined ratio timestamp \\& \\& 2007-05-21 \\& extension_identifier_account.pdf \\& \\\\\n", - " 6,976.716 \\& amount category user failure status entry estimate balance type email comment processing location operation currency extension \\& 2,654 \\& 2004-09-13 \\& \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TV5PU4PMJQ5AL-1-1.south west) -- (TV5PU4PMJQ5AL-1-6.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TV5PU4PMJQ5AL-2-1.south west) -- ([yshift=-0.0625em]TV5PU4PMJQ5AL-2-6.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TV5PU4PMJQ5AL-7-1.base west) -- ([yshift=-0.3125em]TV5PU4PMJQ5AL-7-6.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TV5PU4PMJQ5AL)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
long header names
Detailed Instrumentation Configuration SummaryArchive Metadata Extraction Date FieldOverview Record Approximation NotesVelocity Gradient Approximation FloatPressure Summary Int Field
expense ratio email operation type balance timestamp premium identifier duration loss date2026-05-14user filename transaction extension premium status category comment combined ratio operation timestamp type entry address8,183.384934
project comment remark address transaction location identifier currency balance description expense ratio note type duration amount supplier2001-11-08failure transaction status timestamp address premium expense ratio user note extension remark currency identifier category operation1,280.8082,692
premium email estimate reference transaction extension project note description combined ratio balance filename location status identifier expense ratio supplier category type client entry timestamp loss ratio failure processing2015-07-16email status processing amount note project extension account description user address failure expense ratio filename category transaction premium entry loss ratio estimate supplier remark combined ratio reference location9,362.2974,033
estimate reference failure amount identifier filename email combined ratio loss date comment2022-10-18type address account location reference supplier currency processing identifier balance failure category timestamp combined ratio transaction9,161.457285
reference entry email currency description account supplier identifier premium location combined ratio2016-07-10address operation transaction email failure extension project note currency expense ratio reference timestamp processing2,059.8823,151
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{long header names}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=136.80em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=133.20em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=7.80em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 6/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TH235NQCO5NMR) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\\\\n", - " Detailed Instrumentation Configuration Summary\\grtspacer \\& Archive Metadata Extraction Date Field\\grtspacer \\& Overview Record Approximation Notes\\grtspacer \\& Velocity Gradient Approximation Float\\grtspacer \\& Pressure Summary Int Field\\grtspacer \\& \\\\\n", - " expense ratio email operation type balance timestamp premium identifier duration loss date \\& 2026-05-14 \\& user filename transaction extension premium status category comment combined ratio operation timestamp type entry address \\& 8,183.384 \\& 934 \\& \\\\\n", - " project comment remark address transaction location identifier currency balance description expense ratio note type duration amount supplier \\& 2001-11-08 \\& failure transaction status timestamp address premium expense ratio user note extension remark currency identifier category operation \\& 1,280.808 \\& 2,692 \\& \\\\\n", - " premium email estimate reference transaction extension project note description combined ratio balance filename location status identifier expense ratio supplier category type client entry timestamp loss ratio failure processing \\& 2015-07-16 \\& email status processing amount note project extension account description user address failure expense ratio filename category transaction premium entry loss ratio estimate supplier remark combined ratio reference location \\& 9,362.297 \\& 4,033 \\& \\\\\n", - " estimate reference failure amount identifier filename email combined ratio loss date comment \\& 2022-10-18 \\& type address account location reference supplier currency processing identifier balance failure category timestamp combined ratio transaction \\& 9,161.457 \\& 285 \\& \\\\\n", - " reference entry email currency description account supplier identifier premium location combined ratio \\& 2016-07-10 \\& address operation transaction email failure extension project note currency expense ratio reference timestamp processing \\& 2,059.882 \\& 3,151 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TH235NQCO5NMR-1-1.south west) -- (TH235NQCO5NMR-1-6.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TH235NQCO5NMR-2-1.south west) -- ([yshift=-0.0625em]TH235NQCO5NMR-2-6.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TH235NQCO5NMR-7-1.base west) -- ([yshift=-0.3125em]TH235NQCO5NMR-7-6.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TH235NQCO5NMR)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
file-centric record
category entry project emaillocation balance premium usercommentsupplier operation filename description currencycomment estimate premiumreference suppliertype
category_note_currency.pdf2024-08-15project amount email loss date account entry status user reference supplier location loss ratio description type processing failure combined ratio filename premium transaction timestamp remark expense ratio comment8,154.1443,7502007-10-31
failure_loss ratio_expense ratio_timestamp_type.pdf2001-02-13expense ratio identifier processing filename amount client timestamp remark account location project estimate loss ratio failure transaction user operation balance type currency3,608.8164352008-09-26processing_loss ratio_category.pdf
loss date_loss ratio_amount_remark_filename.pdf2002-08-23processing user supplier premium identifier transaction amount email comment extension loss date combined ratio failure address entry estimate project balance note operation timestamp description reference type5,306.7829532002-09-10
filename_currency_balance.pdf2015-04-01currency address reference location project operation premium email remark note extension comment account amount filename balance category entry loss ratio failure identifier type processing5,628.4583,6412001-07-21amount_supplier.pdf
note_category_loss date_duration_currency.pdf2019-07-06failure loss ratio combined ratio currency user address expense ratio status premium identifier loss date location account note reference email remark supplier project type6,697.8234,9752007-05-09entry_balance_address.pdf
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{file-centric record}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=30.60em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=128.40em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 6/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={nodes={align=left }, nosep, text width=20.40em},\n", - "\tcolumn 8/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (T2TFTFBFALI7E) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " category entry project email\\grtspacer \\& location balance premium user\\grtspacer \\& comment\\grtspacer \\& supplier operation filename description currency\\grtspacer \\& comment estimate premium\\grtspacer \\& reference supplier\\grtspacer \\& type\\grtspacer \\& \\\\\n", - " category_note_currency.pdf \\& 2024-08-15 \\& project amount email loss date account entry status user reference supplier location loss ratio description type processing failure combined ratio filename premium transaction timestamp remark expense ratio comment \\& 8,154.144 \\& 3,750 \\& 2007-10-31 \\& \\& \\\\\n", - " failure_loss ratio_expense ratio_timestamp_type.pdf \\& 2001-02-13 \\& expense ratio identifier processing filename amount client timestamp remark account location project estimate loss ratio failure transaction user operation balance type currency \\& 3,608.816 \\& 435 \\& 2008-09-26 \\& processing_loss ratio_category.pdf \\& \\\\\n", - " loss date_loss ratio_amount_remark_filename.pdf \\& 2002-08-23 \\& processing user supplier premium identifier transaction amount email comment extension loss date combined ratio failure address entry estimate project balance note operation timestamp description reference type \\& 5,306.782 \\& 953 \\& 2002-09-10 \\& \\& \\\\\n", - " filename_currency_balance.pdf \\& 2015-04-01 \\& currency address reference location project operation premium email remark note extension comment account amount filename balance category entry loss ratio failure identifier type processing \\& 5,628.458 \\& 3,641 \\& 2001-07-21 \\& amount_supplier.pdf \\& \\\\\n", - " note_category_loss date_duration_currency.pdf \\& 2019-07-06 \\& failure loss ratio combined ratio currency user address expense ratio status premium identifier loss date location account note reference email remark supplier project type \\& 6,697.823 \\& 4,975 \\& 2007-05-09 \\& entry_balance_address.pdf \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (T2TFTFBFALI7E-1-1.south west) -- (T2TFTFBFALI7E-1-8.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]T2TFTFBFALI7E-2-1.south west) -- ([yshift=-0.0625em]T2TFTFBFALI7E-2-8.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]T2TFTFBFALI7E-7-1.base west) -- ([yshift=-0.3125em]T2TFTFBFALI7E-7-8.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=T2TFTFBFALI7E)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "dfs = gtt.make_dataframe_set(5)\n", "for k, v in dfs.items():\n", - " display(GT(v, caption=k, show_index=False))" + " display(GT(v.head(20), caption=k, show_index=False))" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "01f627b5-22b9-45d1-9833-0afdc497fd7e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
indexConsideration of ConsequencesProbabilityExpected Value
0A rather long text value that could wrap badly.LikelyHigh
1ShortUnlikelyLow
2A second problematic entry with spaces.ModerateModerate
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=28.20em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TW3YUZ36SFQ35) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& Consideration of Consequences\\grtspacer \\& Probability\\grtspacer \\& Expected Value\\grtspacer \\& \\\\\n", - " 0 \\& A rather long text value that could wrap badly. \\& Likely \\& High \\& \\\\\n", - " 1 \\& Short \\& Unlikely \\& Low \\& \\\\\n", - " 2 \\& A second problematic entry with spaces. \\& Moderate \\& Moderate \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TW3YUZ36SFQ35-1-1.south west) -- (TW3YUZ36SFQ35-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TW3YUZ36SFQ35-2-1.south west) -- ([yshift=-0.0625em]TW3YUZ36SFQ35-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TW3YUZ36SFQ35-5-1.base west) -- ([yshift=-0.3125em]TW3YUZ36SFQ35-5-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TW3YUZ36SFQ35-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TW3YUZ36SFQ35-5-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TW3YUZ36SFQ35)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
indexevent_datetimestamptransaction_code
02024-12-282024-12-28T14:23:00ABC-1001-ZZ
12025-01-052025-01-05T09:12:45XYZ-2048-AA
22031-06-212031-06-21T23:59:59LONG-CODE-2025-EXTREME
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=6.60em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=11.40em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=13.20em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TODI5JTUWIHSZ) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& event\\_date\\grtspacer \\& timestamp\\grtspacer \\& transaction\\_code\\grtspacer \\& \\\\\n", - " 0 \\& 2024-12-28 \\& 2024-12-28T14:23:00 \\& ABC-1001-ZZ \\& \\\\\n", - " 1 \\& 2025-01-05 \\& 2025-01-05T09:12:45 \\& XYZ-2048-AA \\& \\\\\n", - " 2 \\& 2031-06-21 \\& 2031-06-21T23:59:59 \\& LONG-CODE-2025-EXTREME \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TODI5JTUWIHSZ-1-1.south west) -- (TODI5JTUWIHSZ-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TODI5JTUWIHSZ-2-1.south west) -- ([yshift=-0.0625em]TODI5JTUWIHSZ-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TODI5JTUWIHSZ-5-1.base west) -- ([yshift=-0.3125em]TODI5JTUWIHSZ-5-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TODI5JTUWIHSZ-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TODI5JTUWIHSZ-5-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TODI5JTUWIHSZ)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
indexnotesstatuspath
0Item 1: delivered; ready for invoice./usr/local/bin/run.sh
1Warning -- unit may be faulty?C:\\Program Files\\App\\main.exe
2Check: power supply (see page 42)~/Documents/projects/final-report.pdf
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=22.20em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=3.60em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=22.20em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TGGNTAGJXWXGX) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& notes\\grtspacer \\& status\\grtspacer \\& path\\grtspacer \\& \\\\\n", - " 0 \\& Item 1: delivered; ready for invoice. \\& ✓ \\& /usr/local/bin/run.sh \\& \\\\\n", - " 1 \\& Warning -- unit may be faulty? \\& ✗ \\& C:\\Program Files\\App\\main.exe \\& \\\\\n", - " 2 \\& Check: power supply (see page 42) \\& ↺ \\& ~/Documents/projects/final-report.pdf \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TGGNTAGJXWXGX-1-1.south west) -- (TGGNTAGJXWXGX-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TGGNTAGJXWXGX-2-1.south west) -- ([yshift=-0.0625em]TGGNTAGJXWXGX-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TGGNTAGJXWXGX-5-1.base west) -- ([yshift=-0.3125em]TGGNTAGJXWXGX-5-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TGGNTAGJXWXGX-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TGGNTAGJXWXGX-5-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TGGNTAGJXWXGX)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
indexSerialMD5 HashUnwrapped
0A123B456a5c3e1d7f2b9c3d6f1e4a9b3c7d1e2f3SingleLineValue
1X987Y6549f1c4d3e7a6b2d5c8e3f9a1b7c6d4e5fAnotherOne
2Z000Z111ffb1a2c3d4e5f67890123456789abcdefNoBreaksHere
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=4.80em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=19.80em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=9.00em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TFSJLV4XGAZKI) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& Serial\\grtspacer \\& MD5 Hash\\grtspacer \\& Unwrapped\\grtspacer \\& \\\\\n", - " 0 \\& A123B456 \\& a5c3e1d7f2b9c3d6f1e4a9b3c7d1e2f3 \\& SingleLineValue \\& \\\\\n", - " 1 \\& X987Y654 \\& 9f1c4d3e7a6b2d5c8e3f9a1b7c6d4e5f \\& AnotherOne \\& \\\\\n", - " 2 \\& Z000Z111 \\& ffb1a2c3d4e5f67890123456789abcdef \\& NoBreaksHere \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TFSJLV4XGAZKI-1-1.south west) -- (TFSJLV4XGAZKI-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TFSJLV4XGAZKI-2-1.south west) -- ([yshift=-0.0625em]TFSJLV4XGAZKI-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TFSJLV4XGAZKI-5-1.base west) -- ([yshift=-0.3125em]TFSJLV4XGAZKI-5-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TFSJLV4XGAZKI-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TFSJLV4XGAZKI-5-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TFSJLV4XGAZKI)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
SimulationInputOutput
indexIDDate GeneratedModel NameParametersResult Summary
012024-11-15RiskModelV2α=0.95, β=3.2Stable. 5 iterations. RMSE=0.003
122025-02-04SuperModelα=0.99, β=2.1Converged quickly. RMSE=0.001
232026-08-12LongModelNameWithDetailsα=0.90, β=4.0, γ=1.0Diverged on step 4. RMSE=N/A
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 3/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=14.40em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=12.00em},\n", - "\tcolumn 6/.style={nodes={align=left }, nosep, text width=19.20em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TOK5IKTT4JQGC) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " \\grtspacer \\& Simulation\\grtspacer \\& \\grtspacer \\& Input\\grtspacer \\& \\grtspacer \\& Output\\grtspacer \\& \\\\\n", - " index\\grtspacer \\& ID\\grtspacer \\& Date Generated\\grtspacer \\& Model Name\\grtspacer \\& Parameters\\grtspacer \\& Result Summary\\grtspacer \\& \\\\\n", - " 0 \\& 1 \\& 2024-11-15 \\& RiskModelV2 \\& α=0.95, β=3.2 \\& Stable. 5 iterations. RMSE=0.003 \\& \\\\\n", - " 1 \\& 2 \\& 2025-02-04 \\& SuperModel \\& α=0.99, β=2.1 \\& Converged quickly. RMSE=0.001 \\& \\\\\n", - " 2 \\& 3 \\& 2026-08-12 \\& LongModelNameWithDetails \\& α=0.90, β=4.0, γ=1.0 \\& Diverged on step 4. RMSE=N/A \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TOK5IKTT4JQGC-1-1.south west) -- (TOK5IKTT4JQGC-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TOK5IKTT4JQGC-3-1.south west) -- ([yshift=-0.0625em]TOK5IKTT4JQGC-3-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TOK5IKTT4JQGC-6-1.base west) -- ([yshift=-0.3125em]TOK5IKTT4JQGC-6-7.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TOK5IKTT4JQGC-2-2.south west) -- ([yshift=-0.0625em]TOK5IKTT4JQGC-2-7.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TOK5IKTT4JQGC-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TOK5IKTT4JQGC-6-2.base west);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TOK5IKTT4JQGC-1-3.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TOK5IKTT4JQGC-6-3.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TOK5IKTT4JQGC-1-5.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TOK5IKTT4JQGC-6-5.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TOK5IKTT4JQGC)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "dfm = gtt.make_manual_tests()\n", "for d in dfm:\n", @@ -2035,252 +116,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "86032085-3794-4af9-bfd8-5357af200196", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
indexfailure status combined ratio project timestamp (str)client premium (int)email identifier description supplier (str)extension duration reference premium (str)category processing client (date)
0Finish interest possible executive.115.00Maintain song.Hundred police much.None
1Kid sound off author.nanInternational step success likely police.Old morning can.2016-07-18
2Mother nearly cover girl.9,817.00Check.Your huge another.2015-01-15
3Heavy doctor deal old purpose population cost listen.7,054.00Parent paper poor star professor several measure.Them development part own recent everything.2021-02-20
4Old between little either rise.9,834.00Student mind thus dog range.Any pretty suddenly.2018-05-17
5None218.00No serious significant degree same.Boy under beautiful news apply run.2021-07-29
6Able miss forget wish television future.5,314.00Drive research.Should buy him floor actually.2022-04-23
7Always left commercial social.2,382.00Yet watch land there just foot.Ago must manage turn where manage take to ready.2021-06-29
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=31.80em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=29.40em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=28.80em},\n", - "\tcolumn 6/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TFVI7QJNH5MTR) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& failure status combined ratio project timestamp (str)\\grtspacer \\& client premium (int)\\grtspacer \\& email identifier description supplier (str)\\grtspacer \\& extension duration reference premium (str)\\grtspacer \\& category processing client (date)\\grtspacer \\& \\\\\n", - " 0 \\& Finish interest possible executive. \\& 115.00 \\& Maintain song. \\& Hundred police much. \\& None \\& \\\\\n", - " 1 \\& Kid sound off author. \\& nan \\& International step success likely police. \\& Old morning can. \\& 2016-07-18 \\& \\\\\n", - " 2 \\& Mother nearly cover girl. \\& 9,817.00 \\& Check. \\& Your huge another. \\& 2015-01-15 \\& \\\\\n", - " 3 \\& Heavy doctor deal old purpose population cost listen. \\& 7,054.00 \\& Parent paper poor star professor several measure. \\& Them development part own recent everything. \\& 2021-02-20 \\& \\\\\n", - " 4 \\& Old between little either rise. \\& 9,834.00 \\& Student mind thus dog range. \\& Any pretty suddenly. \\& 2018-05-17 \\& \\\\\n", - " 5 \\& None \\& 218.00 \\& No serious significant degree same. \\& Boy under beautiful news apply run. \\& 2021-07-29 \\& \\\\\n", - " 6 \\& Able miss forget wish television future. \\& 5,314.00 \\& Drive research. \\& Should buy him floor actually. \\& 2022-04-23 \\& \\\\\n", - " 7 \\& Always left commercial social. \\& 2,382.00 \\& Yet watch land there just foot. \\& Ago must manage turn where manage take to ready. \\& 2021-06-29 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TFVI7QJNH5MTR-1-1.south west) -- (TFVI7QJNH5MTR-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TFVI7QJNH5MTR-2-1.south west) -- ([yshift=-0.0625em]TFVI7QJNH5MTR-2-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TFVI7QJNH5MTR-10-1.base west) -- ([yshift=-0.3125em]TFVI7QJNH5MTR-10-7.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TFVI7QJNH5MTR-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TFVI7QJNH5MTR-10-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TFVI7QJNH5MTR)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = gtt.make_test_dataframe(8, 5)\n", "f = GT(df, max_table_width=50)\n", @@ -2289,64 +128,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1f4ae417-7ed0-45f7-9da4-79ec3667f96a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\\begin{table}\n", - "% caption placeholder\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=31.80em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 4/.style={nodes={align=left }, nosep, text width=29.40em},\n", - "\tcolumn 5/.style={nodes={align=left }, nosep, text width=28.80em},\n", - "\tcolumn 6/.style={nodes={align=left }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TADRCD6DUU2DZ) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " index\\grtspacer \\& failure status combined ratio project timestamp (str)\\grtspacer \\& client premium (int)\\grtspacer \\& email identifier description supplier (str)\\grtspacer \\& extension duration reference premium (str)\\grtspacer \\& category processing client (date)\\grtspacer \\& \\\\\n", - " 0 \\& Finish interest possible executive. \\& 115.00 \\& Maintain song. \\& Hundred police much. \\& None \\& \\\\\n", - " 1 \\& Kid sound off author. \\& nan \\& International step success likely police. \\& Old morning can. \\& 2016-07-18 \\& \\\\\n", - " 2 \\& Mother nearly cover girl. \\& 9,817.00 \\& Check. \\& Your huge another. \\& 2015-01-15 \\& \\\\\n", - " 3 \\& Heavy doctor deal old purpose population cost listen. \\& 7,054.00 \\& Parent paper poor star professor several measure. \\& Them development part own recent everything. \\& 2021-02-20 \\& \\\\\n", - " 4 \\& Old between little either rise. \\& 9,834.00 \\& Student mind thus dog range. \\& Any pretty suddenly. \\& 2018-05-17 \\& \\\\\n", - " 5 \\& None \\& 218.00 \\& No serious significant degree same. \\& Boy under beautiful news apply run. \\& 2021-07-29 \\& \\\\\n", - " 6 \\& Able miss forget wish television future. \\& 5,314.00 \\& Drive research. \\& Should buy him floor actually. \\& 2022-04-23 \\& \\\\\n", - " 7 \\& Always left commercial social. \\& 2,382.00 \\& Yet watch land there just foot. \\& Ago must manage turn where manage take to ready. \\& 2021-06-29 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TADRCD6DUU2DZ-1-1.south west) -- (TADRCD6DUU2DZ-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TADRCD6DUU2DZ-2-1.south west) -- ([yshift=-0.0625em]TADRCD6DUU2DZ-2-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TADRCD6DUU2DZ-10-1.base west) -- ([yshift=-0.3125em]TADRCD6DUU2DZ-10-7.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TADRCD6DUU2DZ-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TADRCD6DUU2DZ-10-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "f = GT(df, max_table_width=40)\n", "print(f._repr_latex_())" @@ -2370,1225 +155,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dc15f2fe-58e9-4618-9dcf-ab9dcdfa3302", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
basic
discontinuanceboning floatcylinder floatgrassy datelurching datetimemonitoring datepleasantly intteaser yearweighing year
9433.2630.000002014-05-172022-03-212011-03-14-6,346.002,011.002,017
10,4245.3370.000032017-06-152009-10-132028-01-017,637.001,993.002,016
11,5820.2190.174782014-05-172018-11-022011-03-14-2,602.002,007.002,010
30,9460.0000.000042018-04-022011-04-242009-02-03-4,318.001,995.002,026
49,8570.0020.102292018-04-022018-11-022028-10-249,433.001,996.002,007
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{basic}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.60em},\n", - "\tcolumn 2/.style={nodes={align=right }, nosep, text width=3.60em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 4/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 6/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 9/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 10/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (T7XQCSCCKYWUR) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " discontinuance\\grtspacer \\& boning float\\grtspacer \\& cylinder float\\grtspacer \\& grassy date\\grtspacer \\& lurching datetime\\grtspacer \\& monitoring date\\grtspacer \\& pleasantly int\\grtspacer \\& teaser year\\grtspacer \\& weighing year\\grtspacer \\& \\\\\n", - " 943 \\& 3.263 \\& 0.00000 \\& 2014-05-17 \\& 2022-03-21 \\& 2011-03-14 \\& -6,346.00 \\& 2,011.00 \\& 2,017 \\& \\\\\n", - " 10,424 \\& 5.337 \\& 0.00003 \\& 2017-06-15 \\& 2009-10-13 \\& 2028-01-01 \\& 7,637.00 \\& 1,993.00 \\& 2,016 \\& \\\\\n", - " 11,582 \\& 0.219 \\& 0.17478 \\& 2014-05-17 \\& 2018-11-02 \\& 2011-03-14 \\& -2,602.00 \\& 2,007.00 \\& 2,010 \\& \\\\\n", - " 30,946 \\& 0.000 \\& 0.00004 \\& 2018-04-02 \\& 2011-04-24 \\& 2009-02-03 \\& -4,318.00 \\& 1,995.00 \\& 2,026 \\& \\\\\n", - " 49,857 \\& 0.002 \\& 0.10229 \\& 2018-04-02 \\& 2018-11-02 \\& 2028-10-24 \\& 9,433.00 \\& 1,996.00 \\& 2,007 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (T7XQCSCCKYWUR-1-1.south west) -- (T7XQCSCCKYWUR-1-10.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]T7XQCSCCKYWUR-2-1.south west) -- ([yshift=-0.0625em]T7XQCSCCKYWUR-2-10.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]T7XQCSCCKYWUR-7-1.base west) -- ([yshift=-0.3125em]T7XQCSCCKYWUR-7-10.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]T7XQCSCCKYWUR-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]T7XQCSCCKYWUR-7-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=T7XQCSCCKYWUR)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
timeseries
privatesSacramento Edgewise Outlive floatSooner Celled Complements floatSuperheroes Prejudge Carnivore datetime
2007-04-2425.87-803.629u2008-10-28
2007-07-101.00-2.8672011-04-01
2008-09-3037,517.98 10.719M2032-08-18
2010-08-180.92-7242901.911Y2018-10-13
2013-07-26133.49-0.000y2019-01-15
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{timeseries}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=7.80em},\n", - "\tcolumn 4/.style={nodes={align=center}, nosep, text width=6.60em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TYWTZOU2SXCNM) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " privates\\grtspacer \\& Sacramento Edgewise Outlive float\\grtspacer \\& Sooner Celled Complements float\\grtspacer \\& Superheroes Prejudge Carnivore datetime\\grtspacer \\& \\\\\n", - " 2007-04-24 \\& 25.87 \\& -803.629u \\& 2008-10-28 \\& \\\\\n", - " 2007-07-10 \\& 1.00 \\& -2.867 \\& 2011-04-01 \\& \\\\\n", - " 2008-09-30 \\& 37,517.98 \\& 10.719M \\& 2032-08-18 \\& \\\\\n", - " 2010-08-18 \\& 0.92 \\& -7242901.911Y \\& 2018-10-13 \\& \\\\\n", - " 2013-07-26 \\& 133.49 \\& -0.000y \\& 2019-01-15 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TYWTZOU2SXCNM-1-1.south west) -- (TYWTZOU2SXCNM-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TYWTZOU2SXCNM-2-1.south west) -- ([yshift=-0.0625em]TYWTZOU2SXCNM-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TYWTZOU2SXCNM-7-1.base west) -- ([yshift=-0.3125em]TYWTZOU2SXCNM-7-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TYWTZOU2SXCNM-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TYWTZOU2SXCNM-7-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TYWTZOU2SXCNM)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
multiindex
federallyrestrictionsdolphinAurora Merge Calculates floatBombing Honking Garage intFunnier Personals Shovels floatHealthier Chemically Construed floatWhereabouts Sept Cakes int
44,379disenfranchise80,923 32.454337,462,522 21.571M 238.509M-4,327
hybrid52,773 3.628M494,694,322 1.282GNaN6,496
58,702disenfranchise474 3.842M785,021,862 2.261k 11.004k-5,662
25,363 16.184M627,061,405 752.930k 398.233k6,743
41,796 3.821G562,015,666 9.799M 162.201m-2,439
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{multiindex}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.60em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=8.40em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=3.60em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=6.60em},\n", - "\tcolumn 6/.style={nodes={align=right }, nosep, text width=5.40em},\n", - "\tcolumn 7/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=6.60em},\n", - "\tcolumn 9/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TS57LTFJE7RXK) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " federally\\grtspacer \\& restrictions\\grtspacer \\& dolphin\\grtspacer \\& Aurora Merge Calculates float\\grtspacer \\& Bombing Honking Garage int\\grtspacer \\& Funnier Personals Shovels float\\grtspacer \\& Healthier Chemically Construed float\\grtspacer \\& Whereabouts Sept Cakes int\\grtspacer \\& \\\\\n", - " 44,379 \\& disenfranchise \\& 80,923 \\& 32.454 \\& 337,462,522 \\& 21.571M \\& 238.509M \\& -4,327 \\& \\\\\n", - " \\& hybrid \\& 52,773 \\& 3.628M \\& 494,694,322 \\& 1.282G \\& NaN \\& 6,496 \\& \\\\\n", - " 58,702 \\& disenfranchise \\& 474 \\& 3.842M \\& 785,021,862 \\& 2.261k \\& 11.004k \\& -5,662 \\& \\\\\n", - " \\& disenfranchise \\& 25,363 \\& 16.184M \\& 627,061,405 \\& 752.930k \\& 398.233k \\& 6,743 \\& \\\\\n", - " \\& disenfranchise \\& 41,796 \\& 3.821G \\& 562,015,666 \\& 9.799M \\& 162.201m \\& -2,439 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TS57LTFJE7RXK-1-1.south west) -- (TS57LTFJE7RXK-1-9.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TS57LTFJE7RXK-7-1.base west) -- ([yshift=-0.3125em]TS57LTFJE7RXK-7-9.base east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TS57LTFJE7RXK-2-1.south west) -- ([yshift=-0.0625em]TS57LTFJE7RXK-2-9.south east);\n", - "\\path[draw, very thin] ([yshift=-0.0625em]TS57LTFJE7RXK-4-1.south west) -- ([yshift=-0.0625em]TS57LTFJE7RXK-4-9.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TS57LTFJE7RXK-1-4.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TS57LTFJE7RXK-7-4.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TS57LTFJE7RXK)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
multicolumns
obligestrapped
hencestrategiesinstallationsstrategies
wednesdaysbossperceptioncomplexsorrylists
9542011-02-270.00000 7.066G3,923 13.712M
2,7692029-08-160.18763-6.666k-3,817 1.606G
5,6822029-08-160.00002-649.606a-6,446 34.617M
9,6610.01298 46281395957.136Y6,756 2.661
36,3382019-09-023.68136 11.656Y3,922 1.286
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{multicolumns}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 3/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 4/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=10.20em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=7.80em},\n", - "\tcolumn 6/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TXM56Z4VR2ECH) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " \\grtspacer \\& oblige\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& strapped\\grtspacer \\& \\grtspacer \\& \\\\\n", - " \\grtspacer \\& hence\\grtspacer \\& \\grtspacer \\& strategies\\grtspacer \\& installations\\grtspacer \\& strategies\\grtspacer \\& \\\\\n", - " wednesdays\\grtspacer \\& boss\\grtspacer \\& perception\\grtspacer \\& complex\\grtspacer \\& sorry\\grtspacer \\& lists\\grtspacer \\& \\\\\n", - " 954 \\& 2011-02-27 \\& 0.00000 \\& 7.066G \\& 3,923 \\& 13.712M \\& \\\\\n", - " 2,769 \\& 2029-08-16 \\& 0.18763 \\& -6.666k \\& -3,817 \\& 1.606G \\& \\\\\n", - " 5,682 \\& 2029-08-16 \\& 0.00002 \\& -649.606a \\& -6,446 \\& 34.617M \\& \\\\\n", - " 9,661 \\& \\& 0.01298 \\& 46281395957.136Y \\& 6,756 \\& 2.661 \\& \\\\\n", - " 36,338 \\& 2019-09-02 \\& 3.68136 \\& 11.656Y \\& 3,922 \\& 1.286 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TXM56Z4VR2ECH-1-1.south west) -- (TXM56Z4VR2ECH-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TXM56Z4VR2ECH-4-1.south west) -- ([yshift=-0.0625em]TXM56Z4VR2ECH-4-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TXM56Z4VR2ECH-9-1.base west) -- ([yshift=-0.3125em]TXM56Z4VR2ECH-9-7.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TXM56Z4VR2ECH-2-2.south west) -- ([yshift=-0.0625em]TXM56Z4VR2ECH-2-7.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TXM56Z4VR2ECH-3-2.south west) -- ([yshift=-0.0625em]TXM56Z4VR2ECH-3-7.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TXM56Z4VR2ECH-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TXM56Z4VR2ECH-9-2.base west);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TXM56Z4VR2ECH-1-4.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TXM56Z4VR2ECH-9-4.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TXM56Z4VR2ECH-2-3.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TXM56Z4VR2ECH-9-3.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TXM56Z4VR2ECH-2-5.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TXM56Z4VR2ECH-9-5.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TXM56Z4VR2ECH)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
complex
originalitysilent
swamitortwarnsswamitort
heartbreakinvesttriggeredlaserspastsystemsveteranchalkcollegialgraderstrivializeripostecomplementary
26,561modal8,3018,600.00-5,874.002020-01-30-0.000y 79.8032021-07-29 42.510nNaN1,992.00
robbing3,413-6,481.006,985.002012-10-262024-08-16 525.351 43.5302011-06-20 74.613u 399.425nan
15,208491.00-7,376.002029-11-152026-04-07 262.122p 386.621M2010-10-13 60.250u 10.456M2,007.00
34,445-7,082.003,805.002015-11-202031-07-29 14.838mNaN2017-09-11 11.059u 14.1471,997.00
37,0663,038.00-8,099.002025-08-132025-04-03 22438.774Y 140.911k2006-07-28 416.652n 22.5072,023.00
" - ], - "text/latex": [ - "\n", - "\\begin{table}\n", - "\\caption{complex}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 3/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 4/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=4.20em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=5.40em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 6/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 9/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 10/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 11/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 12/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 13/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 14/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TBBWYY5OA6SMZ) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& originality\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& silent\\grtspacer \\& \\grtspacer \\& \\\\\n", - " \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& swami\\grtspacer \\& \\grtspacer \\& tort\\grtspacer \\& \\grtspacer \\& warns\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& swami\\grtspacer \\& tort\\grtspacer \\& \\\\\n", - " heartbreak\\grtspacer \\& invest\\grtspacer \\& triggered\\grtspacer \\& lasers\\grtspacer \\& past\\grtspacer \\& systems\\grtspacer \\& veteran\\grtspacer \\& chalk\\grtspacer \\& collegial\\grtspacer \\& graders\\grtspacer \\& trivialize\\grtspacer \\& riposte\\grtspacer \\& complementary\\grtspacer \\& \\\\\n", - " 26,561 \\& modal \\& 8,301 \\& 8,600.00 \\& -5,874.00 \\& 2020-01-30 \\& \\& -0.000y \\& 79.803 \\& 2021-07-29 \\& 42.510n \\& NaN \\& 1,992.00 \\& \\\\\n", - " \\& robbing \\& 3,413 \\& -6,481.00 \\& 6,985.00 \\& 2012-10-26 \\& 2024-08-16 \\& 525.351 \\& 43.530 \\& 2011-06-20 \\& 74.613u \\& 399.425 \\& nan \\& \\\\\n", - " \\& robbing \\& 15,208 \\& 491.00 \\& -7,376.00 \\& 2029-11-15 \\& 2026-04-07 \\& 262.122p \\& 386.621M \\& 2010-10-13 \\& 60.250u \\& 10.456M \\& 2,007.00 \\& \\\\\n", - " \\& robbing \\& 34,445 \\& -7,082.00 \\& 3,805.00 \\& 2015-11-20 \\& 2031-07-29 \\& 14.838m \\& NaN \\& 2017-09-11 \\& 11.059u \\& 14.147 \\& 1,997.00 \\& \\\\\n", - " \\& robbing \\& 37,066 \\& 3,038.00 \\& -8,099.00 \\& 2025-08-13 \\& 2025-04-03 \\& 22438.774Y \\& 140.911k \\& 2006-07-28 \\& 416.652n \\& 22.507 \\& 2,023.00 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TBBWYY5OA6SMZ-1-1.south west) -- (TBBWYY5OA6SMZ-1-14.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TBBWYY5OA6SMZ-4-1.south west) -- ([yshift=-0.0625em]TBBWYY5OA6SMZ-4-14.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TBBWYY5OA6SMZ-9-1.base west) -- ([yshift=-0.3125em]TBBWYY5OA6SMZ-9-14.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-2-4.south west) -- ([yshift=-0.0625em]TBBWYY5OA6SMZ-2-14.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-3-4.south west) -- ([yshift=-0.0625em]TBBWYY5OA6SMZ-3-14.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TBBWYY5OA6SMZ-1-4.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TBBWYY5OA6SMZ-9-4.base west);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-1-11.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TBBWYY5OA6SMZ-9-11.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-2-5.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TBBWYY5OA6SMZ-9-5.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-2-7.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TBBWYY5OA6SMZ-9-7.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TBBWYY5OA6SMZ-2-12.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TBBWYY5OA6SMZ-9-12.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n" - ], - "text/plain": [ - "GreaterTable(df_id=TBBWYY5OA6SMZ)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "for k, v in ts.items():\n", " display(GT(v.head(), show_index=True, max_table_width=80, debug=False, caption=k))" @@ -3604,272 +174,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "51858d09-c98f-4675-a61c-c7a839f75686", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\\begin{table}\n", - "\\caption{basic}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.60em},\n", - "\tcolumn 2/.style={nodes={align=right }, nosep, text width=3.60em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 4/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 6/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 9/.style={nodes={align=right }, nosep, text width=4.80em},\n", - "\tcolumn 10/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TAQLQQIAQAPUY) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " discontinuance\\grtspacer \\& boning float\\grtspacer \\& cylinder float\\grtspacer \\& grassy date\\grtspacer \\& lurching datetime\\grtspacer \\& monitoring date\\grtspacer \\& pleasantly int\\grtspacer \\& teaser year\\grtspacer \\& weighing year\\grtspacer \\& \\\\\n", - " 943 \\& 3.263 \\& 0.00000 \\& 2014-05-17 \\& 2022-03-21 \\& 2011-03-14 \\& -6,346.00 \\& 2,011.00 \\& 2,017 \\& \\\\\n", - " 10,424 \\& 5.337 \\& 0.00003 \\& 2017-06-15 \\& 2009-10-13 \\& 2028-01-01 \\& 7,637.00 \\& 1,993.00 \\& 2,016 \\& \\\\\n", - " 11,582 \\& 0.219 \\& 0.17478 \\& 2014-05-17 \\& 2018-11-02 \\& 2011-03-14 \\& -2,602.00 \\& 2,007.00 \\& 2,010 \\& \\\\\n", - " 30,946 \\& 0.000 \\& 0.00004 \\& 2018-04-02 \\& 2011-04-24 \\& 2009-02-03 \\& -4,318.00 \\& 1,995.00 \\& 2,026 \\& \\\\\n", - " 49,857 \\& 0.002 \\& 0.10229 \\& 2018-04-02 \\& 2018-11-02 \\& 2028-10-24 \\& 9,433.00 \\& 1,996.00 \\& 2,007 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TAQLQQIAQAPUY-1-1.south west) -- (TAQLQQIAQAPUY-1-10.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TAQLQQIAQAPUY-2-1.south west) -- ([yshift=-0.0625em]TAQLQQIAQAPUY-2-10.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TAQLQQIAQAPUY-7-1.base west) -- ([yshift=-0.3125em]TAQLQQIAQAPUY-7-10.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TAQLQQIAQAPUY-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TAQLQQIAQAPUY-7-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n", - "\n", - "\\begin{table}\n", - "\\caption{timeseries}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=7.80em},\n", - "\tcolumn 4/.style={nodes={align=center}, nosep, text width=6.60em},\n", - "\tcolumn 5/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TLAKKRZA3NH2N) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\\\\n", - " privates\\grtspacer \\& Sacramento Edgewise Outlive float\\grtspacer \\& Sooner Celled Complements float\\grtspacer \\& Superheroes Prejudge Carnivore datetime\\grtspacer \\& \\\\\n", - " 2007-04-24 \\& 25.87 \\& -803.629u \\& 2008-10-28 \\& \\\\\n", - " 2007-07-10 \\& 1.00 \\& -2.867 \\& 2011-04-01 \\& \\\\\n", - " 2008-09-30 \\& 37,517.98 \\& 10.719M \\& 2032-08-18 \\& \\\\\n", - " 2010-08-18 \\& 0.92 \\& -7242901.911Y \\& 2018-10-13 \\& \\\\\n", - " 2013-07-26 \\& 133.49 \\& -0.000y \\& 2019-01-15 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TLAKKRZA3NH2N-1-1.south west) -- (TLAKKRZA3NH2N-1-5.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TLAKKRZA3NH2N-2-1.south west) -- ([yshift=-0.0625em]TLAKKRZA3NH2N-2-5.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TLAKKRZA3NH2N-7-1.base west) -- ([yshift=-0.3125em]TLAKKRZA3NH2N-7-5.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TLAKKRZA3NH2N-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TLAKKRZA3NH2N-7-2.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n", - "\n", - "\\begin{table}\n", - "\\caption{multiindex}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=3.60em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=8.40em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=3.60em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=6.60em},\n", - "\tcolumn 6/.style={nodes={align=right }, nosep, text width=5.40em},\n", - "\tcolumn 7/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=6.60em},\n", - "\tcolumn 9/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TGYG4EU4RPTVC) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " federally\\grtspacer \\& restrictions\\grtspacer \\& dolphin\\grtspacer \\& Aurora Merge Calculates float\\grtspacer \\& Bombing Honking Garage int\\grtspacer \\& Funnier Personals Shovels float\\grtspacer \\& Healthier Chemically Construed float\\grtspacer \\& Whereabouts Sept Cakes int\\grtspacer \\& \\\\\n", - " 44,379 \\& disenfranchise \\& 80,923 \\& 32.454 \\& 337,462,522 \\& 21.571M \\& 238.509M \\& -4,327 \\& \\\\\n", - " \\& hybrid \\& 52,773 \\& 3.628M \\& 494,694,322 \\& 1.282G \\& NaN \\& 6,496 \\& \\\\\n", - " 58,702 \\& disenfranchise \\& 474 \\& 3.842M \\& 785,021,862 \\& 2.261k \\& 11.004k \\& -5,662 \\& \\\\\n", - " \\& disenfranchise \\& 25,363 \\& 16.184M \\& 627,061,405 \\& 752.930k \\& 398.233k \\& 6,743 \\& \\\\\n", - " \\& disenfranchise \\& 41,796 \\& 3.821G \\& 562,015,666 \\& 9.799M \\& 162.201m \\& -2,439 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TGYG4EU4RPTVC-1-1.south west) -- (TGYG4EU4RPTVC-1-9.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TGYG4EU4RPTVC-7-1.base west) -- ([yshift=-0.3125em]TGYG4EU4RPTVC-7-9.base east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TGYG4EU4RPTVC-2-1.south west) -- ([yshift=-0.0625em]TGYG4EU4RPTVC-2-9.south east);\n", - "\\path[draw, very thin] ([yshift=-0.0625em]TGYG4EU4RPTVC-4-1.south west) -- ([yshift=-0.0625em]TGYG4EU4RPTVC-4-9.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TGYG4EU4RPTVC-1-4.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TGYG4EU4RPTVC-7-4.base west);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n", - "\n", - "\\begin{table}\n", - "\\caption{multicolumns}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 3/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 4/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 3/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=10.20em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=7.80em},\n", - "\tcolumn 6/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TVGLOCWS34RTD) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\\\\n", - " \\grtspacer \\& oblige\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& strapped\\grtspacer \\& \\grtspacer \\& \\\\\n", - " \\grtspacer \\& hence\\grtspacer \\& \\grtspacer \\& strategies\\grtspacer \\& installations\\grtspacer \\& strategies\\grtspacer \\& \\\\\n", - " wednesdays\\grtspacer \\& boss\\grtspacer \\& perception\\grtspacer \\& complex\\grtspacer \\& sorry\\grtspacer \\& lists\\grtspacer \\& \\\\\n", - " 954 \\& 2011-02-27 \\& 0.00000 \\& 7.066G \\& 3,923 \\& 13.712M \\& \\\\\n", - " 2,769 \\& 2029-08-16 \\& 0.18763 \\& -6.666k \\& -3,817 \\& 1.606G \\& \\\\\n", - " 5,682 \\& 2029-08-16 \\& 0.00002 \\& -649.606a \\& -6,446 \\& 34.617M \\& \\\\\n", - " 9,661 \\& \\& 0.01298 \\& 46281395957.136Y \\& 6,756 \\& 2.661 \\& \\\\\n", - " 36,338 \\& 2019-09-02 \\& 3.68136 \\& 11.656Y \\& 3,922 \\& 1.286 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TVGLOCWS34RTD-1-1.south west) -- (TVGLOCWS34RTD-1-7.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TVGLOCWS34RTD-4-1.south west) -- ([yshift=-0.0625em]TVGLOCWS34RTD-4-7.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TVGLOCWS34RTD-9-1.base west) -- ([yshift=-0.3125em]TVGLOCWS34RTD-9-7.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TVGLOCWS34RTD-2-2.south west) -- ([yshift=-0.0625em]TVGLOCWS34RTD-2-7.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TVGLOCWS34RTD-3-2.south west) -- ([yshift=-0.0625em]TVGLOCWS34RTD-3-7.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TVGLOCWS34RTD-1-2.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TVGLOCWS34RTD-9-2.base west);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVGLOCWS34RTD-1-4.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVGLOCWS34RTD-9-4.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVGLOCWS34RTD-2-3.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVGLOCWS34RTD-9-3.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVGLOCWS34RTD-2-5.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVGLOCWS34RTD-9-5.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n", - "\n", - "\\begin{table}\n", - "\\caption{complex}\n", - "\n", - "\\centering{\n", - "\\begin{tikzpicture}[\n", - " auto,\n", - " transform shape,\n", - " nosep/.style={inner sep=0},\n", - " table/.style={\n", - " matrix of nodes,\n", - " row sep=0.125em,\n", - " column sep=0.375em,\n", - " nodes in empty cells,\n", - " nodes={rectangle, scale=1.0, text badly ragged },\n", - "\trow 1/.style={nodes={text=black, anchor=north, inner ysep=0, text height=0, text depth=0}},\n", - "\trow 2/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 3/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\trow 4/.style={nodes={text=black, anchor=south, inner ysep=.2em, minimum height=1.3em, font=\\bfseries}},\n", - "\tcolumn 1/.style={nodes={align=left }, text height=0.9em, text depth=0.2em, inner xsep=0.375em, inner ysep=0, text width=6.00em},\n", - "\tcolumn 2/.style={nodes={align=left }, nosep, text width=4.20em},\n", - "\tcolumn 3/.style={nodes={align=left }, nosep, text width=5.40em},\n", - "\tcolumn 4/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 5/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 6/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 7/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 8/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 9/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 10/.style={nodes={align=center}, nosep, text width=6.00em},\n", - "\tcolumn 11/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 12/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 13/.style={nodes={align=right }, nosep, text width=6.00em},\n", - "\tcolumn 14/.style={text height=0.9em, text depth=0.2em, nosep, text width=0em}\t}]\n", - "\\matrix (TVDUZM2RAS34T) [table, ampersand replacement=\\&]{\n", - " \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\& \\\\\n", - " \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& originality\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& silent\\grtspacer \\& \\grtspacer \\& \\\\\n", - " \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& swami\\grtspacer \\& \\grtspacer \\& tort\\grtspacer \\& \\grtspacer \\& warns\\grtspacer \\& \\grtspacer \\& \\grtspacer \\& \\grtspacer \\& swami\\grtspacer \\& tort\\grtspacer \\& \\\\\n", - " heartbreak\\grtspacer \\& invest\\grtspacer \\& triggered\\grtspacer \\& lasers\\grtspacer \\& past\\grtspacer \\& systems\\grtspacer \\& veteran\\grtspacer \\& chalk\\grtspacer \\& collegial\\grtspacer \\& graders\\grtspacer \\& trivialize\\grtspacer \\& riposte\\grtspacer \\& complementary\\grtspacer \\& \\\\\n", - " 26,561 \\& modal \\& 8,301 \\& 8,600.00 \\& -5,874.00 \\& 2020-01-30 \\& \\& -0.000y \\& 79.803 \\& 2021-07-29 \\& 42.510n \\& NaN \\& 1,992.00 \\& \\\\\n", - " \\& robbing \\& 3,413 \\& -6,481.00 \\& 6,985.00 \\& 2012-10-26 \\& 2024-08-16 \\& 525.351 \\& 43.530 \\& 2011-06-20 \\& 74.613u \\& 399.425 \\& nan \\& \\\\\n", - " \\& robbing \\& 15,208 \\& 491.00 \\& -7,376.00 \\& 2029-11-15 \\& 2026-04-07 \\& 262.122p \\& 386.621M \\& 2010-10-13 \\& 60.250u \\& 10.456M \\& 2,007.00 \\& \\\\\n", - " \\& robbing \\& 34,445 \\& -7,082.00 \\& 3,805.00 \\& 2015-11-20 \\& 2031-07-29 \\& 14.838m \\& NaN \\& 2017-09-11 \\& 11.059u \\& 14.147 \\& 1,997.00 \\& \\\\\n", - " \\& robbing \\& 37,066 \\& 3,038.00 \\& -8,099.00 \\& 2025-08-13 \\& 2025-04-03 \\& 22438.774Y \\& 140.911k \\& 2006-07-28 \\& 416.652n \\& 22.507 \\& 2,023.00 \\& \\\\\n", - "};\n", - "\n", - "\\path[draw, thick] (TVDUZM2RAS34T-1-1.south west) -- (TVDUZM2RAS34T-1-14.south east);\n", - "\\path[draw, semithick] ([yshift=-0.0625em]TVDUZM2RAS34T-4-1.south west) -- ([yshift=-0.0625em]TVDUZM2RAS34T-4-14.south east);\n", - "\\path[draw, thick] ([yshift=-0.3125em]TVDUZM2RAS34T-9-1.base west) -- ([yshift=-0.3125em]TVDUZM2RAS34T-9-14.base east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-2-4.south west) -- ([yshift=-0.0625em]TVDUZM2RAS34T-2-14.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-3-4.south west) -- ([yshift=-0.0625em]TVDUZM2RAS34T-3-14.south east);\n", - "\\path[draw, very thin] ([xshift=-0.1875em]TVDUZM2RAS34T-1-4.south west) -- ([yshift=-0.3125em, xshift=-0.1875em]TVDUZM2RAS34T-9-4.base west);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-1-11.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVDUZM2RAS34T-9-11.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-2-5.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVDUZM2RAS34T-9-5.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-2-7.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVDUZM2RAS34T-9-7.base east);\n", - "\\path[draw, ultra thin] ([xshift=0.1875em, yshift=-0.0625em]TVDUZM2RAS34T-2-12.south east) -- ([yshift=-0.3125em, xshift=0.1875em]TVDUZM2RAS34T-9-12.base east);\n", - "\n", - "\n", - "\n", - "\\end{tikzpicture}\n", - "} % close centering\n", - "\\end{table}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "for k, v in ts.items():\n", " print(GT(v.head(), show_index=True, max_table_width=80, debug=False, caption=k).make_tikz())" @@ -3893,166 +201,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "5a9dda81-8334-4f9f-ab2c-358228c4d936", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cw_target = 80 and input_target = 48\n", - "Weird, cols not all the same size 0 83\n", - "Actual text table width by row 0\n", - "top row │ ┃ │ │ │ lurch │ │ │ │ │\n", - "actual widths by column [ 9 8 10 8 8 8 12 11 8]\n", - "actual total width 83\n", - "formatted output\n", - "┍━━━━━━━━┳━━━━━━━┯━━━━━━━━━┯━━━━━━━┯━━━━━━━┯━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━┑\n", - "│ ┃ │ │ │ lurch │ │ │ │ │\n", - "│ discon ┃ bonin │ │ grass │ ing d │ monit │ │ │ weigh │\n", - "│ tinuan ┃ g │ cylinde │ y │ ateti │ oring │ pleasantl │ teaser │ ing │\n", - "│ ce ┃ float │ r float │ date │ me │ date │ y int │ year │ year │\n", - "┝━━━━━━━━╋━━━━━━━┿━━━━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━┥\n", - "│ 943 ┃ 3.263 │ 0.00000 │ 2014- │ 2022- │ 2011- │ -6,346.00 │ 2,011.00 │ 2,017 │\n", - "│ ┃ │ │ 05-17 │ 03-21 │ 03-14 │ │ │ │\n", - "├────────╂───────┼─────────┼───────┼───────┼───────┼───────────┼──────────┼───────┤\n", - "│ 10,424 ┃ 5.337 │ 0.00003 │ 2017- │ 2009- │ 2028- │ 7,637.00 │ 1,993.00 │ 2,016 │\n", - "│ ┃ │ │ 06-15 │ 10-13 │ 01-01 │ │ │ │\n", - "├────────╂───────┼─────────┼───────┼───────┼───────┼───────────┼──────────┼───────┤\n", - "│ 11,582 ┃ 0.219 │ 0.17478 │ 2014- │ 2018- │ 2011- │ -2,602.00 │ 2,007.00 │ 2,010 │\n", - "│ ┃ │ │ 05-17 │ 11-02 │ 03-14 │ │ │ │\n", - "├────────╂───────┼─────────┼───────┼───────┼───────┼───────────┼──────────┼───────┤\n", - "│ 30,946 ┃ 0.000 │ 0.00004 │ 2018- │ 2011- │ 2009- │ -4,318.00 │ 1,995.00 │ 2,026 │\n", - "│ ┃ │ │ 04-02 │ 04-24 │ 02-03 │ │ │ │\n", - "├────────╂───────┼─────────┼───────┼───────┼───────┼───────────┼──────────┼───────┤\n", - "│ 49,857 ┃ 0.002 │ 0.10229 │ 2018- │ 2018- │ 2028- │ 9,433.00 │ 1,996.00 │ 2,007 │\n", - "│ ┃ │ │ 04-02 │ 11-02 │ 10-24 │ │ │ │\n", - "┕━━━━━━━━┻━━━━━━━┷━━━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━┙\n", - "\n", - "=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*\n", - "cw_target = 80 and input_target = 68\n", - "Weird, cols not all the same size 0 57\n", - "Actual text table width by row 0\n", - "top row │ ┃ Sacramento │ │ Superheroes │\n", - "actual widths by column [13 13 16 14]\n", - "actual total width 57\n", - "formatted output\n", - "┍━━━━━━━━━━━━┳━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┑\n", - "│ ┃ Sacramento │ │ Superheroes │\n", - "│ ┃ Edgewise │ Sooner Celled │ Prejudge │\n", - "│ ┃ Outlive │ Complements │ Carnivore │\n", - "│ privates ┃ float │ float │ datetime │\n", - "┝━━━━━━━━━━━━╋━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━┥\n", - "│ 2007-04-24 ┃ 25.87 │ -803.629u │ 2008-10-28 │\n", - "├────────────╂────────────┼───────────────┼─────────────┤\n", - "│ 2007-07-10 ┃ 1.00 │ -2.867 │ 2011-04-01 │\n", - "├────────────╂────────────┼───────────────┼─────────────┤\n", - "│ 2008-09-30 ┃ 37,517.98 │ 10.719M │ 2032-08-18 │\n", - "├────────────╂────────────┼───────────────┼─────────────┤\n", - "│ 2010-08-18 ┃ 0.92 │ -7242901.911Y │ 2018-10-13 │\n", - "├────────────╂────────────┼───────────────┼─────────────┤\n", - "│ 2013-07-26 ┃ 133.49 │ -0.000y │ 2019-01-15 │\n", - "┕━━━━━━━━━━━━┻━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┙\n", - "\n", - "=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*\n", - "cw_target = 80 and input_target = 60\n", - "Weird, cols not all the same size 0 91\n", - "Actual text table width by row 0\n", - "top row │ │ │ ┃ Aurora │ │ Funnier │ Healthie │ Wherea │\n", - "actual widths by column [ 9 17 9 10 14 11 11 9]\n", - "actual total width 91\n", - "formatted output\n", - "┍━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━┳━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━┑\n", - "│ │ │ ┃ Aurora │ │ Funnier │ Healthie │ Wherea │\n", - "│ │ │ ┃ Merge C │ │ Personal │ r Chemic │ bouts │\n", - "│ │ │ ┃ alculat │ Bombing │ s │ ally Con │ Sept │\n", - "│ federa │ │ dolphi ┃ es │ Honking │ Shovels │ strued │ Cakes │\n", - "│ lly │ restrictions │ n ┃ float │ Garage int │ float │ float │ int │\n", - "┝━━━━━━━━┿━━━━━━━━━━━━━━━━┿━━━━━━━━╋━━━━━━━━━┿━━━━━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━━┥\n", - "│ 44,379 │ disenfranchise │ 80,923 ┃ 32.454 │ 337,462,522 │ 21.571M │ 238.509M │ -4,327 │\n", - "├────────┼────────────────┼────────╂─────────┼─────────────┼──────────┼──────────┼────────┤\n", - "│ │ hybrid │ 52,773 ┃ 3.628M │ 494,694,322 │ 1.282G │ NaN │ 6,496 │\n", - "├────────┼────────────────┼────────╂─────────┼─────────────┼──────────┼──────────┼────────┤\n", - "│ 58,702 │ disenfranchise │ 474 ┃ 3.842M │ 785,021,862 │ 2.261k │ 11.004k │ -5,662 │\n", - "├────────┼────────────────┼────────╂─────────┼─────────────┼──────────┼──────────┼────────┤\n", - "│ │ │ 25,363 ┃ 16.184M │ 627,061,405 │ 752.930k │ 398.233k │ 6,743 │\n", - "├────────┼────────────────┼────────╂─────────┼─────────────┼──────────┼──────────┼────────┤\n", - "│ │ │ 41,796 ┃ 3.821G │ 562,015,666 │ 9.799M │ 162.201m │ -2,439 │\n", - "┕━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━┻━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━┙\n", - "\n", - "=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*\n", - "cw_target = 80 and input_target = 60\n", - "Weird, cols not all the same size 0 66\n", - "Actual text table width by row 0\n", - "top row │ ┃ oblig │ │ │ strapp │ strappe │\n", - "actual widths by column [ 9 8 10 19 9 10]\n", - "actual total width 66\n", - "formatted output\n", - "┍━━━━━━━━┳━━━━━━━┯━━━━━━━━━┯━━━━━━━━━━━━━━━━━━┯━━━━━━━━┯━━━━━━━━━┑\n", - "│ ┃ oblig │ │ │ strapp │ strappe │\n", - "│ ┃ e │ oblige │ oblige │ ed │ d │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ ┃ │ │ │ instal │ │\n", - "│ ┃ │ │ │ lation │ strateg │\n", - "│ ┃ hence │ hence │ strategies │ s │ ies │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ wednes ┃ │ percept │ │ │ │\n", - "│ days ┃ boss │ ion │ complex │ sorry │ lists │\n", - "┝━━━━━━━━╋━━━━━━━┿━━━━━━━━━┿━━━━━━━━━━━━━━━━━━┿━━━━━━━━┿━━━━━━━━━┥\n", - "│ 954 ┃ 2011- │ 0.00000 │ 7.066G │ 3,923 │ 13.712M │\n", - "│ ┃ 02-27 │ │ │ │ │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ 2,769 ┃ 2029- │ 0.18763 │ -6.666k │ -3,817 │ 1.606G │\n", - "│ ┃ 08-16 │ │ │ │ │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ 5,682 ┃ 2029- │ 0.00002 │ -649.606a │ -6,446 │ 34.617M │\n", - "│ ┃ 08-16 │ │ │ │ │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ 9,661 ┃ │ 0.01298 │ 46281395957.136Y │ 6,756 │ 2.661 │\n", - "├────────╂───────┼─────────┼──────────────────┼────────┼─────────┤\n", - "│ 36,338 ┃ 2019- │ 3.68136 │ 11.656Y │ 3,922 │ 1.286 │\n", - "│ ┃ 09-02 │ │ │ │ │\n", - "┕━━━━━━━━┻━━━━━━━┷━━━━━━━━━┷━━━━━━━━━━━━━━━━━━┷━━━━━━━━┷━━━━━━━━━┙\n", - "\n", - "=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*\n", - "cw_target = 80 and input_target = 40\n", - "Weird, cols not all the same size 0 133\n", - "Actual text table width by row 0\n", - "top row │ │ │ ┃ │ │ origi │ origi │ │ │ origi │ │ │ │\n", - "actual widths by column [ 9 10 9 12 12 8 8 13 11 8 11 10 11]\n", - "actual total width 133\n", - "formatted output\n", - "┍━━━━━━━━┯━━━━━━━━━┯━━━━━━━━┳━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━┯━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━┯━━━━━━━━━━┑\n", - "│ │ │ ┃ │ │ origi │ origi │ │ │ origi │ │ │ │\n", - "│ │ │ ┃ originali │ originali │ nalit │ nalit │ originalit │ original │ nalit │ original │ │ │\n", - "│ │ │ ┃ ty │ ty │ y │ y │ y │ ity │ y │ ity │ silent │ silent │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ │ │ ┃ swami │ swami │ tort │ tort │ warns │ warns │ warns │ warns │ swami │ tort │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ heartb │ │ trigge ┃ │ │ syste │ veter │ │ collegia │ grade │ triviali │ │ compleme │\n", - "│ reak │ invest │ red ┃ lasers │ past │ ms │ an │ chalk │ l │ rs │ ze │ riposte │ ntary │\n", - "┝━━━━━━━━┿━━━━━━━━━┿━━━━━━━━╋━━━━━━━━━━━┿━━━━━━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━┿━━━━━━━━━━┿━━━━━━━━━┿━━━━━━━━━━┥\n", - "│ 26,561 │ modal │ 8,301 ┃ 8,600.00 │ -5,874.00 │ 2020- │ │ -0.000y │ 79.803 │ 2021- │ 42.510n │ NaN │ 1,992.00 │\n", - "│ │ │ ┃ │ │ 01-30 │ │ │ │ 07-29 │ │ │ │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ │ robbing │ 3,413 ┃ -6,481.00 │ 6,985.00 │ 2012- │ 2024- │ 525.351 │ 43.530 │ 2011- │ 74.613u │ 399.425 │ nan │\n", - "│ │ │ ┃ │ │ 10-26 │ 08-16 │ │ │ 06-20 │ │ │ │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ │ │ 15,208 ┃ 491.00 │ -7,376.00 │ 2029- │ 2026- │ 262.122p │ 386.621M │ 2010- │ 60.250u │ 10.456M │ 2,007.00 │\n", - "│ │ │ ┃ │ │ 11-15 │ 04-07 │ │ │ 10-13 │ │ │ │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ │ │ 34,445 ┃ -7,082.00 │ 3,805.00 │ 2015- │ 2031- │ 14.838m │ NaN │ 2017- │ 11.059u │ 14.147 │ 1,997.00 │\n", - "│ │ │ ┃ │ │ 11-20 │ 07-29 │ │ │ 09-11 │ │ │ │\n", - "├────────┼─────────┼────────╂───────────┼───────────┼───────┼───────┼────────────┼──────────┼───────┼──────────┼─────────┼──────────┤\n", - "│ │ │ 37,066 ┃ 3,038.00 │ -8,099.00 │ 2025- │ 2025- │ 22438.774Y │ 140.911k │ 2006- │ 416.652n │ 22.507 │ 2,023.00 │\n", - "│ │ │ ┃ │ │ 08-13 │ 04-03 │ │ │ 07-28 │ │ │ │\n", - "┕━━━━━━━━┷━━━━━━━━━┷━━━━━━━━┻━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━┷━━━━━━━━━━┙\n", - "\n", - "=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*\n" - ] - } - ], + "outputs": [], "source": [ "for k, v in ts.items():\n", " # if k != 'multiindex': continue\n",