From 476da0b8cb690aa707bb9cd46ffe6e5f7edf9f38 Mon Sep 17 00:00:00 2001 From: Stephen Mildenhall Date: Wed, 25 Jun 2025 20:04:08 +0100 Subject: [PATCH] 5.0.0.beta renamed gtXX to XX; added auto SCM --- docs/versions.rst | 1 + greater_tables/__init__.py | 6 +- greater_tables/cli.py | 4 +- greater_tables/{gtconfig.py => config.py} | 12 ++- greater_tables/{gtcore.py => core.py} | 99 +++++++------------ greater_tables/{gtenums.py => enums.py} | 0 greater_tables/{gtetcher.py => etcher.py} | 2 +- .../{gtfabrications.py => fabrications.py} | 0 greater_tables/{gtformats.py => formats.py} | 0 greater_tables/{gthasher.py => hasher.py} | 0 .../{gtutilities.py => utilities.py} | 85 +++++++++++++++- pyproject.toml | 11 ++- 12 files changed, 146 insertions(+), 74 deletions(-) rename greater_tables/{gtconfig.py => config.py} (95%) rename greater_tables/{gtcore.py => core.py} (96%) rename greater_tables/{gtenums.py => enums.py} (100%) rename greater_tables/{gtetcher.py => etcher.py} (99%) rename greater_tables/{gtfabrications.py => fabrications.py} (100%) rename greater_tables/{gtformats.py => formats.py} (100%) rename greater_tables/{gthasher.py => hasher.py} (100%) rename greater_tables/{gtutilities.py => utilities.py} (94%) diff --git a/docs/versions.rst b/docs/versions.rst index bc4e095..8f24def 100644 --- a/docs/versions.rst +++ b/docs/versions.rst @@ -8,6 +8,7 @@ Versions * Introduced gtutilities to pull out text width estimation, cleaning and escaping, etc. * Delete rich table output format? * Moved logging setup to separate file, called from cli but optional for use in other situation. GPT recommended approach. +* Removed ``gt`` prefix from module file names, except logging. .. * test cases * tex and or html in data, index, columns, escaped/unescaped diff --git a/greater_tables/__init__.py b/greater_tables/__init__.py index 04a00e7..2cea140 100644 --- a/greater_tables/__init__.py +++ b/greater_tables/__init__.py @@ -2,9 +2,9 @@ __version__ = '5.0.0' __project__ = 'greater_tables' __author__ = 'Stephen J Mildenhall' -from . gtcore import GT -from . gtfabrications import Fabricator -from . gtetcher import Etcher +from . core import GT +from . fabrications import Fabricator +from . etcher import Etcher # from . gtbreaks import Breakability diff --git a/greater_tables/cli.py b/greater_tables/cli.py index 34b1bc1..b325557 100644 --- a/greater_tables/cli.py +++ b/greater_tables/cli.py @@ -10,8 +10,8 @@ from . gtlogging import setup_logging setup_logging() # <-- must come before using your package -from .gtconfig import Configurator, write_template -from .gtcore import GT +from . config import Configurator, write_template +from . core import GT @click.group() diff --git a/greater_tables/gtconfig.py b/greater_tables/config.py similarity index 95% rename from greater_tables/gtconfig.py rename to greater_tables/config.py index 29647b9..b80a749 100644 --- a/greater_tables/gtconfig.py +++ b/greater_tables/config.py @@ -10,7 +10,6 @@ Also includes functions for writing editable config templates and loading from Y from pathlib import Path from typing import Optional, Union, Literal, Callable, Any -import yaml from pydantic import BaseModel, Field, ValidationError, ConfigDict import yaml @@ -29,7 +28,7 @@ class Configurator(BaseModel): :Usage: - >>> from greater_tables.gtconfig import Configurator + >>> from greater_tables.config import Configurator >>> cfg = Configurator(font_size="1.2em", caption_align="left") :see also: ``GTConfig`` for loading from YAML with overrides. @@ -134,14 +133,17 @@ class Configurator(BaseModel): header_alignment: Literal["few", "center"] = Field( default='few', description="NYI!! TOOD Alignment of header cells, few=follow column, center=center." ) - max_table_width: int = Field( - 200, description="Maximum table width for markdown/text output mode" + table_font_pt_size: float = Field( + default=11, description="Font size for table text in points, 12 points/inch; 85 char/6.5 inch page" + ) + max_table_inch_width: float = Field( + 8.0, description="Maximum/target table width in inches" ) table_width_mode: Literal["explicit", "natural", "breakable", "minimum"] = Field( "explicit", description=( "Mode for determining table width. " - "'explicit': fixed width using max_table_width; " + "'explicit': fixed width using max_table_width_em; " "'natural': each cell fits its full content; " "'breakable': wrap breakable strings; " "'minimum': also wraps dates or float-like cells" diff --git a/greater_tables/gtcore.py b/greater_tables/core.py similarity index 96% rename from greater_tables/gtcore.py rename to greater_tables/core.py index 723c563..ed30153 100644 --- a/greater_tables/gtcore.py +++ b/greater_tables/core.py @@ -30,11 +30,11 @@ from pydantic import ValidationError from rich import box from IPython.display import display, SVG -from . gtenums import Breakability -from . gtconfig import Configurator -from . gthasher import df_short_hash -from . gtetcher import Etcher -from . gtutilities import * +from . enums import Breakability +from . config import Configurator +from . hasher import df_short_hash +from . etcher import Etcher +from . utilities import * # turn off this fuck-fest pd.set_option('future.no_silent_downcasting', True) @@ -181,7 +181,7 @@ class GT(object): :param str_table_fmt: table border format used for string output (markdown), default mixed_grid DEPRECATED?? :param config.table_width_mode: - 'explicit': set using config.max_table_width + 'explicit': set using config.max_table_width_em 'natural': each cell on one line (can be very wide with long strings) 'breakable': wrap breakable cells (text strings) at word boundaries to fit longest word @@ -190,9 +190,9 @@ class GT(object): used to balance header columns. :param config.table_width_header_relax: extra spaces allowed per column heading to facilitate better column header wrapping. - :param config.max_table_width: max table width used for markdown string output, + :param config.max_table_width_em: max table width used for markdown string output, default 200; width is never less than minimum width. Padding (3 chars - per row plus 1) consumed out of config.max_table_width in string output mode. + per row plus 1) consumed out of config.max_table_width_em in string output mode. :param config.debug: if True, add id to caption and use colored lines in table, default False. """ @@ -290,17 +290,7 @@ class GT(object): # get rid of column names # self.df.columns.names = [None] * self.df.columns.nlevels self.df_id = df_short_hash(self.df) - # TODO: update / change - # self.str_table_fmt = str_table_fmt - # TODO: implement - # self.table_width_mode = config.table_width_mode.lower() - # if config.table_width_mode not in ('explicit', 'natural', 'breakable', 'minimum'): - # raise ValueError(f'Inadmissible options {config.table_width_mode} for config.table_width_mode.') - # self.table_width_mode = table_width_mode - # self.table_width_header_adjust = table_width_header_adjust - # self.table_width_header_relax = table_width_header_relax - # self.max_table_width = max_table_width - # self.debug = debug + if self.caption != '' and self.config.debug: self.caption += f' (id: {self.df_id})' # self.max_str_length = max_str_length @@ -520,34 +510,6 @@ class GT(object): raise ValueError( 'formatters must be dict of callables or ints or format strings {x:...}') - # store defaults - # self.default_integer_str = default_integer_str - # VERY rarely used; for floats in cols that are not floats - # self.default_float_str = default_float_str - # self.default_date_str = default_date_str.replace( - # '{x:', '').replace('}', '') - # self.default_ratio_str = default_ratio_str - # self.pef_precision = pef_precision - # self.pef_lower = pef_lower - # self.pef_upper = pef_upper - self._pef = None - # self.table_float_format = table_float_format - # self.default_float_formatter = None - # self.hrule_widths = hrule_widths or (0, 0, 0) - # if not isinstance(self.config.hrule_widths, (list, tuple)): - # self.config.hrule_widths = (self.config.hrule_widths,) - # self.vrule_widths = vrule_widths or (0, 0, 0) - # if not isinstance(self.config.hrule_widths, (list, tuple)): - # self.config.hrule_widths = (self.config.hrule_widths, ) - # self.table_hrule_width = table_hrule_width - # self.table_vrule_width = table_vrule_width - # self.font_body = font_body - # self.font_head = font_head - # self.font_caption = font_caption - # self.tikz_scale = tikz_scale - # self.font_bold_index = font_bold_index - # self.caption_align = caption_align - # self.sparsify_columns = sparsify_columns if tabs is None: self.tabs = None elif isinstance(tabs, (int, float)): @@ -582,6 +544,8 @@ class GT(object): # because of the problem of non-unique indexes use a list and # not a dict to pass the formatters to to_html + self.max_table_width_em = self.config.max_table_inch_width * 72 / self.config.table_font_pt_size + self._pef = None self._df_formatters = None self.df_style = '' self.df_html = '' @@ -968,9 +932,17 @@ class GT(object): h = self.html_knowledge_df.recommended.sum() tikz = self.tex_knowledge_df['tikz_colw'].sum() tex = self.tex_knowledge_df['scaled_tabs'].sum() - mtw = self.config.max_table_width - cols = self.df.shape[1] - df = pd.Series({ + mtw = self.max_table_width_em + bit = pd.DataFrame({ + 'text natural': self.text_knowledge_df.natural_width, + 'text minimum': self.text_knowledge_df.minimum_width, + 'text header tweak': self.text_knowledge_df.header_tweak, + 'text recommended': self.text_knowledge_df.recommended, + 'html recommended': self.html_knowledge_df.recommended, + 'tex recommended': self.tex_knowledge_df['scaled_tabs'], + 'tikz recommended': self.tex_knowledge_df['tikz_colw'], + }).fillna(0) + ser = pd.Series({ 'text natural': natural, 'text minimum': minimum, 'text header tweak': ht, @@ -978,13 +950,13 @@ class GT(object): 'html recommended': h, 'tex recommended': tex, 'tikz recommended': tikz, - 'requested': mtw, - 'width mode' : self.config.table_width_mode, - 'header relax': self.config.table_width_header_adjust, - 'header chars': self.config.table_width_header_relax, - }).to_frame('value') - df.index.name = 'metric' - return df + }) + bit.loc['total', :] = ser + print(f"requested width = {mtw}\n" + f"width mode = {self.config.table_width_mode}\n" + f"header relax = {self.config.table_width_header_adjust}\n" + f"header chars = {self.config.table_width_header_relax}") + return bit def estimate_column_widths_by_mode(self, mode): """ @@ -1074,9 +1046,9 @@ class GT(object): PADDING = 2 # per column TODO enhance if self.config.table_width_mode == 'explicit': # target width INCLUDES padding and column marks | - target_width = self.config.max_table_width - \ - (PADDING + 1) * n_col - 1 - logger.info(f'Col padding effect {self.config.max_table_width=}' + target_width = self.max_table_width_em - \ + (PADDING + 1) * n_col - 1 + logger.info(f'Col padding effect {self.max_table_width_em=}' f' ==> {target_width=}') elif self.config.table_width_mode == 'natural': target_width = natural + (PADDING + 1) * n_col + 1 @@ -1177,9 +1149,14 @@ class GT(object): target_width, ans['recommended'].sum() - target_width ) + else: + # avoid a failure blow + ans['raw_recommended'] = np.nan + ans['header_tweak'] = np.nan + ans['natural_w_header'] = np.nan else: # for html and tex modes: adapts from old estimate_column_widths - target_width = self.config.max_table_width + target_width = self.max_table_width_em nc_index = self.nindex # without tex adjustment diff --git a/greater_tables/gtenums.py b/greater_tables/enums.py similarity index 100% rename from greater_tables/gtenums.py rename to greater_tables/enums.py diff --git a/greater_tables/gtetcher.py b/greater_tables/etcher.py similarity index 99% rename from greater_tables/gtetcher.py rename to greater_tables/etcher.py index 6c7842d..c6ad1e4 100644 --- a/greater_tables/gtetcher.py +++ b/greater_tables/etcher.py @@ -12,7 +12,7 @@ from pathlib import Path from subprocess import run, Popen, PIPE from IPython.display import SVG, display -from .gthasher import txt_short_hash +from . hasher import txt_short_hash logger = logging.getLogger(__name__) diff --git a/greater_tables/gtfabrications.py b/greater_tables/fabrications.py similarity index 100% rename from greater_tables/gtfabrications.py rename to greater_tables/fabrications.py diff --git a/greater_tables/gtformats.py b/greater_tables/formats.py similarity index 100% rename from greater_tables/gtformats.py rename to greater_tables/formats.py diff --git a/greater_tables/gthasher.py b/greater_tables/hasher.py similarity index 100% rename from greater_tables/gthasher.py rename to greater_tables/hasher.py diff --git a/greater_tables/gtutilities.py b/greater_tables/utilities.py similarity index 94% rename from greater_tables/gtutilities.py rename to greater_tables/utilities.py index e8bfc65..85ad082 100644 --- a/greater_tables/gtutilities.py +++ b/greater_tables/utilities.py @@ -9,10 +9,11 @@ import re from textwrap import wrap import pandas as pd +from pybtex.textutils import width from rich import box from rich.table import Table -from . gtformats import GT_Format, TableFormat, Line, DataRow +from . formats import GT_Format, TableFormat, Line, DataRow __all__ = ['MD2DF', 'Escaping', 'TextLength', @@ -221,6 +222,88 @@ class TextLength: "?": 0.6, " ": 0.4, } + width_table = { + "a": 0.444, + "b": 0.5, + "c": 0.444, + "d": 0.5, + "e": 0.444, + "f": 0.333, + "g": 0.5, + "h": 0.5, + "i": 0.278, + "j": 0.278, + "k": 0.5, + "l": 0.278, + "m": 0.778, + "n": 0.5, + "o": 0.5, + "p": 0.5, + "q": 0.5, + "r": 0.333, + "s": 0.389, + "t": 0.278, + "u": 0.5, + "v": 0.5, + "w": 0.722, + "x": 0.5, + "y": 0.5, + "z": 0.444, + "A": 0.722, + "B": 0.667, + "C": 0.667, + "D": 0.722, + "E": 0.611, + "F": 0.556, + "G": 0.722, + "H": 0.722, + "I": 0.333, + "J": 0.389, + "K": 0.722, + "L": 0.611, + "M": 0.889, + "N": 0.722, + "O": 0.722, + "P": 0.556, + "Q": 0.722, + "R": 0.667, + "S": 0.556, + "T": 0.611, + "U": 0.722, + "V": 0.722, + "W": 0.944, + "X": 0.722, + "Y": 0.722, + "Z": 0.611, + "0": 0.5, + "1": 0.5, + "2": 0.5, + "3": 0.5, + "4": 0.5, + "5": 0.5, + "6": 0.5, + "7": 0.5, + "8": 0.5, + "9": 0.5, + ".": 0.25, + ",": 0.25, + ":": 0.278, + ";": 0.278, + "(": 0.333, + ")": 0.333, + "[": 0.333, + "]": 0.333, + "’": 0.333, + '"': 0.444, + "!": 0.333, + "?": 0.444, + " ": 0.25, + "|": 0.2, + "‘": 0.333, + "{": 0.48, + "}": 0.48, + "-": 0.333, + } char_width = {c: w for chars, w in width_table.items() for c in chars} return char_width.get(c, 0.6) diff --git a/pyproject.toml b/pyproject.toml index 6ab5bbe..95a6eb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,9 @@ [build-system] -requires = ["setuptools", "build"] +requires = [ + "setuptools", + "build", + "setuptools_scm[toml]>=7.0" + ] build-backend = "setuptools.build_meta" [project] @@ -27,6 +31,11 @@ classifiers = [ "Topic :: Office/Business" ] +[tool.setuptools_scm] +write_to = "greater_tables/_version.py" +version_scheme = "post-release" +local_scheme = "no-local-version" + [tool.setuptools.packages.find] include = ["greater_tables", "greater_tables.data"] exclude = ["img", "tests", "docs"]