5.0.0alpha rationalized widths, removed staticmethods from GT into utilities

2026-06-27 16:15:38 +08:00 · 2025-06-25 14:10:47 +01:00
parent 6c3102e03a
commit a9be2834b2
11 changed files with 1336 additions and 1222 deletions
@@ -1,6 +1,22 @@
 Versions
 ==========

+5.0.0
+-------
+* Introduced ``knowledge_df`` as source of all formatting instructions
+* ``df_tex`` and ``df_html`` computed before ``knowledge`` applying TeX and HTML specific transformations which are reflected in the estimated widths
+* Introduced gtutilities to pull out text width estimation, cleaning and escaping, etc.
+* Delete rich table output format?
+* Moved logging setup to separate file, called from cli but optional for use in other situation. GPT recommended approach.
+
+* test cases
+    * tex and or html in data, index, columns, escaped/unescaped
+    * long cols
+    * text with multiindex columns
+
+* TODO
+    * self.padl and r / 12 in make html width adj s/b elsewhere
+
 4.0.0
 -------
 * Moved constants out of magic strings and into config
@@ -1,4 +1,4 @@
-__version__ = '4.0.0'
+__version__ = '5.0.0'
 __project__ = 'greater_tables'
 __author__ = 'Stephen J Mildenhall'

@@ -1,6 +1,15 @@
+"""
+Command line interface: convert files to HTML, text, svg or pdf.
+"""
+
 import click
 import pandas as pd
 from pathlib import Path
+
+from . gtlogging import setup_logging
+
+setup_logging()  # <-- must come before using your package
+
 from .gtconfig import Configurator, write_template
 from .gtcore import GT

@@ -85,10 +85,9 @@ class Configurator(BaseModel):
    padding_trbl: Optional[tuple[int, int, int, int]] = Field(
        None, description="Manual padding in the order (top, right, bottom, left)"
    )
+    tex_to_html: Optional[Callable[[str], str]] = Field(
+        default=None, description="Function to map non-math TeX to HTML, eg remap \\textbf{...}")

-    tikz_scale: float = Field(
-        1.0, description="Scaling factor applied to LaTeX TikZ tables"
-    )
    font_body: float = Field(
        0.9, description="Font size for body text (in em units)"
    )
@@ -132,6 +131,9 @@ class Configurator(BaseModel):
        -1, description="Maximum length for stringified objects (e.g. nested DataFrames); -1 = unlimited"
    )

+    header_alignment: Literal["few", "center"] = Field(
+        default='few', description="NYI!! TOOD Alignment of header cells, few=follow column, center=center." )
+
    max_table_width: int = Field(
        200, description="Maximum table width for markdown/text output mode"
    )
@@ -153,10 +155,13 @@ class Configurator(BaseModel):
    )

    # tikz specific options
+    tikz_scale: float = Field(
+        1.0, description="Scaling factor applied to LaTeX TikZ tables"
+    )
    tikz_column_sep: float = Field(
-        0.5, description="Separation between columns")
+        1, description="Separation between columns")
    tikz_row_sep: float = Field(
-        0.125, description="Separation between rows")
+        0.25, description="Separation between rows")
    tikz_container_env: Literal["table", "figure", "sidewaysfigure"] = Field(
        default="table",
        description="Type of element: 'table', 'figure', or 'sidewaysfigure'"
@@ -175,7 +180,8 @@ class Configurator(BaseModel):
        '', description="non-line commands put at bottom of table")
    tikz_latex: Optional[str] = Field(
        None, description="arguments at top of table \\begin{table}[tikz_latex]")
-
+    tikz_escape_tex: Optional[bool] = Field(
+        True, description="If true, escape non-math TeX like %, _ and \\. Default no escaping, allowing \\textbf{...} to work.")
    # meta
    debug: bool = Field(
        False, description="Run in debug mode with more reporting, include internal ID in caption and use colored output lines")
@@ -6,6 +6,7 @@ Good for testing. Outputs are cached by hash. PDF→SVG uses pdf2svg.
 GPT re-write of my old great2.blog code.
 """

+import logging
 import re
 from pathlib import Path
 from subprocess import run, Popen, PIPE
@@ -14,6 +15,9 @@ from IPython.display import SVG, display
 from .gthasher import txt_short_hash


+logger = logging.getLogger(__name__)
+
+
 class Etcher:
    """Create PDF and SVG files from Tikz blocks."""
    # Full TeX preamble to generate a .fmt if needed
@@ -76,13 +80,13 @@ class Etcher:
            '&pdflatex',
            tmp.name,
            ]
-        print(f'Running {" ".join(cmd)} to build format file...')
+        logger.info(f'Running {" ".join(cmd)} to build format file...')
        (self.file_path.parent / 'make_format.bat').write_text(" ".join(cmd), encoding='utf-8')
        self.run_command(cmd, raise_on_error=True, cwd=self.out_path)
        # tidy up ... to some extent
        # tmp.unlink()
        (self.out_path / f'{self.format_file.stem}.log').unlink()
-        print('...success...format file built', self.format_file.resolve())
+        logger.info('...success...format file built', self.format_file.resolve())

    def process_tikz(self):
        """Compile TikZ to PDF and convert to SVG."""
@@ -109,7 +113,7 @@ class Etcher:
        ]
        (tex_path.parent / 'make_tikz.bat').write_text(" ".join(tex_cmd), encoding='utf-8')
        if self.debug:
-            print("Running:", " ".join(tex_cmd))
+            logger.info("Running:", " ".join(tex_cmd))
        if self.run_command(tex_cmd):
            raise ValueError('TeX failed to compile, not pdf or svg output.')
            # no tidying up
@@ -122,7 +126,7 @@ class Etcher:
                str(svg_path)
            ]
            if self.debug:
-                print("Running:", " ".join(svg_cmd))
+                logger.info("Running:", " ".join(svg_cmd))
            self.run_command(svg_cmd, raise_on_error=True)

            if not self.debug:
@@ -140,15 +144,15 @@ class Etcher:
        with Popen(command, cwd=cwd, stdout=PIPE, stderr=PIPE, universal_newlines=True) as p:
            stdout, stderr = p.communicate()
            if stdout and self.debug:
-                print('Run command output ends\n', stdout.strip()[-250:])
+                logger.info('Run command output ends\n', stdout.strip()[-250:])
            if stdout:
                if stdout.find('no output PDF file produced') > 0:
-                    print("ERROR no pdf output\n"*5)
+                    logger.error("ERROR no pdf output\n"*5)
                    return -1
            if stderr:
                if raise_on_error:
                    raise RuntimeError(stderr.strip())
                else:
-                    print(stderr.strip())
+                    logger.error(stderr.strip())
                    return -2
        return 0
@@ -6,6 +6,7 @@ from collections import deque
 from datetime import datetime, timedelta
 from importlib.resources import files
 from itertools import cycle, chain, count, zip_longest, product, islice
+# import logging
 from math import prod
 from pathlib import Path
 from typing import Optional, Union
@@ -16,7 +17,8 @@ import re
 import numpy as np
 import pandas as pd

-from IPython.display import display
+
+# logger = logging.getLogger(__name__)


 class Fabricator:
@@ -62,7 +64,8 @@ class Fabricator:
        with data_path.open('r', encoding='utf-8') as f:
            tex_list = pd.read_csv(f, index_col=0)['expr'].to_list()
        # trim down slightly
-        pat = re.compile(r'(?<!\\)\b[a-z]{4,}\b')
+        # dont' want | in tex...messes up tables!
+        pat = re.compile(r'(?<!\\)\b[a-z]{4,}\b|\|')
        tex_list = [i for i in tex_list if not pat.search(i) and len(i)<=50]
        self.rng.shuffle(tex_list)
        self._tex_gen = cycle(tex_list)
@@ -113,7 +116,10 @@ class Fabricator:
            df = df.droplevel(drop_levels, axis=1)
        return df

-    def make(self, rows, data_spec, *, index_levels=1, index_names=None, column_groups=1, column_levels=1, column_names=None, decorate=False, simplify=True, oversample=1):
+    def make(self, rows, data_spec, *, index_levels=1, index_names=None,
+             column_groups=1, column_levels=1, column_names=None,
+             metric_name_spec='',
+             decorate=False, simplify=True, oversample=1):
        """
        Fabricate a dataframe with the given specification.

@@ -171,7 +177,12 @@ class Fabricator:
        index = pd.MultiIndex.from_tuples(islice(product(*(self._generate_column('s', v) for v in self.primes_for_product(rows, index_levels))), rows), names=inames)

        # create with col groups and drop later if needed
-        metric_names = [self.metric_name(t) for t in data_spec]
+        if metric_name_spec == '':
+            metric_names = [self.metric_name(t) for t in data_spec]
+        else:
+            metric_name_spec = self._parse_colspec(metric_name_spec)
+            assert len(metric_name_spec) == len(data_spec), "metric name spec not consistent with data spec"
+            metric_names = [self._generate_column(dt, 1).iloc[0] for dt in metric_name_spec]
        if column_groups > 0:
            cnames = (column_names or [f'c_{i}' for i in range(column_levels)]) + ['metric']
            columns_pfp = self.primes_for_product(column_groups, column_levels)
@@ -8,6 +8,7 @@ import time
 import base64
 import pandas as pd

+
 def df_short_hash(df, length=12):
    """Generate a short, time-dependent hash for a DataFrame (safe for HTML IDs)."""
    hasher = hashlib.blake2b(digest_size=8)  # Smaller output
@@ -0,0 +1,36 @@
+"""
+Manage logger setup for CLI. Users have the option of
+calling this if they want similar logging.
+"""
+
+import logging
+import sys
+
+def setup_logging(level=logging.INFO):
+    # Disable log propagation to prevent duplicates
+    # logger.propagate = False
+    root = logging.getLogger()
+    if root.hasHandlers():
+        root.handlers.clear()
+    root.setLevel(level)
+    handler = logging.StreamHandler(sys.stderr)
+    formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
+    handler.setFormatter(formatter)
+    root.addHandler(handler)
+
+
+# # Disable log propagation to prevent duplicates
+# logger.propagate = False
+# if logger.hasHandlers():
+#     # Clear existing handlers
+#     logger.handlers.clear()
+# # SET DEGBUGGER LEVEL
+# LEVEL = logging.INFO    # DEBUG or INFO, WARNING, ERROR, CRITICAL
+# logger.setLevel(LEVEL)
+# handler = logging.StreamHandler(sys.stderr)
+# handler.setLevel(LEVEL)
+# formatter = logging.Formatter(
+#     '%(asctime)s | %(levelname)s |  %(funcName)-15s | %(message)s')
+# handler.setFormatter(formatter)
+# logger.addHandler(handler)
+# logger.info(f'Logger Setup; {__name__} module recompiled.')
@@ -0,0 +1,784 @@
+"""
+Text length, column width balancing and other utilities.
+"""
+
+import html
+from io import StringIO
+import logging
+import re
+from textwrap import wrap
+
+import pandas as pd
+from rich import box
+from rich.table import Table
+
+from . gtformats import GT_Format, TableFormat, Line, DataRow
+
+
+__all__ = ['MD2DF', 'Escaping', 'TextLength',
+           'Sparsify', 'Indexing', 'Width', 'TextOutput',
+           'RichOutput']
+
+
+logger = logging.getLogger(__name__)
+
+
+class MD2DF:
+    """Convert markdown to dataframe."""
+    @staticmethod
+    def md_to_df(txt):
+        """Convert markdown text string table to DataFrame."""
+        # extract table and optional caption part
+        table, caption = MD2DF.parse_markdown_table_and_caption(txt)
+        m = re.search(r'\{#(tbl[:a-zA-Z0-9_-]+)\}', caption)
+        if m:
+            label = m.group(1)
+            if label != '':
+                # remove from caption
+                caption = caption.replace(f'{{#{label}}}', '').strip()
+        else:
+            label = ''
+        # print(f'{caption = } and {label = }')
+        if table == '':
+            raise ValueError('Bad markdown table')
+
+        # remove starting and ending | in each line (optional anyway)
+        txt = re.sub(r'^\||\|$', '', table, flags=re.MULTILINE)
+        txt = txt.split('\n')
+        # remove starting and ending *'s added by hand - but try to avoid * within headings!
+        txt[0] = '|'.join([re.sub(r'^\*\*?|\*\*?$', '', i.strip())
+                          for i in txt[0].split('|')])
+
+        # remove the alignment row
+        alignment_row = txt.pop(1)
+        aligners = []
+        for t in alignment_row.split('|'):
+            if t[0] == ':' and t[-1] == ':':
+                aligners.append('c')
+            elif t[0] == ':':
+                aligners.append('l')
+            elif t[-1] == ':':
+                aligners.append('r')
+            else:
+                # no alignment info
+                pass
+        if len(aligners) == 0:
+            aligners = None
+        else:
+            aligners = ''.join(aligners)
+        txt = [[j.strip() for j in i.split('|')] for i in txt]
+        df = pd.DataFrame(txt).T
+        df = df.set_index(0)
+        df = df.T
+        return df, aligners, caption, label
+
+    @staticmethod
+    def parse_markdown_table_and_caption(txt: str) -> tuple[str, str | None]:
+        """
+        Parses a Markdown table and an optional caption from a given string,
+        handling cases where only the caption is present.
+
+        Args:
+            txt: The input string.
+
+        Returns:
+            A tuple containing the table string (empty if not found) and the caption string (or None if no caption).
+        """
+        table_match = re.search(r"((?:\|.*\|\s*(?:\n|$))+)", txt, re.DOTALL)
+        caption_match = re.search(
+            r"^(?:table)?:\s*(.+)", txt, re.MULTILINE + re.IGNORECASE)
+
+        table_part = table_match.group(1).strip() if table_match else ""
+        caption_part = caption_match.group(1) if caption_match else ""
+
+        return table_part.strip(), caption_part.strip()
+
+
+class Escaping:
+    """Escape html and tex within tables."""
+    @staticmethod
+    def clean_name(n):
+        """
+        Escape underscores for using a name in a DataFrame index
+        and converts to a string. Also escape %.
+
+        Called by Tikz routines.
+
+        :param n: input name, str
+        :return:
+        """
+        try:
+            if type(n) == str:
+                # quote underscores that are not in dollars
+                return '$'.join((i if n % 2 else i.replace('_', '\\_').replace('%', '\\%')
+                                 for n, i in enumerate(n.split('$'))))
+            else:
+                # can't contain an underscore!
+                return str(n)
+        except:
+            return str(n)
+
+    @staticmethod
+    def clean_index(df):
+        """
+        escape _ for columns and index, being careful about subscripts
+        in TeX formulas.
+
+        :param df:
+        :return:
+        """
+        return df.rename(index=Escaping.clean_name, columns=Escaping.clean_name)
+
+    @staticmethod
+    def clean_html_tex(text):
+        r"""
+        Clean TeX entries in HTML: $ -> \( and \) and $$ to \[ \].
+
+        Apply after all other HTML rendering steps. HTML rendering only.
+        """
+        text = re.sub(r'\$\$(.*?)\$\$', r'\\[\1\\]', text, flags=re.DOTALL)
+        # Convert inline math: $...$ → \(...\)
+        text = re.sub(r'(?<!\$)\$(.*?)(?<!\\)\$(?!\$)', r'\\(\1\\)', text)
+        return text
+
+    @staticmethod
+    def escape_tex_outside_math(text):
+        # Pattern to match math environments: $...$, $$...$$, \[...\]
+        if not isinstance(text, str):
+            return text
+        math_pattern = re.compile(
+            r'(\$\$.*?\$\$|\$.*?\$|\\\[.*?\\\])', re.DOTALL)
+
+        # def escape_non_math(s):
+        #     return s.replace('\\', r'\\').replace('%', r'\%')
+        # because of use within tikz tables
+        def escape_non_math(s):
+            return s.replace('\\', r'\textbackslash{}').replace('%', r'\%').replace('_', r'\_')
+
+        parts = []
+        last_end = 0
+        for m in math_pattern.finditer(text):
+            start, end = m.span()
+            parts.append(escape_non_math(text[last_end:start]))
+            parts.append(m.group())  # math part, unescaped
+            last_end = end
+        parts.append(escape_non_math(text[last_end:]))
+
+        return ''.join(parts)
+
+    @staticmethod
+    def escape_df_tex(df):
+        # Escape data elements
+        df = df.map(Escaping.escape_tex_outside_math)
+
+        # Escape index and column values
+        def escape_index(idx):
+            if isinstance(idx, pd.MultiIndex):
+                return pd.MultiIndex.from_tuples(
+                    [tuple(Escaping.escape_tex_outside_math(x)
+                           for x in tup) for tup in idx],
+                    names=[Escaping.escape_tex_outside_math(
+                        n) if n else n for n in idx.names]
+                )
+            else:
+                return pd.Index([Escaping.escape_tex_outside_math(x) for x in idx],
+                                name=Escaping.escape_tex_outside_math(idx.name) if idx.name else None)
+
+        df.index = escape_index(df.index)
+        df.columns = escape_index(df.columns)
+
+        return df
+
+
+class TextLength:
+    """Estimate length of displayed text."""
+    # TeX control sequence display widths (heuristic)
+    TEX_SIMPLE_GLYPHS = {
+        'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
+        'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron', 'pi', 'rho',
+        'sigma', 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 'infty',
+        'sum', 'prod', 'int', 'cup', 'cap', 'vee', 'wedge', 'forall', 'exists',
+        'neg', 'leq', 'geq', 'neq', 'approx', 'to', 'leftarrow', 'rightarrow'
+    }
+    TEX_WIDE = {'frac', 'sqrt', 'sum', 'int', 'prod'}
+    TEX_SPACING = {'quad', 'qquad', ',', ';', ' ', '!'}
+
+    @staticmethod
+    def approximate_char_width_em(c: str) -> float:
+        width_table = {
+            "il.':|!`": 0.3,
+            "frtJ(){}[]*": 0.5,
+            "abcdeghknopqsuvxyz": 0.6,
+            "LCDEFHISTUZ": 0.7,
+            "ABGKNOPQRXYV": 0.8,
+            "mwMW": 0.9,
+            "0123456789": 0.6,
+            "-_=+<>": 0.5,
+            "#$%^&@~": 0.6,
+            ",;": 0.25,
+            '"': 0.4,
+            "/\\": 0.5,
+            "?": 0.6,
+            " ": 0.4,
+        }
+        char_width = {c: w for chars, w in width_table.items() for c in chars}
+        return char_width.get(c, 0.6)
+
+    @staticmethod
+    def text_display_len(s: str) -> float:
+        """Estimate display width in ems, ignoring HTML tags, interpreting TeX, and HTML entities."""
+        def strip_html_tags(text):
+            return re.sub(r'<[^>]*>', '', text)
+
+        def decode_entities(text):
+            return html.unescape(text)
+
+        if '$' not in s and '<' not in s and '&' not in s:
+            return sum(TextLength.approximate_char_width_em(c) for c in s)
+
+        parts = re.split(r'(\$\$.*?\$\$)|(\$.*?\$)', s)
+        total = 0.0
+        for part in parts:
+            if part is None:
+                continue
+            if part.startswith('$$') and part.endswith('$$'):
+                total += TextLength.estimate_math_width(part[2:-2])
+            elif part.startswith('$') and part.endswith('$'):
+                total += TextLength.estimate_math_width(part[1:-1])
+            else:
+                visible = strip_html_tags(part)
+                decoded = decode_entities(visible)
+                total += sum(TextLength.approximate_char_width_em(c) for c in decoded)
+        return total
+
+    @staticmethod
+    def estimate_math_width(tex: str) -> float:
+        tokens = re.findall(r'\\[a-zA-Z]+|[a-zA-Z0-9]|.', tex)
+        width = 0.0
+        for tok in tokens:
+            if tok.startswith('\\'):
+                name = tok[1:]
+                if name in TextLength.TEX_SIMPLE_GLYPHS:
+                    width += 0.6
+                elif name in TextLength.TEX_WIDE:
+                    width += 1.5
+                elif name in TextLength.TEX_SPACING:
+                    width += 0.4
+                else:
+                    width += 1.0  # unknown control sequences
+            elif tok in '{}':
+                continue  # grouping only
+            elif tok in '^_':
+                width += 0.3  # assume sub/superscript takes some space
+            else:
+                width += TextLength.approximate_char_width_em(tok)
+        return width
+
+    # original
+    # @staticmethod
+    # def text_display_len(s: str) -> int:
+    #     """Estimate text display length in em of a string allowing for TeX constructs."""
+    #     # note you DO WANT SPACES! So, no strip applied ever.
+    #     if s.find('$') < 0:
+    #         return len(s)
+    #     parts = re.split(r'(\$\$.*?\$\$)|(\$.*?\$)', s)
+    #     total = 0
+    #     for part in parts:
+    #         if part is None:
+    #             continue
+    #         if part.startswith('$$') and part.endswith('$$'):
+    #             total += TextLength._estimate_math_width(part[2:-2])
+    #         elif part.startswith('$') and part.endswith('$'):
+    #             total += TextLength._estimate_math_width(part[1:-1])
+    #         else:
+    #             total += len(part)
+    #     return total
+
+    # @staticmethod
+    # def _estimate_math_width(tex: str) -> int:
+    #     tokens = re.findall(r'\\[a-zA-Z]+|[a-zA-Z0-9]|.', tex)
+    #     width = 0
+    #     for tok in tokens:
+    #         if tok.startswith('\\'):
+    #             name = tok[1:]
+    #             if name in TextLength.TEX_SIMPLE_GLYPHS:
+    #                 width += 1
+    #             elif name in TextLength.TEX_WIDE:
+    #                 width += 3
+    #             elif name in TextLength.TEX_SPACING:
+    #                 width += 1
+    #             else:
+    #                 width += 2  # unknown control sequences
+    #         elif tok in '{}^_':
+    #             continue  # grouping, sub/superscripts: ignore
+    #         else:
+    #             width += 1
+    #     return width
+
+
+class Sparsify:
+    """Sparsify multiindex rows and columns."""
+    @staticmethod
+    def sparsify(df, cs):
+        out = df.copy()
+        for i, c in enumerate(cs):
+            mask = df[cs[:i + 1]].ne(df[cs[:i + 1]].shift()).any(axis=1)
+            out.loc[~mask, c] = ''
+        return out
+
+    @staticmethod
+    def sparsify_mi(mi, bottom_level=False):
+        """
+        as above for a multi index level, without the benefit of the index...
+        really all should use this function
+        :param mi:
+        :param bottom_level: for the lowest level ... all values repeated, no sparsificaiton
+        :return:
+        """
+        last = mi[0]
+        new_col = list(mi)
+        rules = []
+        for k, v in enumerate(new_col[1:]):
+            if v == last and not bottom_level:
+                new_col[k + 1] = ''
+            else:
+                last = v
+                rules.append(k + 1)
+                new_col[k + 1] = v
+        return new_col, rules
+
+
+class Indexing:
+    """Changed column and level from a multi-index."""
+    @staticmethod
+    def changed_column(bit):
+        """Return the column that changes with each row."""
+        tf = bit.ne(bit.shift())
+        tf = tf.loc[tf.any(axis=1)]
+        return tf.idxmax(axis=1)
+
+    @staticmethod
+    def changed_level(idx):
+        """
+        Return the level of index that changes with each row.
+
+        Very ingenious GTP code with some SM enhancements.
+        """
+        # otherwise you alter the actual index
+        idx = idx.copy()
+        idx.names = [i for i in range(idx.nlevels)]
+        # Determine at which level the index changes
+        # Convert MultiIndex to a DataFrame
+        index_df = idx.to_frame(index=False)
+        # true / false match last row
+        tf = index_df.ne(index_df.shift())
+        # changes need at least one true
+        tf = tf.loc[tf.any(axis=1)]
+        level_changes = tf.idxmax(axis=1)
+        return level_changes
+
+
+class Width:
+    """Adjust column widths based on heading widths."""
+
+    @staticmethod
+    def header_adjustment(df, min_widths, space, max_extra):
+        """
+        Fine-adjust heading for optimal config.spacing.
+
+        Return a dict with per-column recommended width adjustments to avoid
+        intra-word breaks and reduce overall header height.
+
+        Parameters:
+            df: DataFrame with 1-level string column names
+            min_widths: dict of column name -> minimal acceptable width
+            space: amount of space available to be allocated
+            max_extra: max extra characters to consider allocating per column
+
+        Returns:
+            dict: column -> additional width to allocate
+        """
+        colnames = list(df.columns)
+        adjustments = {col: 0 for col in colnames}
+        num_lines = 0
+
+        def has_intra_word_break(text: str, width: int) -> bool:
+            """
+            Determine if textwrap.wrap breaks any words in the given text.
+
+            Gemini - GPT code did not work, even after seveal iterations.
+            This is a nice approach to the problem.
+
+            Args:
+                text: The input string.
+                width: The maximum width for wrapping.
+
+            Returns:
+                True if any word is broken across lines, False otherwise.
+            """
+            nonlocal num_lines
+            wrapped_lines = wrap(text, width=width)
+            num_lines = len(wrapped_lines)
+            original_words = text.split()
+
+            reconstructed_text_from_wrapped = " ".join(wrapped_lines)
+            reconstructed_words = reconstructed_text_from_wrapped.split()
+
+            # If the number of words differs, it means some words were split.
+            # This catches cases where a word might be split and then later re-joined
+            # due to subsequent wrapping logic, leading to a different number of words.
+            if len(original_words) != len(reconstructed_words):
+                return True
+
+            # Compare word by word. If any word from the original doesn't exactly match
+            # a word from the reconstructed list, it implies a split.
+            for i in range(len(original_words)):
+                if original_words[i] != reconstructed_words[i]:
+                    return True
+
+            return False
+
+        # First pass: avoid ugly intraword breaks
+        # make dict of col -> longest word length
+        min_acceptable = {c: v for c, v in
+                          zip(colnames, map(lambda x: max(len(i) for i in re.split(r'[ \-/]', x)), colnames))}
+        options = []
+        for col in colnames:
+            if not isinstance(col, str):
+                continue
+            base_width = min_widths[col]
+            if not has_intra_word_break(col, base_width):
+                options.append([col, 0, num_lines])
+                # nothing to be gained, move to next col
+                continue
+            extra0 = max(0, min_acceptable[col] - base_width)
+            if extra0 > max_extra:
+                # ok, can't flatten word because it is too long
+                extra0 = 0
+            elif extra0 == max_extra:
+                # go with that
+                adjustments[col] = max_extra
+                continue
+            # see if col can be flattened within max_extra chars, starting
+            # at extra0, which is enough to avoid intraword breaks
+            for extra in range(extra0, max_extra + 1):
+                if not has_intra_word_break(col, base_width + extra):
+                    options.append([col, extra, num_lines])
+                    if adjustments[col] == 0:
+                        # take first, but compute rest...
+                        adjustments[col] = extra
+            # temporary diagnostic DEBUG information - comment in prod
+            # from IPython.display import display
+            # config.debug = pd.Series([col, min_acceptable[col], base_width, has_intra_word_break(col, base_width), extra0, max_extra,
+            #     wrap(col,  base_width), extra],
+            #     index=['col name', 'min acceptable', 'base_width (from data)', 'intra word break', 'extra0', 'max_extra', 'split', 'selected extra']).to_frame('Value')
+            # display(config.debug)
+        # make df[col name, amount of extra space for col, resulting number of lines]
+        # this is needed as input for the optimal heading function (next)
+        input_df = pd.DataFrame(options, columns=['col', 'extra', 'num_lines'])
+        # min amount to avoid intra work breaks
+        avoid_intra = input_df.groupby('col').min().extra.sum()
+        if avoid_intra >= space:
+            # that's all we can do
+            print("NO FURTHER IMPROVEMENTS")
+        else:
+            # can try for a better solution
+            sol = Width.optimal_heading(input_df, space)
+            adjustments.update(sol[1])
+            logger.info('best solution: %s', sol)
+        # global temp
+        # temp = input_df
+        return adjustments, input_df
+
+    @staticmethod
+    def optimal_heading(input_df: pd.DataFrame, total_es_budget: int) -> tuple[int, dict[str, int]]:
+        """
+        Optimize extra config.spacing for best heading.
+
+        Finds the best way to allocate extra space to minimize max_lines in heading.
+
+        Gemini solution.
+
+        Args:
+            input_df: DataFrame with 'col', 'extra', 'num_lines'.
+            total_es_budget: The total extra space to allocate.
+
+        Returns:
+            A tuple: (min_max_lines, optimal_extra_allocation_per_column).
+
+        Why this approach is effective:
+        ---------------------------------
+
+        * **Optimal Solution:** The binary search guarantees finding the absolute minimum possible `max_lines` because it systematically explores the entire solution space.
+        * **Efficiency:** The `check` function runs in time proportional to the number of columns times the average number of `extra` options per column. The binary search itself performs `log(range_of_num_lines)` iterations. This makes the overall complexity efficient for typical table sizes.
+        * **Flexibility:** It does not assume any particular mathematical function relating `extra` space to `num_lines`. It works with arbitrary discrete relationships provided in the input DataFrame, as long as `num_lines` is non-increasing as `extra` increases (which is the natural expectation for this problem).
+
+        """
+        # Pre-processing
+        unique_cols = input_df['col'].unique().tolist()
+
+        col_extra_num_lines_options = {}
+        for col_name in unique_cols:
+            col_data = input_df[input_df['col'] ==
+                                col_name].sort_values(by='extra')
+            col_extra_num_lines_options[col_name] = list(
+                zip(col_data['extra'], col_data['num_lines']))
+
+        def check(target_max_lines: int) -> bool:
+            current_extra_needed = 0
+            for col_name in unique_cols:
+                min_extra_for_col = float('inf')
+                found_suitable_extra = False
+                for extra_val, num_lines_val in col_extra_num_lines_options[col_name]:
+                    if num_lines_val <= target_max_lines:
+                        min_extra_for_col = extra_val
+                        found_suitable_extra = True
+                        break
+
+                if not found_suitable_extra:
+                    return False
+
+                current_extra_needed += min_extra_for_col
+
+            return current_extra_needed <= total_es_budget
+
+        all_num_lines = input_df['num_lines'].unique()
+
+        # Corrected line: Check length of the numpy array
+        if len(all_num_lines) == 0:
+            return 0, {}
+
+        L = all_num_lines.min()
+        R = all_num_lines.max()
+
+        optimal_max_lines = R
+        best_allocation = {}
+
+        while L <= R:
+            mid_max_lines = L + (R - L) // 2
+
+            temp_current_extra_needed = 0
+            temp_current_allocation = {}
+            possible = True
+            for col_name in unique_cols:
+                min_extra_for_col = float('inf')
+                found_suitable_extra = False
+                for extra_val, num_lines_val in col_extra_num_lines_options[col_name]:
+                    if num_lines_val <= mid_max_lines:
+                        min_extra_for_col = extra_val
+                        found_suitable_extra = True
+                        break
+
+                if not found_suitable_extra:
+                    possible = False
+                    break
+
+                temp_current_extra_needed += min_extra_for_col
+                temp_current_allocation[col_name] = min_extra_for_col
+
+            if possible and temp_current_extra_needed <= total_es_budget:
+                optimal_max_lines = mid_max_lines
+                best_allocation = temp_current_allocation.copy()
+                R = mid_max_lines - 1
+            else:
+                L = mid_max_lines + 1
+
+        return optimal_max_lines, best_allocation
+
+
+class TextOutput:
+    """Convert dataframe to text, replaces pd.DataFrame.to_markdown."""
+
+    @staticmethod
+    def make_text_table(
+        df: pd.DataFrame,
+        data_col_widths: list[int],
+        data_col_aligns: list[str],
+        *,
+        index_levels: int = 1,
+        fmt: TableFormat = GT_Format
+    ) -> str:
+        """
+        Render self.df as a wrapped, boxed table.
+
+        Output like tabulate's mixed_grid with support for:
+        - Multi-level column headers (always shown, bottom-aligned, can wrap)
+        - Split index vs. body section with heavy vertical separator
+        - Per-column width and alignment
+        - Wrapped body cells with top alignment
+
+        Custom code to print a dataframe to text.
+
+        pd.DataFrame.to_string uses tabulate.tabulate which is hard to
+        control. This modoule provides similar functionality with greater
+        control over column widths and the ability to demark the index
+        columns.
+
+        Returns:
+            str: A fully formatted table as a string (useful for print, logs, or files).
+        """
+        buf = StringIO()
+
+        def _write_line(line: str) -> None:
+            """Writes a line to the buffer followed by a newline."""
+            buf.write(line + '\n')
+
+        def _format_cell(text: str, width: int, align: str) -> list[str]:
+            """
+            Formats a single cell, wrapping text and applying padding and alignment.
+            Returns a list of strings, each representing a line of the cell.
+            """
+            lines = wrap(str(text), width=width) or ['']
+            padded_width = width + 2 * fmt.padding
+            return [
+                (" " * fmt.padding)
+                + (line.ljust(width) if align == 'left'
+                   else line.center(width) if align == 'center'
+                   else line.rjust(width)) +
+                (" " * fmt.padding)
+                for line in lines
+            ]
+
+        def _make_horizontal_line(line_fmt: Line, col_widths: list[int]) -> str:
+            """Constructs a full horizontal line for the table."""
+            parts = []
+            for i, w in enumerate(col_widths):
+                total = w + 2 * fmt.padding
+                if index_levels and i == index_levels:
+                    parts.append(line_fmt.index_sep)
+                elif i > 0:
+                    parts.append(line_fmt.sep)
+                parts.append(line_fmt.hline * total)
+            return f"{line_fmt.begin}{''.join(parts)}{line_fmt.end}"
+
+        def _make_data_row(row_fmt: DataRow, line_cells: list[str]) -> str:
+            """Constructs a single data row from formatted cell strings."""
+            parts = []
+            for i, cell in enumerate(line_cells):
+                if index_levels and i == index_levels:
+                    parts.append(row_fmt.index_sep)
+                elif i > 0:
+                    parts.append(row_fmt.sep)
+                parts.append(cell)
+            return f"{row_fmt.begin}{''.join(parts)}{row_fmt.end}"
+
+        def _render_header_level(wrapped_cells: list[list[str]], level_widths: list[int]) -> list[str]:
+            """
+            Renders a single level of the header, ensuring cells are bottom-aligned.
+            Returns a list of strings, each representing a line of the header.
+            """
+            max_height = max(len(c) for c in wrapped_cells)
+            padded_cells = [
+                [' ' * (w + 2 * fmt.padding)] * (max_height - len(cell)) + cell
+                for cell, w in zip(wrapped_cells, level_widths)
+            ]
+            return [_make_data_row(fmt.headerrow, [col[i] for col in padded_cells]) for i in range(max_height)]
+
+        col_levels = df.columns.nlevels
+        col_tuples = df.columns if col_levels > 1 else [
+            (c,) for c in df.columns]
+
+        # Step 1: format each level of the column headers (one header line per level)
+        # header alignment is left in index and center in body
+        index_col_aligns = [
+            'left' if i < index_levels else 'center' for i in range(len(data_col_aligns))]
+        _write_line(_make_horizontal_line(fmt.lineabove, data_col_widths))
+        # collect all wrapped + bottom-aligned rows for each level
+        for level in range(col_levels):
+            level_texts = [str(t[level] if level < len(t) else '')
+                           for t in col_tuples]
+            wrapped_cells = [_format_cell(txt, w, a) for txt, w, a in zip(
+                level_texts, data_col_widths, index_col_aligns)]
+            level_rows = _render_header_level(wrapped_cells, data_col_widths)
+            for row in level_rows:
+                _write_line(row)
+            if level < col_levels - 1:
+                _write_line(_make_horizontal_line(
+                    fmt.linebetweenrows, data_col_widths))
+        _write_line(_make_horizontal_line(
+            fmt.linebelowheader, data_col_widths))
+
+        for row_idx, (_, row) in enumerate(df.iterrows()):
+            data_cells = [
+                _format_cell(val, w, a)
+                for val, w, a in zip(row.values, data_col_widths, data_col_aligns)
+            ]
+            max_height = max(len(c) for c in data_cells)
+            padded = [
+                c + [' ' * (w + 2 * fmt.padding)] * (max_height - len(c))
+                for c, w in zip(data_cells, data_col_widths)
+            ]
+            for i in range(max_height):
+                _write_line(_make_data_row(
+                    fmt.datarow, [col[i] for col in padded]))
+
+            if row_idx < len(df) - 1:
+                _write_line(_make_horizontal_line(
+                    fmt.linebetweenrows, data_col_widths))
+            else:
+                _write_line(_make_horizontal_line(
+                    fmt.linebelow, data_col_widths))
+
+        return buf.getvalue()
+
+
+class RichOutput:
+    """Render to a rich table."""
+
+    @staticmethod
+    def make_rich_table(
+        df,
+        column_widths,
+        column_alignments=None,
+        num_index_columns=0,
+        title=None,
+        show_lines=False,
+        box_style=box.SIMPLE_HEAVY,
+    ):
+        """
+        Render a preformatted DataFrame as a Rich table.
+
+        Parameters:
+            df (pd.DataFrame): DataFrame with all string values.
+            column_widths (dict or list): Widths by column name or position.
+            column_alignments (dict or list): Alignments ('left', 'center', 'right').
+            num_index_columns (int): Number of left-most columns to treat as index-like.
+            title (str): Optional title.
+            show_lines (bool): Add row separator lines.
+            box_style (rich.box.Box): Border style (see below).
+        """
+        colnames = list(df.columns)
+
+        if isinstance(column_widths, list):
+            column_widths = {colnames[i]: w for i,
+                             w in enumerate(column_widths)}
+
+        if column_alignments is None:
+            column_alignments = {}
+        elif isinstance(column_alignments, list):
+            column_alignments = {
+                colnames[i]: a for i, a in enumerate(column_alignments)}
+
+        table = Table(title=title,
+                      box=box_style,
+                      show_lines=show_lines,
+                      expand=True)
+
+        for i, col in enumerate(colnames):
+            is_index = i < num_index_columns
+            table.add_column(
+                header=str(col),
+                width=column_widths.get(col, None),
+                justify=column_alignments.get(col, "left"),
+                style="dim" if is_index else None,
+                header_style="bold dim" if is_index else "bold",
+                no_wrap=False,
+                overflow="fold",
+                vertical="middle",
+                # divider=divider,
+            )
+
+        for _, row in df.iterrows():
+            table.add_row(*row.tolist())
+
+        return table
@@ -0,0 +1,11 @@
+Obtaining file:///C:/S/TELOS/Python/greater_tables_project
+  Installing build dependencies: started
+  Installing build dependencies: finished with status 'done'
+  Checking if build backend supports build_editable: started
+  Checking if build backend supports build_editable: finished with status 'done'
+  Getting requirements to build editable: started
+  Getting requirements to build editable: finished with status 'done'
+  Preparing editable metadata (pyproject.toml): started
+  Preparing editable metadata (pyproject.toml): finished with status 'done'
+Collecting bs4 (from greater_tables==4.0.0)
+  Using cached bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)