Halfway to adding tikz support - post epiphany about how this works!

2026-06-27 16:15:38 +08:00 · 2025-03-08 16:05:36 +00:00
parent 71a2dd41d9
commit 09cdf82398
6 changed files with 1677 additions and 204 deletions
@@ -1,2 +1,9 @@
 # Greater Tables

+Creating presentation-quality tables from pandas DataFrames is frustrating. It is hard to left-align text and right-align numbers using the default pandas display or `.to_html`. The package `great_tables` does a really nice job with pandas and polars frames, but it does not support indexes, and formatting numbers still requires care.
+
+## Examples
+
+
+
+## API
@@ -1,203 +0,0 @@
-# table formatting again
-import pandas as pd
-# from IPython.display import HTML
-
-
-def gtqd(df, col_align='', formatters=None, ratio_cols=None, **kwargs):
-    """Better HTML output for a DataFrame."""
-    table_id = f'T{id(df):x}'[::2].upper()
-    style = []
-
-    def r(x):
-        """Default ratio format."""
-        try:
-            return f'{x:.1%}'
-        except:
-            return x
-
-    def _float(x):
-        try:
-            return f'{x:,.3f}'
-        except:
-            return x
-
-    def _int(x):
-        try:
-            return f'{x:,.3f}'
-        except:
-            return x
-
-    # see if index col names in formatters?
-    dt = df.dtypes
-    if ratio_cols is None:
-        ratio_cols = []
-    elif not isinstance(ratio_cols, (list, tuple)):
-        ratio_cols = list(ratio_cols)
-
-    float_cols = df.select_dtypes(include='float').columns
-    integer_cols = df.select_dtypes(include='int').columns
-    if formatters is None:
-        formatters = {}
-
-    for c in df.columns:
-        if c not in formatters.keys():
-            # set a default
-            formatters[c] = r if c in ratio_cols else (
-                _float if c in float_cols else (
-                    _int if c in integer_cols else
-                    lambda x: x)
-                )
-
-    html = df.to_html(table_id=table_id, formatters=formatters, **kwargs)
-
-    if col_align == '':
-        # guess: index l, numeric r rest l
-        idx = 'l' * df.index.nlevels
-        numeric_cols = df.select_dtypes('number').columns
-        rc = ''.join('r' if c in numeric_cols else 'l' for c in df.columns)
-        col_align = idx + rc
-
-    # col no -> lrc -> spell out
-    d = {'l': 'left', 'r': 'right', 'c':'center'}
-    ca = col_align
-    ca = dict(zip(range(1, 1+len(ca)), map(d.get, ca)))
-    style.append('<style>')
-
-    style.append(f'''
-#{table_id}  {{
-/*  border-collapse: collapse;*/
-  width: 100%;
-/*  margin above left below right table*/
-  margin: 0px 0 10px 0;
-  font-family: "Open Sans Condensed", "Arial Narrow", Arial, "Roboto Condensed",  sans-serif;
-  font-size: 0.9em;
-}}
-
-''')
-
-
-    for i in range(1, 1+len(ca)):
-        style.append(
-            f'#{table_id} tbody td:nth-child({i}){{ text-align: {ca[i]}; }}')
-    style.append(f'#{table_id} th {{ text-align: center;}}')
-    style.append('</style>\n')
-
-    if len(style):
-        style = '\n'.join(style)
-    else:
-        style = ''
-
-    out = f'{style}{html}'
-
-    return out
-
-
-# def pf(df, *, ratio_cols=None, precision=3, pef_lower=-3, pef_upper=16,
-#     format_index=True):
-#     """Format a DataFrame."""
-
-#     df = df.copy()
-
-#     _ratio_names = ['max_LR', 'gross_LR', 'net_LR', 'ceded_LR', 'LR',
-#                     'COC']
-
-#     if ratio_cols == 'all':
-#         ratio_cols = [i for i in df.columns]
-
-#     elif ratio_cols is not None and type(ratio_cols) != list:
-#         ratio_cols = [ratio_cols]
-
-#     def pef(x):
-#         """Pandas engineering formatter."""
-#         return pd.io.formats.format.EngFormatter(accuracy=2, use_eng_prefix=True)(x)
-
-#     pl = 10.**pef_lower
-#     pu = 10.**pef_upper
-
-#     def nf(x):
-#         """Number formatter."""
-#         try:
-#             if x == int(x):
-#                 return f'{x:,.0f}'
-#             elif abs(x - 1) < 1e-3:
-#                 return f"1-{1 - x:.3g}"
-#             elif abs(x) < pl or abs(x) > pu:
-#                 return pef(x)
-#             elif abs(x) > 1e2:
-#                 fmt = f'{{x:,.{precision - 1}f}}'
-#                 return fmt.format(x=x)
-#                 # return f'{x:,.1f}'
-#             else:
-#                 fmt = f'{{x:,.{precision}f}}'
-#                 return fmt.format(x=x)
-#         except:
-#             return x
-
-#     def ratio(x):
-#         try:
-#             return f'{x:.1%}'
-#         except:
-#             return x
-
-#     def integer(x):
-#         return f'{x:,d}'
-
-#     # convert into string
-#     col_list = [f'{c}' for c in df.columns]
-
-#     if ratio_cols is None:
-#         ratio_cols = [c for c in col_list if c in _ratio_names]
-#         if len(ratio_cols) == 0:
-#             ratio_cols = None
-#     if ratio_cols is not None:
-#         col_list = list(set(col_list) - set(ratio_cols))
-
-#     number_cols = df.select_dtypes(include='number').columns
-
-#     index_cache = None
-#     if format_index:
-#         index_cache = df.index.names
-#         df = df. reset_index(drop=False)
-
-#     float_cols = df.select_dtypes(include='float').columns
-#     integer_cols = df.select_dtypes(include='int').columns
-
-#     for c in df:
-#         # if df.dtypes[c] in (int, float)
-#         if c in ratio_cols:
-#             df[c] = df[c].map(ratio)
-#         elif c in float_cols:
-#             df[c] = df[c].map(nf)
-#         elif c in integer_cols:
-#             df[c] = df[c].map(integer)
-#         else:
-#             print(f'Col {c} not treated')
-
-#     if format_index and index_cache is not None:
-#         df = df.set_index(index_cache)
-
-#     # align number columns
-#     # method 1
-#     # sdf = (
-#     #     df.style
-#     #     .applymap(lambda x: 'text-align: right;', subset=number_cols
-#     #         )
-#     # )
-
-#     # Define styles for specific columns
-#     # styles = [
-#     #     {'selector': f'td.col{i}', 'props': [('text-align', 'right')]}  # Apply to specific columns
-#     #     for i in number_cols
-#     # ]
-
-#     # # Apply styles
-#     # styled_df = df.style.set_table_styles(styles)
-
-#     # display(styled_df)
-
-
-#     # Generate table HTML with inline CSS
-#     table_html = df.to_html(index=True, classes="dataframe")
-
-
-#     return HTML(table_html)
@@ -0,0 +1,4 @@
+__version__ = '1.0.0'
+
+from . greater_tables import *
+from . utilities import *
@@ -0,0 +1,122 @@
+import pandas as pd
+import numpy as np
+import datetime as dt
+from IPython.display import HTML, display
+from . greater_tables import GT
+
+
+# __gt_global = GT()
+
+
+# def qhtml(df, **kwargs):
+#     """Generic "quick display" function."""
+#     return HTML(__gt_global(df, **kwargs))
+
+
+# def qd(df, **kwargs):
+#     """Generic "quick display" function."""
+#     if isinstance(df, pd.Series):
+#         if df.name is None:
+#             df.name = 'value'
+#         df = df.to_frame()
+#     return display(HTML(__gt_global(df, **kwargs)))
+
+
+def create_three_level_multiindex(df):
+    """
+    Put two randomly drawn levels above the existing column labels of df.
+
+    Parameters:
+        df (pd.DataFrame): Input DataFrame; its columns are replaced in place.
+
+    Returns:
+        pd.DataFrame: The same df object, now with three-level column labels.
+    """
+    n_columns = len(df.columns)
+    level_1 = np.random.choice(["A", "B", "C"], size=n_columns)
+    level_2 = np.random.choice(["X", "Y", "Z"], size=n_columns)
+
+    # One tuple per column: (random level 1, random level 2, original label)
+    multi_index = pd.MultiIndex.from_tuples(
+        [(l1, l2, col) for l1, l2, col in zip(level_1, level_2, df.columns)],
+        names=["Level 1", "Level 2", df.columns.name]
+    )
+
+    # Assigning to df.columns mutates the caller's frame; no copy is made
+    df.columns = multi_index
+    return df
+
+
+def test_df(date=False, mi_columns=True):
+    """Make a test dataframe nr rows with multi index."""
+    nr = 10
+    words = 'Parliament organised a year-long programme of events called Parliament in the Making to celebrate the 800th anniversary of the sealing of Magna Carta on 15 June and the 750th anniversary of the first representative parliament on 20 January Events were coordinated with Parliament Week'
+    words = list(set(words.split(' ')))
+    w1 = ['Abel', 'Cain', 'Issac', 'Fred', 'George']
+    w2 = ['South', 'East', 'West', 'North']
+    w4 = ['A', 'B', 'C', 'D']
+    df = pd.DataFrame({'idx1': np.random.choice(w1, nr),
+                       'idx2': np.random.choice(w2, nr),
+                       'idx3': np.random.poisson(2, nr),
+                       'floats': np.random.rand(nr) * 3000.,
+                       'smaller': np.random.rand(nr) * 10 ** np.linspace(-3, 4, nr),
+                       'larger': np.random.choice([-1., 0, 1.], nr) * np.random.rand(nr) * 10 ** np.linspace(3, 12, nr),
+                       'ints': np.random.poisson(20, nr),
+                       'powers': np.pi * 10. ** np.arange(-20, 26, 5),
+                       'ratios': np.random.rand(nr) * 3. - 1.,
+                       'string': [' '.join(np.random.choice(words, 4, replace=False)) for i in range(nr)],
+                       # 'object': [np.random.poisson(2, nr) for i in range(nr)]
+                       })
+
+    if date:
+        df['date'] = [dt.datetime.fromordinal(np.random.randint(dt.date(2020, 1, 1).toordinal(),
+                                                                dt.date(2030, 1, 1).toordinal())) + dt.timedelta(seconds=np.random.randint(86400)) for _ in range(nr)]
+        df['date'] = pd.to_datetime(df['date'])
+    df.columns.name = 'Col name'
+    df = df.set_index(['idx1', 'idx2', 'idx3'])
+    if mi_columns:
+        df = create_three_level_multiindex(df)
+
+    # check unique and sort
+    df = df.loc[df.index[~df.index.duplicated()]]
+    assert np.all(~df.columns.duplicated()), 'Columns not all unique'
+    df = df.sort_index(axis=0).sort_index(axis=1)
+    return df
+
+
+def make_test_dfs():
+    """Return a dict mapping a descriptive label to a test dataframe; the frames differ in index and column setup."""
+    ans = {}
+    df = pd.DataFrame({'x': range(10), 'y': np.arange(10, dtype=float)})
+    df.x = df.x ** 2
+    df['z'] = df.y ** .25
+    # the "basic ..." variants are copies of this frame with index and/or columns names set
+    ans['basic'] = df.copy()
+    df1 = df.copy()
+    df1.index.name = 'idx name'
+    ans['basic w idx name'] = df1.copy()
+
+    df1 = df.copy()
+    df1.columns.name = 'col name'
+    ans['basic w col name'] = df1.copy()
+
+    df1 = df.copy()
+    df1.index.name = 'idx name'
+    df1.columns.name = 'col name'
+    ans['basic w both names'] = df1.copy()
+    # df1 (with both names set) gains a column of random datetimes; the stored copy above is unaffected
+    df1['date'] = [dt.datetime.fromordinal(np.random.randint(dt.date(2020, 1, 1).toordinal(),
+                   dt.date(2030, 1, 1).toordinal())) + dt.timedelta(seconds=np.random.randint(86400))
+                    for _ in range(len(df1))]
+    df2 = df1.set_index('date')
+    ans['time series'] = df2.copy()
+
+    df2 = df1.set_index('date', append=True)
+    ans['time series and range'] = df2.copy()
+    # frames from test_df; NOTE(review): droplevel(2, axis=1) removes the original column labels, leaving the two random levels - confirm intended
+    ans['realistic'] = test_df(date=False, mi_columns=False)
+    ans['realistic w date'] = test_df(date=True, mi_columns=False).droplevel(2, axis=0)
+    ans['realistic mi'] = test_df(date=False, mi_columns=True).droplevel(2, axis=1)
+    ans['realistic mi w date'] = test_df(date=True, mi_columns=True).droplevel(2, axis=0).droplevel(2, axis=1)
+
+    return ans
@@ -8,6 +8,11 @@ version = "1.0.0"
 description = "Perfect tables from pandas"
 authors = [{name = "Stephen J Mildehall", email = "mynl@me.com" }]
 readme = "README.md"
-license = { text = "BSD 3-Clause" }
+license = { text = "MIT" }
 requires-python = ">=3.10"
 dependencies = ["pandas"]
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+]