Tidied logging; updated doc-test batch file

This commit is contained in:
Stephen Mildenhall
2025-07-04 09:57:33 +01:00
parent cc3b037016
commit 146579c9dd
5 changed files with 170 additions and 154 deletions
+85 -58
View File
@@ -1,57 +1,79 @@
@echo off
setlocal
:: --- Configuration ---
set "PYTHON_VERSION=3.10"
:: audit call: with python version and optional mode: new (default) or refresh
if "%1"=="" (
echo Usage: build.bat PYTHON_VERSION new^|refresh
exit /b 1
)
if /i not "%2"=="new" if /i not "%2"=="refresh" (
echo Usage: build.bat PYTHON_VERSION new^|refresh
echo Invalid second argument mode: "%2"
echo Must be 'new' or 'refresh'.
exit /b 1
)
:: --- Configuration CUSTOMIZE HERE! ---
set "PYTHON_VERSION=%1"
set "MODE=%2"
set "PROJECT_NAME=greater_tables_project"
REM set "PROJECT_REPO=https://github.com/mynl/%PROJECT_NAME%.git"
set "PROJECT_REPO=c:\s\telos\python\greater_tables_project"
set "BUILD_DIR=C:\tmp\%PROJECT_NAME%_rtd_build"
set "BUILD_DIR=C:\tmp\%PROJECT_NAME%_rtd_build_%1"
set "VENV_DIR=%BUILD_DIR%\venv"
set "HTML_OUTPUT_DIR=%BUILD_DIR%\html"
set "PORT=9800"
:: --- Prepare Environment and Clone Repository ---
if /i "%MODE%"=="new" (
echo Cleaning previous build directory...
pushd C:\tmp
rmdir /s /q "%BUILD_DIR%" >nul 2>&1
mkdir "%BUILD_DIR%"
:: --- Prepare Environment ---
echo Cleaning previous build directory...
pushd C:\tmp
rmdir /s /q "%BUILD_DIR%" >nul 2>&1
mkdir "%BUILD_DIR%"
echo Cloning repository...
git clone --depth 1 "%PROJECT_REPO%" "%BUILD_DIR%"
if %ERRORLEVEL% NEQ 0 (
echo Git clone failed. Exiting.
exit /b %ERRORLEVEL%
)
) else (
echo Reusing existing build directory at "%BUILD_DIR%"
)
:: --- Clone Repository ---
echo Cloning repository...
git clone --depth 1 "%PROJECT_REPO%" "%BUILD_DIR%"
rem git clone "%PROJECT_REPO%" "%BUILD_DIR%"
pushd "%BUILD_DIR%"
:: --- Fetch latest changes ---
echo Fetching latest changes...
git fetch origin --force --prune --prune-tags --depth 50 refs/heads/master:refs/remotes/origin/master
if %ERRORLEVEL% NEQ 0 (
echo Git clone failed. Exiting.
echo Git fetch failed. Exiting.
exit /b %ERRORLEVEL%
)
cd "%BUILD_DIR%"
:: --- Fetch latest changes ---
rem echo Fetching latest changes...
rem git fetch origin --force --prune --prune-tags --depth 50 refs/heads/master:refs/remotes/origin/master
rem if %ERRORLEVEL% NEQ 0 (
rem echo Git fetch failed. Exiting.
rem exit /b %ERRORLEVEL%
rem )
:: --- Checkout master branch ---
rem echo Checking out master branch...
rem git checkout --force origin/master
rem if %ERRORLEVEL% NEQ 0 (
rem echo Git checkout failed. Exiting.
rem exit /b %ERRORLEVEL%
rem )
echo Checking out master branch...
git checkout --force origin/master
if %ERRORLEVEL% NEQ 0 (
echo Git checkout failed. Exiting.
exit /b %ERRORLEVEL%
)
:: --- Setup Virtual Environment ---
echo Creating virtual environment for Python %PYTHON_VERSION%...
:: Assuming 'uv' is installed and available in PATH.
:: If not, you might need to install it: uv pip install uv
uv venv "%VENV_DIR%" --python %PYTHON_VERSION%
if %ERRORLEVEL% NEQ 0 (
echo Failed to create virtual environment. Ensure uv and Python %PYTHON_VERSION% are available. Exiting.
exit /b %ERRORLEVEL%
if /i "%MODE%"=="new" (
echo Creating virtual environment for Python %PYTHON_VERSION%...
uv venv "%VENV_DIR%" --python %PYTHON_VERSION%
if %ERRORLEVEL% NEQ 0 (
echo Failed to create virtual environment. Exiting.
exit /b %ERRORLEVEL%
)
)
if not exist "%VENV_DIR%\Scripts\activate.bat" (
echo Virtual environment not found at "%VENV_DIR%".
echo Please run with 'new' mode first to create it.
exit /b 1
)
call "%VENV_DIR%\Scripts\activate.bat"
@@ -60,26 +82,28 @@ if %ERRORLEVEL% NEQ 0 (
exit /b %ERRORLEVEL%
)
:: --- Install Dependencies ---
echo Upgrading setuptools...
uv pip install --upgrade setuptools
if %ERRORLEVEL% NEQ 0 (
echo Failed to upgrade setuptools. Exiting.
exit /b %ERRORLEVEL%
)
if /i "%MODE%"=="new" (
:: --- Install Dependencies ---
echo Upgrading setuptools...
uv pip install --upgrade setuptools
if %ERRORLEVEL% NEQ 0 (
echo Failed to upgrade setuptools. Exiting.
exit /b %ERRORLEVEL%
)
echo Installing Sphinx...
uv pip install --upgrade sphinx
if %ERRORLEVEL% NEQ 0 (
echo Failed to install Sphinx. Exiting.
exit /b %ERRORLEVEL%
)
echo Installing Sphinx...
uv pip install --upgrade sphinx
if %ERRORLEVEL% NEQ 0 (
echo Failed to install Sphinx. Exiting.
exit /b %ERRORLEVEL%
)
echo Installing project dependencies from pyproject.toml...
uv pip install --upgrade --no-cache-dir .[dev]
if %ERRORLEVEL% NEQ 0 (
echo Failed to install project dependencies. Exiting.
exit /b %ERRORLEVEL%
echo Installing project dependencies from pyproject.toml...
uv pip install --upgrade --no-cache-dir .[dev]
if %ERRORLEVEL% NEQ 0 (
echo Failed to install project dependencies. Exiting.
exit /b %ERRORLEVEL%
)
)
:: --- Build HTML Documentation ---
@@ -90,14 +114,17 @@ if %ERRORLEVEL% NEQ 0 (
exit /b %ERRORLEVEL%
)
echo.
echo HTML documentation built successfully in "%HTML_OUTPUT_DIR%"
echo run cd "%HTML_OUTPUT_DIR%" && python -m http.server %PORT%
echo to serve the documentation.
:: --- Launch Web Server and Open Docs ---
echo Launching a simple web server for the documentation...
start /b cmd /c "cd /d "%HTML_OUTPUT_DIR%" && python -m http.server %PORT%"
echo Opening documentation in your default browser...
start "" "http://localhost:%PORT%"
rem echo Launching a simple web server for the documentation...
rem start /b cmd /c "cd /d "%HTML_OUTPUT_DIR%" && python -m http.server %PORT%"
rem echo Opening documentation in your default browser...
rem start "" "http://localhost:%PORT%"
:: --- Optional: Build LaTeX/PDF (commented out) ---
:: echo Building LaTeX/PDF documentation...
+1 -1
View File
@@ -2,7 +2,7 @@ __project__ = 'greater_tables'
__author__ = 'Stephen J Mildenhall'
__version__ = '5.2.0'
from . _version import __version__
from . core import GT
from . fabrications import Fabricator
from . etcher import Etcher
from . gtlogging import setup_logging
-2
View File
@@ -168,8 +168,6 @@ class Configurator(BaseModel):
default="table",
description="Type of element: 'table', 'figure', or 'sidewaysfigure'"
)
tikz_extra_defs: str = Field(
'', description="TeX defintions and commands put at top of table, eg \\centering.")
tikz_hrule: Optional[list[int]] = Field(
default=None,
description="Optional, list of (0-based) integers for horizontal rules below each value; None means no lines."
+84 -77
View File
@@ -559,7 +559,7 @@ class GT(object):
self._text_knowledge_df = None
self._html_knowledge_df = None
self._tex_knowledge_df = None
self._knowledge_dfs = None
# finally config.sparsify and then apply formaters
# this radically alters the df, so keep a copy for now...
self.df_pre_applying_formatters = self.df.copy()
@@ -929,14 +929,23 @@ class GT(object):
self._tex_knowledge_df = self.estimate_column_widths_by_mode('tex')
return self._tex_knowledge_df
@property
def knowledge_dfs(self):
if self._knowledge_dfs is None:
self._knowledge_dfs = pd.concat((self.text_knowledge_df.T,
self.html_knowledge_df.T, self.tex_knowledge_df.T),
keys=['text','html', 'tex'], names=['mode', 'measure'])
self._knowledge_dfs['Total'] = self._knowledge_dfs.fillna(0.).apply(
lambda row: sum(x for x in row if pd.api.types.is_number(x)), axis=1)
idx = self._knowledge_dfs.query('Total == 0').index
self._knowledge_dfs.loc[idx, 'Total'] = ''
self._knowledge_dfs = self._knowledge_dfs.fillna('')
return self._knowledge_dfs
def width_report(self):
"""Return a report summarizing the width information."""
natural = self.text_knowledge_df.natural_width.sum()
minimum = self.text_knowledge_df.minimum_width.sum()
if 'header_tweak' in self.text_knowledge_df:
ht = self.text_knowledge_df.header_tweak.sum()
else:
ht = 0
text = self.text_knowledge_df.recommended.sum()
h = self.html_knowledge_df.recommended.sum()
tex = self.tex_knowledge_df.recommended.sum()
@@ -947,7 +956,6 @@ class GT(object):
bit = pd.DataFrame({
'text natural': self.text_knowledge_df.natural_width,
'text minimum': self.text_knowledge_df.minimum_width,
'text header tweak': self.text_knowledge_df.header_tweak,
'text recommended': self.text_knowledge_df.recommended,
'html recommended': self.html_knowledge_df.recommended,
'tex recommended': self.tex_knowledge_df.recommended,
@@ -956,7 +964,6 @@ class GT(object):
ser = pd.Series({
'text natural': natural,
'text minimum': minimum,
'text header tweak': ht,
'text recommended': text,
'html recommended': h,
'tex recommended': tex,
@@ -1023,14 +1030,16 @@ class GT(object):
# Series=dict colname->max width of cells in column
natural_width = df.map(lambda x: len_function(x.strip())).max(axis=0).to_dict()
# in text mode: figure out where you can break
pat = r'(?<=[.,;:!?)\]}\u2014\u2013])\s+|--+\s+|\s+'
# in text mode: figure out where you can break; pat breaks after punctuation or at -
pat = r'(?<=[.,;:!?)\]}\u2014\u2013])\s+|--*\s+|\s+'
iso_date_split = r'(?<=\b\d{4})-(?=\d{2}-\d{2})'
pat = f'{pat}|{iso_date_split}'
# Calculate ideal (no wrap) and minimum possible widths for all columns
# The absolute minimum width each column can take (e.g., longest word for text)
minimum_width = {}
header_natural = {}
header_minimum = {}
for col_name in df.columns:
minimum_width[col_name] = (
df[col_name].str
@@ -1040,6 +1049,10 @@ class GT(object):
.max(axis=1)
.max()
)
# ensure is a tuple
ctuple = col_name if isinstance(col_name, tuple) else (col_name, )
header_natural[col_name] = max(map(len_function, ctuple))
header_minimum[col_name] = min(len_function(part) for i in ctuple for part in re.split(pat, str(i)))
# begin to assemble the parts
# ans will be the col_width_df; break_penalties needed by all methods
@@ -1052,38 +1065,45 @@ class GT(object):
}, index=df.columns)
ans['acceptable_width'] = np.where(
ans.break_penalties == Breakability.ACCEPTABLE, ans.minimum_width, ans.natural_width)
ans['header_natural'] = header_natural
ans['header_minimum'] = header_minimum
if mode in ('html', 'tex'):
# put in some padding TODO KLUDGE
ans['natural_width'] += 1
ans['minimum_width'] += 1
ans['header_natural'] += 1
ans['header_minimum'] += 1
# adjustments and recommendations - these are keyed to text output with padding
natural, acceptable, minimum = ans.iloc[:, 3:].sum()
PADDING = 2 # per column TODO enhance
natural, acceptable, minimum = ans.iloc[:, 3:6].sum()
head_natural, head_minimum = ans.iloc[:, 6:8].sum()
if mode == 'text':
if self.config.table_width_mode == 'explicit':
# target width INCLUDES padding and column marks |
target_width = self.max_table_width_em - \
(PADDING + 1) * n_col - 1
logger.info(f'{self.max_table_width_em=}'
f' ==> {target_width=} after column spacer adjustment')
elif self.config.table_width_mode == 'natural':
# +1 for the pipe!
target_width = natural + (PADDING + 1) * n_col + 1
elif self.config.table_width_mode == 'breakable':
target_width = acceptable + (PADDING + 1) * n_col + 1
elif self.config.table_width_mode == 'minimum':
target_width = minimum + (PADDING + 1) * n_col + 1
# +1 for the pipe | symbol
PADDING = 2 # per column TODO enhance
pad_adjustment = (PADDING + 1) * n_col - 1
else:
# tex and html ignore niceties of padding?? these will be narrower
target_width = self.max_table_width_em
logger.info(f'{target_width=} ignoring column spacers')
PADDING = 1 # per column TODO enhance
pad_adjustment = PADDING * n_col
if self.config.table_width_mode == 'explicit':
# target width INCLUDES padding and column marks |
target_width = self.max_table_width_em - pad_adjustment
elif self.config.table_width_mode == 'natural':
target_width = natural + pad_adjustment
elif self.config.table_width_mode == 'breakable':
target_width = acceptable + pad_adjustment
elif self.config.table_width_mode == 'minimum':
target_width = minimum + pad_adjustment
logger.info('table_width_mode = %s', self.config.table_width_mode)
logger.info('config self.max_table_width_em %s', self.max_table_width_em)
logger.info('target width after column spacer adjustment %s', target_width)
# extra space for the headers to relax, if useful
if self.config.table_width_header_adjust > 0:
max_extra = int(
self.config.table_width_header_adjust * target_width)
max_extra = int(self.config.table_width_header_adjust * target_width)
else:
max_extra = 0
logger.info(f'{mode=} {target_width=}, {natural=}, {acceptable=}, {minimum=}, {max_extra=}')
if target_width > natural:
# everything gets its natural width
ans['recommended'] = ans['natural_width']
@@ -1110,59 +1130,46 @@ class GT(object):
logger.warning(
'Desired width too small for pleasant formatting, table will be too wide by spare space %s < 0.',
space)
logger.info(f'{mode=} {target_width=}, {natural=}, {acceptable=}, {minimum=}, {max_extra=}, {space=}')
# this section tweaks the widths for column headers -> text output only.
# trust tex and html output to naturally make better decisions about line breaks in the heading.
if mode == "text" and space > 0:
if mode == "text" and space > 0 and df.columns.nlevels == 1:
# text mode only: see if some header tweaks are in order (Index only for now, TODO)
if df.columns.nlevels == 1:
# Step 1: baseline comes in from code above
ans['raw_recommended'] = ans['recommended']
# Step 1: baseline comes in from code above
ans['raw_recommended'] = ans['recommended']
# Step 2: get rid of intra-line breaks
if max_extra > 0:
adj = Width.header_adjustment(df, ans['recommended'], space, max_extra)
# create new col and populate per GPT
ans['header_tweak'] = pd.Series(adj)
else:
ans['header_tweak'] = 0
ans['recommended'] = ans['recommended'] + ans['header_tweak']
# Step 2: optimize to get rid of intra-line breaks
if max_extra > 0:
adj = Width.header_adjustment(df, ans['recommended'], space, max_extra)
# create new col and populate per GPT
ans['header_tweak'] = pd.Series(adj)
else:
ans['header_tweak'] = 0
ans['recommended'] = ans['recommended'] + ans['header_tweak']
# in this case zero out impact of header_natural and header_minimum cos don't want to use them below
ans['header_natural'] = ans['recommended']
ans['header_minimum'] = ans['recommended']
# Step 3 (all modes): distribute remaining slack proportionally
# Step 3 (all modes): distribute remaining shortfall proportionally
# account for
# obvs remaining == space if mode is not text
remaining = target_width - ans['recommended'].sum()
ans['pre_space_share_recommended'] = ans['recommended']
ans['pre_shortfall_recommended'] = ans['recommended']
if remaining > 0:
slack = ans['natural_width'] - ans['recommended']
total_slack = slack.clip(lower=0).sum()
if total_slack > 0:
logger.info('total slack to allocate after header adjustments = %s', total_slack)
fractions = slack.clip(lower=0) / total_slack
ans['recommended'] += np.floor(fractions *
remaining).astype(int)
shortfall = ans[['natural_width', 'header_natural']].max(axis=1) - ans['recommended']
total_shortfall = shortfall.clip(lower=0).sum()
if total_shortfall > 0:
logger.info('total shortfall to allocate after header adjustments = %s', total_shortfall)
fractions = shortfall.clip(lower=0) / total_shortfall
ans['proto_recommended'] = ans['recommended'] + np.floor(fractions * remaining).astype(int)
ans['recommended'] = np.minimum(ans[['natural_width', 'header_natural']].max(axis=1),
ans['proto_recommended'])
else:
logger.info('no slack to allocate after header adjustments')
# Ensure final constraint
# try:
# ans['recommended'] = ans['recommended'].astype(int)
# except IntCastingNaNError:
# print('getting error')
# print(ans['recommended'])
# ans['recommended'] = pd.to_numeric(
# ans['recommended'], errors='coerce').fillna(0).astype(int)
# logger.info("Raw rec: %s\tTweaks: %s\tActual: %s\tTarget: %s\tOver/(U): %s",
# ans['raw_recommended'].sum(),
# ans['header_tweak'].sum(),
# ans['recommended'].sum(),
# target_width,
# ans['recommended'].sum() - target_width
# )
logger.info('no shortfall to allocate after header adjustments')
if mode == 'tex':
# tex mode only need tikz raw size for tex code layout
nc_index = self.nindex
tikz_colw = dict.fromkeys(df.columns, 0)
tikz_headw = dict.fromkeys(df.columns, 0)
for i, c in enumerate(df.columns):
@@ -1192,9 +1199,12 @@ class GT(object):
'natural_width',
'acceptable_width',
'minimum_width',
'header_natural',
'header_minimum',
'raw_recommended',
'header_tweak',
'pre_space_share_recommended',
'proto_recommended',
'recommended',
'tikz_colw',
]
@@ -1603,7 +1613,6 @@ class GT(object):
column_sep = self.config.tikz_column_sep
row_sep = self.config.tikz_row_sep
container_env = self.config.tikz_container_env
extra_defs = self.config.tikz_extra_defs
hrule = self.config.tikz_hrule
vrule = self.config.tikz_vrule
post_process = self.config.tikz_post_process
@@ -1631,12 +1640,11 @@ class GT(object):
if not df.columns.is_unique:
# possible index/body column interaction
raise ValueError('tikz routine requires unique column names')
# {extra_defs}
# centering handled by quarto
header = """
\\begin{{{container_env}}}{latex}
{caption}
\\centering{{
% \\centering{{
\\begin{{tikzpicture}}[
auto,
transform shape,
@@ -1654,7 +1662,7 @@ class GT(object):
{post_process}
\\end{{tikzpicture}}
}} % close centering
% }} % close centering
\\end{{{container_env}}}
"""
@@ -1709,7 +1717,6 @@ class GT(object):
debug = ''
sio.write(header.format(container_env=container_env,
caption=caption,
extra_defs=extra_defs,
scale=self.config.tikz_scale,
column_sep=column_sep,
row_sep=row_sep,
-16
View File
@@ -18,19 +18,3 @@ def setup_logging(level=logging.INFO):
handler.setFormatter(formatter)
root.addHandler(handler)
# # Disable log propagation to prevent duplicates
# logger.propagate = False
# if logger.hasHandlers():
# # Clear existing handlers
# logger.handlers.clear()
# # SET DEGBUGGER LEVEL
# LEVEL = logging.INFO # DEBUG or INFO, WARNING, ERROR, CRITICAL
# logger.setLevel(LEVEL)
# handler = logging.StreamHandler(sys.stderr)
# handler.setLevel(LEVEL)
# formatter = logging.Formatter(
# '%(asctime)s | %(levelname)s | %(funcName)-15s | %(message)s')
# handler.setFormatter(formatter)
# logger.addHandler(handler)
# logger.info(f'Logger Setup; {__name__} module recompiled.')