mirror of
https://github.com/wassname/greater_tables_project.git
synced 2026-06-27 16:15:38 +08:00
Working draft of 3.0.0
Added robust testdf
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,119 @@
|
||||
"""
|
||||
Find and process blobs of TeX.
|
||||
|
||||
Change target directory to find other blobs.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class TeXMacros():
    """
    A class for dealing with TeX macros.

    Made out of PublisherBase in blog_tools.py from great2.blog.

    ``_macros`` holds one def-style macro per line. The static methods parse
    that list into a ``{name: body}`` dictionary and expand every use of a
    listed macro found in a piece of text.
    """

    # one "\def\name{body}" per line; each line must end with the closing brace
    _macros = r"""
\def\AA{\mathcal{A}}
\def\atan{\mathrm{atan}}
\def\AVaR{\mathsf{AVaR}}
\def\bbeta{\mathbf{\beta}}
\def\bb{\mathbf b}
\def\bm{\mathbf }
\def\biTVaR{\mathsf{biTVaR}}
\def\corr{\mathsf{Corr}}
\def\cov{\mathsf{cov}}
\def\cp{\mathsf{CP}}
\def\CTE{\mathsf{CTE}}
\def\CVaR{\mathsf{CVaR}}
\def\dint{\displaystyle\int}
\def\dsum{\displaystyle\sum}
\def\ecirc{\accentset{\circ} e}
\def\ecirc{\accentset{\circ} e}
\def\EPD{\mathsf{EPD}}
\def\ES{\mathsf{ES}}
\def\E{\mathsf{E}}
\def\FFF{\mathscr{F}}
\def\FF{\mathcal{F}}
\def\HH{\mathbf{H}}
\def\kpx{{{}_kp_x}}
\def\MM{\mathcal{M}}
\def\NN{\mathbb{N}}
\def\nudge{2}
\def\norm{}
\def\OO{\mathscr{O}}
\def\PPP{\mathscr{P}}
\def\PP{\mathsf{P}}
\def\Pr{\mathsf{Pr}}
\def\QQ{\mathsf{Q}}
\def\RR{\mathbb{R}}
\def\SD{\mathsf{SD}}
\def\TCE{\mathsf{TCE}}
\def\TVaR{\mathsf{TVaR}}
\def\Var{\mathsf{Var}}
\def\var{\mathsf{var}}
\def\VaR{\mathsf{VaR}}
\def\WCE{\mathsf{WCE}}
\def\ww{\mathbf{w}}
\def\XXX{\mathcal{X}}
\def\xx{\mathbf{x}}
\def\XX{\mathbf{X}}
\def\yy{\mathbf{y}}
\def\ZZZ{\mathcal{Z}}
\def\ZZ{\mathbb{Z}}
"""

    @staticmethod
    def process_tex_macros(text):
        """
        Expand standard general.tex macros in the text.

        A macro name is only replaced when it is not immediately followed by
        another ASCII letter. A TeX control word is the longest run of letters
        after the backslash, so without this guard the short names would
        rewrite the front of unrelated commands (``\\var`` inside
        ``\\varepsilon``, ``\\bm`` inside ``\\bmod``).

        :param text: text containing TeX markup.
        :return: text with every listed macro replaced by its body.
        """
        m, regex = TeXMacros.tex_to_dict(TeXMacros._macros.strip())
        if not m:
            return text
        # negative lookahead = end of the control word
        pattern = rf'(?:{regex})(?![A-Za-z])'
        # a function replacement is inserted literally, so backslashes in the
        # macro bodies are not interpreted as regex escapes
        return re.sub(pattern, lambda x: m.get(x[0], x[0]), text)

    @staticmethod
    def tex_to_dict(text):
        """
        Convert text, a series of def{} macros, one per line, into a dictionary.

        Blank lines are ignored; they would otherwise produce an empty key and
        an empty regex alternative that matches everywhere.

        :param text: newline separated def-style macros.
        :return: the dictionary (macro name -> body) and a regex alternation of
            all the escaped names, in definition order.
        """
        lines = (ln.strip() for ln in text.split('\n'))
        m = dict(TeXMacros.tex_splitter(ln) for ln in lines if ln)
        regex = '|'.join(re.escape(k) for k in m)
        return m, regex

    @staticmethod
    def tex_splitter(x):
        """
        Split a single def style tex macro into (name, body).

        The ``\\def`` prefix is removed, the name is everything before the
        first ``{`` and the body is everything after it up to, but excluding,
        the final character (expected to be the closing ``}``).

        :param x: one macro definition, e.g. a def of ``\\E`` with a braced body.
        :return: tuple ``(name, body)``.
        """
        x = x.replace('\\def', '')
        i = x.find('{')
        return x[:i], x[i + 1:-1]
|
||||
|
||||
def find_tex_snippeets(in_dir='\\S\\TELOS\\PIR\\docs',
                       out_file='tex_list.csv'):
    """
    Ripgrep / TeX macro expand list of TeX snippets.

    Runs ``rg`` over the ``*.md`` files below ``in_dir`` looking for inline
    ``$...$`` expressions, expands the TeXMacros macros in the matches, drops
    duplicates, empty lines and any snippet that contains ``\\PP`` or ``$$``,
    and returns what is left in a one-column DataFrame.

    :param in_dir: directory handed to ripgrep as the search root.
    :param out_file: file name, relative to the directory of this module, to
        write the result to as csv; pass ``''`` to skip writing.
    :return: DataFrame with a single column ``expr``, sorted so repeated runs
        give the same file.
    :raises subprocess.CalledProcessError: if ripgrep reports an error.
    :raises FileNotFoundError: if the ``rg`` executable cannot be started.
    """
    result = subprocess.run(
        ['rg', '-N', '-o', '--no-filename', '-g', '*.md', r'\$.+?\$', in_dir],
        capture_output=True,
        text=True,
        check=False,
    )
    # ripgrep exits with 1 when nothing matched; that is an empty result, not
    # a failure. Any other non-zero status is a genuine error.
    if result.returncode not in (0, 1):
        raise subprocess.CalledProcessError(
            result.returncode, result.args,
            output=result.stdout, stderr=result.stderr)

    expanded = TeXMacros.process_tex_macros(result.stdout)
    # set() removes duplicates; sorting makes the output order reproducible
    snippets = sorted(
        s for s in set(expanded.split('\n'))
        if s and '\\PP' not in s and '$$' not in s
    )
    df = pd.DataFrame({'expr': snippets})
    if out_file != '':
        p = Path(__file__).parent / out_file
        print(p)
        df.to_csv(p, encoding='utf-8')
    return df
|
||||
@@ -62,11 +62,11 @@ class GTConfigModel(BaseModel):
|
||||
table_vrule_width: int = Field(
|
||||
1, description="Width of vertical rule separating index from body"
|
||||
)
|
||||
hrule_widths: Optional[tuple[int, int, int]] = Field(
|
||||
(0, 0, 0), description="Tuple of three ints for horizontal rule widths (for multiindex use)"
|
||||
hrule_widths: Optional[tuple[float, float, float]] = Field(
|
||||
(0, 0, 0), description="Tuple of three floats for horizontal rule widths (for multiindex use)"
|
||||
)
|
||||
vrule_widths: Optional[tuple[int, int, int]] = Field(
|
||||
(0, 0, 0), description="Tuple of three ints for vertical rule widths (for multiindex columns)"
|
||||
vrule_widths: Optional[tuple[float, float, float]] = Field(
|
||||
(0, 0, 0), description="Tuple of three floats for vertical rule widths (for multiindex columns)"
|
||||
)
|
||||
|
||||
sparsify: bool = Field(
|
||||
|
||||
@@ -261,7 +261,11 @@ class GT(object):
|
||||
base_config = GTConfigModel()
|
||||
|
||||
# access through config
|
||||
self.config = base_config.model_copy(update=overrides)
|
||||
# update and validate; need to merge to avoid repeated args
|
||||
merged = dict(base_config.model_dump(), **overrides)
|
||||
self.config = GTConfigModel(**merged)
|
||||
# no validation
|
||||
# self.config = base_config.model_copy(update=overrides)
|
||||
|
||||
# deal with alternative input modes for df: None, DataFrame, Series, markdown text table
|
||||
if df is None:
|
||||
|
||||
@@ -26,3 +26,11 @@ def df_short_hash(df, length=12):
|
||||
hash_str = base64.b32encode(hash_bytes).decode("utf-8").rstrip("=") # Trim padding
|
||||
|
||||
return f"T{hash_str[:length]}" # Prefix with 'T' to ensure a valid ID
|
||||
|
||||
|
||||
def txt_short_hash(txt):
    """
    Return a short, stable identifier for a piece of text.

    The UTF-8 bytes of ``txt`` are hashed with MD5, the digest is base32
    encoded, the ``=`` padding is removed and every second character of the
    result is kept.
    """
    digest = hashlib.md5(txt.encode('utf-8')).digest()
    encoded = base64.b32encode(digest).decode("utf-8")
    # drop the trailing '=' padding, then thin to every other character
    return encoded.rstrip("=")[::2]
|
||||
|
||||
+52
-16
@@ -5,7 +5,8 @@ GPT from SJMM design.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from itertools import cycle
|
||||
from importlib.resources import files
|
||||
from itertools import cycle, chain
|
||||
from math import prod
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
@@ -81,21 +82,39 @@ class TestDataFrameFactory:
|
||||
self._index_namer = cycle(nwl)
|
||||
|
||||
# read words and create cycler
|
||||
p = Path(__file__).parent / 'words-12.md'
|
||||
assert p.exists()
|
||||
txt = p.read_text(encoding='utf-8')
|
||||
data_path = files('greater_tables').joinpath('data', 'words-12.md')
|
||||
with data_path.open('r', encoding='utf-8') as f:
|
||||
txt = f.read()
|
||||
word_list = txt.split('\n')
|
||||
temp = word_list[:]
|
||||
random.shuffle(temp)
|
||||
self._word_gen = cycle(temp)
|
||||
|
||||
# read tex expressions and create cycler
|
||||
tex_list = pd.read_csv(Path(__file__).parent /
|
||||
'tex_list.csv')['expr'].to_list()
|
||||
data_path = files('greater_tables').joinpath('data', 'tex_list.csv')
|
||||
with data_path.open('r', encoding='utf-8') as f:
|
||||
tex_list = pd.read_csv(f, index_col=0)['expr'].to_list()
|
||||
# trim down slightly
|
||||
tex_list = [i for i in tex_list if len(i) < 50]
|
||||
random.shuffle(tex_list)
|
||||
self._tex_gen = cycle(tex_list)
|
||||
|
||||
self.simple_namer = {
|
||||
'd': 'date',
|
||||
'f': 'float',
|
||||
'h': 'hash',
|
||||
'i': 'integer',
|
||||
'l': 'large_float',
|
||||
'm': 'yr-mo',
|
||||
'p': 'path',
|
||||
'r': 'ratio',
|
||||
's': 'string',
|
||||
't': 'time',
|
||||
'v': 'extreme_float',
|
||||
'x': 'tex',
|
||||
'y': 'year',
|
||||
}
|
||||
|
||||
# lengths of index (word count) sampled from:
|
||||
self.index_value_lengths = [1]*10 + [2] * 4 + [3]
|
||||
|
||||
@@ -113,13 +132,14 @@ class TestDataFrameFactory:
|
||||
l log float (greater range than float)
|
||||
m year - month
|
||||
p path (filename)
|
||||
r ratio (smaller floats, for percents)
|
||||
sx string length x
|
||||
t time
|
||||
v very large range float
|
||||
x tex text - an equation
|
||||
y year
|
||||
|
||||
|
||||
Args:
|
||||
Args:
|
||||
rows: Number of rows.
|
||||
columns: Column type spec (int for all float cols, or string type codes).
|
||||
index: Index level types (int for RangeIndex or string like 'ti').
|
||||
@@ -168,7 +188,7 @@ class TestDataFrameFactory:
|
||||
['d', 'f', 'i', 's3', 'l', 'h', 't', 'p'], size=self.rng.integers(3, 7))
|
||||
missing = round(float(self.rng.uniform(0, 0.15)), 2)
|
||||
index = ''.join(self.rng.choice(
|
||||
['t', 'd', 'i', 's2'], size=index_levels))
|
||||
['t', 'd', 'y', 'i', 's2'], size=index_levels))
|
||||
col_index = ''.join(self.rng.choice(
|
||||
['s', 's2', 's2', 's3'], size=column_levels))
|
||||
return self.make(rows=rows, columns=''.join(col_types), index=index, col_index=col_index, missing=missing)
|
||||
@@ -182,18 +202,22 @@ class TestDataFrameFactory:
|
||||
else:
|
||||
col_types = self._parse_colspec(columns)
|
||||
# if col_index is an int then use all strings of that depth
|
||||
if isinstance(col_index, int):
|
||||
col_index_types = ['s'] * col_index
|
||||
if col_index == 'simple':
|
||||
col_idx = map(self.simple_namer.get, [i[0] for i in col_types])
|
||||
col_idx = pd.Index(col_idx, name='simple')
|
||||
else:
|
||||
col_index_types = self._parse_colspec(col_index)
|
||||
if isinstance(col_index, int):
|
||||
col_index_types = ['s'] * col_index
|
||||
else:
|
||||
col_index_types = self._parse_colspec(col_index)
|
||||
col_idx = self._make_index(col_index_types, len(col_types))
|
||||
if isinstance(index, int):
|
||||
index = ['s'] * index
|
||||
else:
|
||||
index = self._parse_colspec(index)
|
||||
print(index)
|
||||
# print(index)
|
||||
# col names are a transposed index.
|
||||
df = pd.DataFrame(index=range(rows))
|
||||
col_idx = self._make_index(col_index_types, len(col_types))
|
||||
for dt, c in zip(col_types, range(len(col_idx))):
|
||||
df[c] = self._generate_column(dt, rows)
|
||||
df.columns = col_idx
|
||||
@@ -210,15 +234,23 @@ class TestDataFrameFactory:
|
||||
return pd.Series([" ".join(self.word() for i in range(max_words)) for j in range(n)])
|
||||
if dtype == 'f':
|
||||
return pd.Series(self.rng.normal(loc=100000, scale=250000, size=n))
|
||||
if dtype == 'r':
|
||||
return pd.Series(self.rng.normal(loc=0.5, scale=0.35, size=n))
|
||||
if dtype == 'l':
|
||||
# log float (greater range)
|
||||
return pd.Series(np.exp(self.rng.normal(loc=-4 / 2 + 4, scale=4, size=n)))
|
||||
if dtype == 'v':
|
||||
# log float (greater range)
|
||||
sc = 5
|
||||
return pd.Series(np.exp(self.rng.normal(loc=-sc**2 / 2 + 10, scale=sc, size=n)))
|
||||
if dtype == 'i':
|
||||
return pd.Series(self.rng.integers(-1e4, 1e6, size=n), dtype='int64')
|
||||
if dtype == 'd':
|
||||
start_date = TestDataFrameFactory.random_date_within_last_n_years(
|
||||
10)
|
||||
return pd.Series(pd.date_range(start=start_date, periods=n, freq='D'))
|
||||
if dtype == 'y':
|
||||
return pd.Series(random.sample(range(1990, 2031), n))
|
||||
if dtype == 't':
|
||||
start_dt = datetime.now() - timedelta(days=365 * 2)
|
||||
return pd.Series([
|
||||
@@ -347,7 +379,11 @@ class TestDataFrameFactory:
|
||||
for w, k in zip(level_value_lengths, level_choices)]
|
||||
x = [[next(j) for j in r] for i in range(rows)]
|
||||
names = random.sample(name_word_list, levels)
|
||||
idx = pd.MultiIndex.from_tuples(
|
||||
random.sample(x, rows), names=names).sort_values()
|
||||
if levels == 1:
|
||||
idx = pd.Index(
|
||||
list(chain.from_iterable(random.sample(x, rows))), name=names[0]).sort_values()
|
||||
else:
|
||||
idx = pd.MultiIndex.from_tuples(
|
||||
random.sample(x, rows), names=names).sort_values()
|
||||
assert idx.is_unique
|
||||
return idx
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Create and display svg files from tikz tex tables.
|
||||
|
||||
Good for testing.
|
||||
|
||||
From great2.blog
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import re
|
||||
import yaml
|
||||
from itertools import count
|
||||
from subprocess import Popen, PIPE
|
||||
from IPython.display import display, Markdown, SVG
|
||||
|
||||
from . hasher import txt_short_hash
|
||||
|
||||
|
||||
class TikzProcessor():
    """
    Process a TeX string containing a tikzpicture / tikzcd into pdf and svg.

    Output files are written to ``<base_path>/tikz`` and are named from a
    hash of the input text.
    """

    # Complete stand-alone document. Used when the engine is not pdflatex, in
    # which case no precompiled format supplies the preamble.
    _tex_template_full = """\\documentclass[10pt, border=5mm]{{standalone}}

% needs lualatex - uncomment for Wiley fonts
%\\usepackage{{fontspec}}
%\\setmainfont{{Stix Two Text}}
%\\usepackage{{unicode-math}}
%\\setmathfont{{Stix Two Math}}

\\usepackage{{amsfonts}}
\\usepackage{{url}}
\\usepackage{{tikz}}
\\usepackage{{color}}
\\usetikzlibrary{{arrows,calc,positioning,shadows.blur,decorations.pathreplacing}}
\\usetikzlibrary{{automata}}
\\usetikzlibrary{{fit}}
\\usetikzlibrary{{snakes}}
\\usetikzlibrary{{intersections}}
\\usetikzlibrary{{decorations.markings,decorations.text,decorations.pathmorphing,decorations.shapes}}
\\usetikzlibrary{{decorations.fractals,decorations.footprints}}
\\usetikzlibrary{{graphs}}
\\usetikzlibrary{{matrix}}
\\usetikzlibrary{{shapes.geometric}}
\\usetikzlibrary{{mindmap, shadows}}
\\usetikzlibrary{{backgrounds}}
\\usetikzlibrary{{cd}}

% really common macros
\\newcommand{{\\grtspacer}}{{\\vphantom{{lp}}}}

\\def\\dfrac{{\\displaystyle\\frac}}
\\def\\dint{{\\displaystyle\\int}}

\\begin{{document}}

{tikz_begin}{tikz_code}{tikz_end}

\\end{{document}}
"""
    # --------------------------------------------
    # Body only: no documentclass or packages. Used with pdflatex together
    # with the precompiled format file, which provides the preamble.
    _tex_template = """
% really common macros
\\newcommand{{\\grtspacer}}{{\\vphantom{{lp}}}}

\\def\\dfrac{{\\displaystyle\\frac}}
\\def\\dint{{\\displaystyle\\int}}

\\begin{{document}}

{tikz_begin}{tikz_code}{tikz_end}

\\end{{document}}
"""

    # TODO machine-specific location of the pdf2svg executable
    # https://github.com/jalios/pdf2svg-windows
    _pdf2svg = 'C:\\temp\\pdf2svg-windows\\dist-64bits\\pdf2svg'

    def split_tikz(self):
        """
        Split text to get the tikzpicture. Format is

        initial text, then groups of four:

        1. begin tag ``(1::4)``
        2. tikz code ``(2::4)``
        3. end tag ``(3::4)``
        4. non-related text ``(4::4)``

        :return: list produced by splitting ``self.txt`` on the begin / end
            tags of tikzpicture and tikzcd environments, tags included.
        """
        return re.split(r'(\\begin{tikz(?:cd|picture)}|\\end{tikz(?:cd|picture)})', self.txt)

    def __init__(self, txt, base_path='.', tex_engine='pdflatex'):
        """
        TikzProcessor (from TikzConverter): process a tex tikz text string into svg.

        The program

        * creates a pdf and svg from the tikz blob

        lualatex is more robust, but slower...
        pdflatex can't handle the fancy wiley fonts

        :param txt: TeX text containing a tikzpicture or tikzcd environment.
        :param base_path: directory under which the ``tikz`` output directory
            is created.
        :param tex_engine: ``'pdflatex'`` to compile with the precompiled
            format; any other value compiles with lualatex.
        """
        self.txt = txt
        self.tex_engine = tex_engine
        # directory for TeX and images
        self.base_path = Path(base_path).resolve()
        self.out_path = self.base_path / 'tikz'
        # parents=True so a base_path that does not exist yet is not an error
        self.out_path.mkdir(parents=True, exist_ok=True)
        self.file_path = self.out_path / txt_short_hash(txt)

    def process_tikz(self, verbose=False):
        """
        Process the tikz into pdf and svg.

        Only the first TikZ environment in the text is used. Unless
        ``verbose`` is set, the intermediate tex, aux, log and pdf files are
        removed afterwards, leaving the svg.

        :param verbose: print the commands that are run and keep the
            intermediate files.
        :raises ValueError: if the text contains no complete TikZ environment,
            or if the TeX run writes to stderr.
        """
        svg_path = self.file_path.with_suffix('.svg')
        tex_path = self.file_path.with_suffix('.tex')

        parts = self.split_tikz()
        if len(parts) < 4:
            raise ValueError(
                'no complete tikzpicture or tikzcd environment found in txt')
        tikz_begin, tikz_code, tikz_end = parts[1:4]

        # make tex code for a stand-alone document; without the precompiled
        # format the full preamble has to be part of the document
        use_format = self.tex_engine == 'pdflatex'
        template_text = self._tex_template if use_format else self._tex_template_full
        tex_code = template_text.format(
            tikz_begin=tikz_begin, tikz_code=tikz_code, tikz_end=tikz_end)
        tex_path.write_text(tex_code, encoding='utf-8')
        print(
            f'TIKZ: created temp file = {tex_path.name}')
        pdf_file = tex_path.with_suffix('.pdf')
        print('TIKZ: Update pdf file')
        if use_format:
            # faster with template
            # TODO EVID hard coded template, looked up in the working directory
            template_path = Path('tikz_format.fmt')
            assert template_path.exists(), \
                f'format file {template_path} not found in the working directory'
            template = str(template_path)
            command = ['pdflatex', f'--fmt={template}',
                       f'--output-directory={str(tex_path.parent.resolve())}',
                       str(tex_path.resolve())]
        else:
            # for STIX fonts, no template
            command = ['lualatex',
                       f'--output-directory={str(tex_path.parent.resolve())}',
                       str(tex_path.resolve())]
        if verbose:
            print(f'TIKZ: TeX Command={" ".join(command)}')
        TikzProcessor.run_command(command)
        # to recreate
        (tex_path.parent / 'make_tikz.bat').write_text(" ".join(command))
        if verbose:
            print(
                'TIKZ: Creating svg file for Tikz (using new pdf2svg util)')
        command = [
            self._pdf2svg,
            str(pdf_file.resolve()), str(svg_path.resolve())]
        # seems to return info on stderr?
        if verbose:
            print(f'PDF->SVG: {" ".join(command)}')
        TikzProcessor.run_command(command, flag=False)
        if not verbose:
            # tidy up; a failed TeX run may not have produced every file
            for suffix in ('.tex', '.aux', '.log', '.pdf'):
                leftover = tex_path.with_suffix(suffix)
                if leftover.exists():
                    leftover.unlink()

    @staticmethod
    def run_command(command, flag=True):
        """
        Run a command and show results.

        The last 250 characters of stdout are printed. Anything on stderr is
        either raised or printed, depending on ``flag`` (some tools report
        ordinary information on stderr).

        :param command: argument list passed to Popen.
        :param flag: if True raise ValueError when the command writes to
            stderr, otherwise print what it wrote.
        :return: exit code of the process.
        """
        with Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True) as p:
            # communicate drains both pipes together and waits for exit;
            # reading stdout to the end first can block forever once the
            # stderr pipe fills up
            line1, line2 = p.communicate()
            exit_code = p.returncode
        if line1:
            print('\n' + line1[-250:])
        if line2:
            if flag:
                raise ValueError(line2)
            else:
                print(line2)
        return exit_code

    def display(self):
        """display in Jupyter Lab."""
        display(SVG(self.file_path.with_suffix('.svg')))
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Create and display SVG files from TikZ pictures embedded in LaTeX.
|
||||
|
||||
Good for testing. Outputs are cached by hash. PDF→SVG uses pdf2svg.
|
||||
|
||||
GPT re-write of my old great2.blog code.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from subprocess import run, Popen, PIPE
|
||||
from IPython.display import SVG, display
|
||||
|
||||
from .hasher import txt_short_hash
|
||||
|
||||
|
||||
class TikzProcessor:
    """
    Compile a TikZ picture embedded in a TeX string to PDF and convert it to SVG.

    All files live in ``<base_path>/tikz``; outputs are named from a hash of
    the input text and a precompiled pdflatex format holding the preamble is
    built there on first use.
    """

    # Full TeX preamble to generate a .fmt if needed
    _tex_template_full = r"""\documentclass[10pt, border=5mm]{standalone}
\usepackage{amsfonts}
\usepackage{url}
\usepackage{tikz}
\usepackage{color}
\usetikzlibrary{arrows,calc,positioning,shadows.blur,decorations.pathreplacing}
\usetikzlibrary{automata,fit,snakes,intersections}
\usetikzlibrary{decorations.markings,decorations.text,decorations.pathmorphing,decorations.shapes}
\usetikzlibrary{decorations.fractals,decorations.footprints}
\usetikzlibrary{graphs,matrix,shapes.geometric}
\usetikzlibrary{mindmap,shadows,backgrounds,cd}
\dump
"""

    # Minimal template to embed user tikz
    _tex_template = r"""
\newcommand{{\grtspacer}}{{\vphantom{{lp}}}}
\def\dfrac{{\displaystyle\frac}}
\def\dint{{\displaystyle\int}}
\begin{{document}}
{tikz_begin}{tikz_code}{tikz_end}
\end{{document}}
"""

    def __init__(self, txt, base_path='.', tex_engine='pdflatex'):
        """
        Set up output locations for one TikZ snippet.

        :param txt: TeX text containing a tikzpicture or tikzcd environment.
        :param base_path: directory under which the ``tikz`` output directory
            is created.
        :param tex_engine: stored on the instance.
            NOTE(review): process_tikz always runs pdflatex and never reads
            this value - confirm whether other engines should be supported.
        """
        self.txt = txt
        self.tex_engine = tex_engine
        self.base_path = Path(base_path).resolve()
        self.out_path = self.base_path / 'tikz'
        # parents=True so a base_path that does not exist yet is not an error
        self.out_path.mkdir(parents=True, exist_ok=True)
        self.file_path = self.out_path / txt_short_hash(txt)
        self.format_file = self.out_path / 'tikz_format.fmt'

    def split_tikz(self):
        """
        Split text to extract the TikZ picture.

        :return: list produced by splitting ``self.txt`` on the begin / end
            tags of tikzpicture and tikzcd environments, tags included, so
            elements 1, 2 and 3 are the first begin tag, its code and its
            end tag.
        """
        return re.split(r'(\\begin{tikz(?:cd|picture)}|\\end{tikz(?:cd|picture)})', self.txt)

    def ensure_format_file(self):
        """
        Create format file for faster compilation if missing.

        The preamble is written to ``tikz_format.tex`` in the output directory
        and dumped with ``pdflatex -ini`` run from that directory.

        :raises RuntimeError: if the build writes to stderr or does not
            produce the format file.
        """
        if self.format_file.exists():
            return
        print('building format file...')
        tmp = self.out_path / 'tikz_format.tex'
        tmp.write_text(self._tex_template_full, encoding='utf-8')
        self.run_command([
            'pdflatex',
            '-ini',
            f'-jobname={self.format_file.stem}',
            '&pdflatex',
            tmp.name,
        ], raise_on_error=True, cwd=self.out_path)
        # TeX reports its errors on stdout, so an empty stderr does not prove
        # the dump worked; check for the file before claiming success
        if not self.format_file.exists():
            raise RuntimeError(
                f'format file {self.format_file} was not created; see TeX output')
        # the preamble source (tmp) is deliberately left in place
        log_path = self.out_path / f'{self.format_file.stem}.log'
        if log_path.exists():
            log_path.unlink()
        print('building format file...success', self.format_file.resolve())

    def process_tikz(self, verbose=False):
        """
        Compile TikZ to PDF and convert to SVG.

        Only the first TikZ environment in the text is used. Unless
        ``verbose`` is set, the intermediate tex, aux, log and pdf files are
        removed afterwards, leaving the svg.

        :param verbose: print the commands that are run and keep the
            intermediate files.
        :raises ValueError: if the text contains no complete TikZ environment.
        :raises RuntimeError: if the TeX run writes to stderr.
        """
        parts = self.split_tikz()
        if len(parts) < 4:
            raise ValueError(
                'no complete tikzpicture or tikzcd environment found in txt')
        tikz_begin, tikz_code, tikz_end = parts[1:4]
        tex_code = self._tex_template.format(
            tikz_begin=tikz_begin,
            tikz_code=tikz_code,
            tikz_end=tikz_end
        )

        tex_path = self.file_path.with_suffix('.tex')
        tex_path.write_text(tex_code, encoding='utf-8')
        pdf_path = tex_path.with_suffix('.pdf')
        svg_path = tex_path.with_suffix('.svg')

        self.ensure_format_file()

        tex_cmd = [
            'pdflatex',
            f'--fmt={self.format_file.stem}',
            f'--output-directory={str(tex_path.parent)}',
            str(tex_path)
        ]
        if verbose:
            print("Running:", " ".join(tex_cmd))
        # The format is referenced by bare name and lives in out_path, so run
        # from there for TeX to find it; tex_path is absolute, so the change
        # of directory does not affect the input file.
        self.run_command(tex_cmd, cwd=self.out_path)

        (tex_path.parent / 'make_tikz.bat').write_text(" ".join(tex_cmd), encoding='utf-8')

        # TODO machine-specific location of the pdf2svg executable
        svg_cmd = [
            'C:\\temp\\pdf2svg-windows\\dist-64bits\\pdf2svg',
            str(pdf_path),
            str(svg_path)
        ]
        if verbose:
            print("Running:", " ".join(svg_cmd))
        # stderr output from the converter is printed rather than raised
        self.run_command(svg_cmd, raise_on_error=False)

        if not verbose:
            for ext in ('.tex', '.aux', '.log', '.pdf'):
                path = tex_path.with_suffix(ext)
                if path.exists():
                    path.unlink()

    def display(self):
        """Display the SVG in Jupyter."""
        display(SVG(self.file_path.with_suffix('.svg')))

    @staticmethod
    def run_command(command, raise_on_error=True, cwd=None):
        """
        Run command with subprocess and show output.

        The last 250 characters of stdout are printed. Anything written to
        stderr is raised or printed depending on ``raise_on_error``.

        :param command: argument list passed to Popen.
        :param raise_on_error: raise RuntimeError when the command writes to
            stderr; otherwise print what it wrote.
        :param cwd: working directory for the command, None for the current one.
        :return: exit code of the process.
        """
        with Popen(command, cwd=cwd, stdout=PIPE, stderr=PIPE, universal_newlines=True) as p:
            stdout, stderr = p.communicate()
            exit_code = p.returncode
        if stdout:
            print(stdout.strip()[-250:])
        if stderr:
            if raise_on_error:
                raise RuntimeError(stderr.strip())
            else:
                print(stderr.strip())
        return exit_code
|
||||
@@ -30,6 +30,9 @@ classifiers = [
|
||||
include = ["greater_tables"]
|
||||
exclude = ["img", "tests", "docs"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"greater_tables" = ["data/*.csv", "data/*.md"]
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = { attr = "greater_tables.__version__" }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user