Source code for lightwin.config.csv_formatter

"""Provide helper function to split configuration entries.

The idea is to have properly formatted configuration tables in the HTML
documentation.

"""

import re


def format_long_columns(long: str, max_width: int) -> str:
    """Format cell content to fit within ``max_width``.

    Text that already fits (``len(long) <= max_width``) is returned
    untouched. Longer text is wrapped with :func:`chunk` and the resulting
    lines are joined by two line skips (a blank line between them).

    This function does not add any quoting: in the CSV file, a cell spanning
    several lines should start and end with a single ``"`` character, which
    is left to the caller.

    Parameters
    ----------
    long
        Raw content of the cell.
    max_width
        Maximum width of a line.

    Returns
    -------
    str
        ``long`` itself when it fits, otherwise the wrapped lines joined by
        two newline characters.

    """
    # ``<=`` rather than ``<``: a text of exactly ``max_width`` characters
    # already fits, so it must not be re-tokenized (which would alter its
    # spacing).
    if len(long) <= max_width:
        return long
    return "\n\n".join(chunk(long, max_width))
# Opening brackets. No space is inserted after one of these, and ``_lex``
# glues one onto the backtick/role token that directly follows it.
OPEN_PUNCT = "([{"
# Closing brackets and sentence punctuation. No space is inserted before one
# of these, and ``_lex`` glues them onto the backtick/role token they follow.
CLOSE_PUNCT = ".,:;!?)]}"
def _lex(text: str) -> list[str]:
    """Tokenize ``text`` for the wrapping algorithm.

    The produced tokens are, in order of precedence:

    - reST roles such as ``:name:`...```;
    - backtick blocks such as ```...```;
    - single punctuation characters (any of ``OPEN_PUNCT`` or
      ``CLOSE_PUNCT``) that are not attached to a role/backtick block;
    - plain words, which stop at whitespace, at a backtick, at a bracket, or
      right before the start of a role.

    A role or a backtick block absorbs the closing punctuation that directly
    follows it, and one opening punctuation that directly precedes it (if
    that character was just emitted as its own token). A backtick without a
    closing partner is treated like a word.

    Whitespace is dropped; spacing is decided when the lines are assembled.

    Parameters
    ----------
    text
        Text to tokenize.

    Returns
    -------
    list[str]
        The tokens, in their order of appearance.

    """
    full_role = re.compile(r":[A-Za-z0-9_]+:`[^`]+`")
    role_start = re.compile(r":[A-Za-z0-9_]+:`")
    word_stops = "`()[]{}"
    punctuation = OPEN_PUNCT + CLOSE_PUNCT

    tokens: list[str] = []
    length = len(text)

    def emit_group(start: int, stop: int) -> int:
        """Store ``text[start:stop]`` with its adjacent punctuation.

        Return the index at which lexing must resume.
        """
        group = text[start:stop]
        # Swallow every closing punctuation glued to the end of the group.
        while stop < length and text[stop] in CLOSE_PUNCT:
            group += text[stop]
            stop += 1
        # Take back the opening punctuation emitted just before, if it
        # touches the group.
        if start > 0:
            before = text[start - 1]
            if tokens and before in OPEN_PUNCT and tokens[-1] == before:
                tokens.pop()
                group = before + group
        tokens.append(group)
        return stop

    def word_end(start: int, stop_at_role: bool) -> int:
        """Return the index just past the word-like run beginning at ``start``."""
        stop = start
        while (
            stop < length
            and not text[stop].isspace()
            and text[stop] not in word_stops
        ):
            if (
                stop_at_role
                and text[stop] == ":"
                and role_start.match(text, stop)
            ):
                break
            stop += 1
        return stop

    pos = 0
    while pos < length:
        char = text[pos]

        if char.isspace():
            pos += 1
            continue

        if char == ":":
            role = full_role.match(text, pos)
            if role is not None:
                pos = emit_group(pos, role.end())
                continue
            # Not a role: the colon is handled as plain punctuation below.

        if char == "`":
            closing = text.find("`", pos + 1)
            if closing == -1:
                # Unmatched backtick: keep it with the word-ish run after it.
                stop = word_end(pos + 1, stop_at_role=False)
                tokens.append(text[pos:stop])
                pos = stop
            else:
                pos = emit_group(pos, closing + 1)
            continue

        if char in punctuation:
            tokens.append(char)
            pos += 1
            continue

        stop = word_end(pos, stop_at_role=True)
        tokens.append(text[pos:stop])
        pos = stop

    return tokens
[docs] def _visible_len(token: str) -> int: """Length used for wrapping decisions. - For backtick/role tokens, do NOT count the backticks or the `:role:` prefix. - But DO count any prefix opening punctuation and suffix closing punctuation. """ m = re.match( r"(?P<prefix>[\(\[\{]*)?(?:(?::(?P<rolename>[A-Za-z0-9_]+):)?`(?P<inner>[^`]+)`)(?P<suffix>[\)\]\}\.,:;!?]*)?$", token, ) if m: prefix = m.group("prefix") or "" inner = m.group("inner") or "" suffix = m.group("suffix") or "" return len(prefix) + len(inner) + len(suffix) # fallback: if something like ":role:rest" (without backticks) appears, # ignore the :role: length m2 = re.match( r"(?P<prefix>[\(\[\{]*)?:(?P<rolename>[A-Za-z0-9_]+):(?P<rest>.*)$", token, ) if m2: prefix = m2.group("prefix") or "" rest = m2.group("rest") or "" return len(prefix) + len(rest) return len(token)
[docs] def _needs_space_between(prev: str, token: str) -> bool: """Decide whether to insert a space between prev and token. - No space before a closing punctuation (.,:;!?)}]). - No space after an opening punctuation ([( { ) (i.e. if prev ends with opening). - Otherwise, insert a space. """ if not prev: return False if token and token[0] in CLOSE_PUNCT: return False if prev and prev[-1] in OPEN_PUNCT: return False return True
[docs] def _split_normal_word(word: str, max_width: int) -> list[str]: """Hyphenate word when needed (allowed to sit next to other words)""" if len(word) <= max_width: return [word] out: list[str] = [] w = word while len(w) > max_width: out.append(w[: max_width - 1] + "-") w = w[max_width - 1 :] out.append(w) return out
[docs] def _split_backtick(token: str, max_width: int) -> list[str]: """Split a backtick token into wrapped chunks. Where each chunk's inner content length is <= max_width (max_width applies to the inner content). - Keeps prefix opening punctuation only on the first chunk. - Keeps suffix closing punctuation only on the last chunk. - Prefers splitting at '_' first; otherwise splits at spaces. - If any atomic piece (between delimiters) is longer than max_width, returns [token] (do not hyphenate inside variable names). """ m = re.match( r"(?P<prefix>[\(\[\{]*)?`(?P<inner>[^`]+)`(?P<suffix>[\)\]\}\.,:;!?]*)?$", token, ) if not m: return [token] prefix = m.group("prefix") or "" inner = m.group("inner") or "" suffix = m.group("suffix") or "" # prefer underscores if "_" in inner: parts = inner.split("_") if any(len(p) > max_width for p in parts): return [token] pieces = [p + "_" for p in parts[:-1]] + [parts[-1]] chunks: list[str] = [] cur = "" for piece in pieces: if not cur: cur = piece elif len(cur) + len(piece) <= max_width: cur += piece else: chunks.append(cur) cur = piece if cur: chunks.append(cur) out: list[str] = [] for i, ch in enumerate(chunks): s = "`" + ch + "`" if i == 0 and prefix: s = prefix + s if i == len(chunks) - 1 and suffix: s = s + suffix out.append(s) return out else: # split on spaces (do not keep trailing spaces inside pieces) words = inner.split(" ") if any(len(w) > max_width for w in words): return [token] chunks = [] cur = "" for w in words: if not cur: cur = w elif len(cur) + 1 + len(w) <= max_width: cur = cur + " " + w else: chunks.append(cur) cur = w if cur: chunks.append(cur) out: list[str] = [] for i, ch in enumerate(chunks): s = "`" + ch + "`" if i == 0 and prefix: s = prefix + s if i == len(chunks) - 1 and suffix: s = s + suffix out.append(s) return out
[docs] def _targeted_split_backtick( token: str, rem: int, max_width: int ) -> list[str] | None: """Try to split the backtick token so that: - the first chunk (with prefix) fits within 'rem' (available room on current line), - and the rest (with suffix) can be split to chunks that fit max_width lines. We attempt to choose the largest first-chunk (by trying split points from the end). For stability with the test-suite we require the first chunk to contain at least two atomic parts (so we avoid silly 1-word-first-chunk splits). """ m = re.match( r"(?P<prefix>[\(\[\{]*)?`(?P<inner>[^`]+)`(?P<suffix>[\)\]\}\.,:;!?]*)?$", token, ) if not m: return None prefix = m.group("prefix") or "" inner = m.group("inner") or "" suffix = m.group("suffix") or "" prefix_len = len(prefix) if "_" in inner: parts = inner.split("_") if any(len(p) > max_width for p in parts): return None def join_parts(a, b): seg = "_".join(parts[a:b]) return seg + ("_" if b < len(parts) else "") else: parts = inner.split(" ") if any(len(p) > max_width for p in parts): return None def join_parts(a, b): return " ".join(parts[a:b]) n = len(parts) # try splits; require first chunk to contain at least two parts for stability for k in range(n - 1, 1, -1): first_inner = join_parts(0, k) if prefix_len + len(first_inner) <= rem: rest_inner = join_parts(k, n) rest_token = "`" + rest_inner + "`" + suffix rest_chunks = _split_backtick(rest_token, max_width) if rest_chunks: first_chunk = prefix + "`" + first_inner + "`" return [first_chunk] + rest_chunks # else continue trying smaller k return None
def chunk(text: str, max_width: int) -> list[str]:
    """Split ``text`` into lines of visible length <= ``max_width``.

    This is best-effort: a token that cannot be split stays whole, even if
    it is wider than ``max_width``.

    - reST roles (``:role:`...```) are atomic.
    - Widths are measured with :func:`_visible_len`, so backticks and the
      ``:role:`` prefix are not counted.
    - Backtick tokens are split at ``_`` (preferred) or at spaces, never
      inside a word. A split that fills the room left on the current line is
      tried first.
    - Normal words wrap on spaces; words that do not fit are hyphenated.
    - Spacing follows :func:`_needs_space_between`: no space before closing
      punctuation, no space after opening punctuation.

    Parameters
    ----------
    text
        Text to wrap.
    max_width
        Maximum visible width of a line.

    Returns
    -------
    list[str]
        The wrapped lines; empty if ``text`` holds no token.

    """
    finished: list[str] = []
    line = ""
    line_width = 0  # visible width of ``line``

    for token in _lex(text):
        looks_like_role = token.startswith(":") and "`" in token
        looks_like_backtick = token.startswith("`") or (
            token and token[0] in OPEN_PUNCT and "`" in token
        )
        gap = (
            1 if line_width > 0 and _needs_space_between(line, token) else 0
        )

        # 1. Decide in how many pieces the token goes.
        if looks_like_role:
            pieces = [token]
        elif looks_like_backtick:
            if line_width + gap + _visible_len(token) <= max_width:
                pieces = [token]
            else:
                room = max_width - line_width - gap
                pieces = None
                if room >= 1 and ("_" in token or " " in token):
                    pieces = _targeted_split_backtick(token, room, max_width)
                if not pieces:
                    pieces = _split_backtick(token, max_width)
        elif line_width + gap + len(token) <= max_width:
            pieces = [token]
        else:
            pieces = _split_normal_word(token, max_width)

        # 2. Lay the pieces out, opening a new line whenever one overflows.
        for piece in pieces:
            piece_width = _visible_len(piece)
            gap = (
                1
                if line_width > 0 and _needs_space_between(line, piece)
                else 0
            )
            if line_width + gap + piece_width > max_width:
                if line:
                    finished.append(line)
                line, line_width = piece, piece_width
                continue
            if gap:
                line += " "
                line_width += 1
            line += piece
            line_width += piece_width

    if line:
        finished.append(line)
    return finished