Source code for pytexmd.filter.text

"""Section and theorem filter classes and utilities for pytexmd.

This module provides classes and functions for parsing and processing LaTeX sections,
theorems, references, and formatting for Markdown/MyST conversion.
"""

# Public names exported by a star-import of this module.
# TheoremElement and TheoremSearcher are defined further down but are not
# listed here.
__all__ = [
    "Ref",
    "EqRef",
    "Proof",
    "Textbf",
    "Emph",
    "Para",
    "Cite",
    "get_all_filters",
    "get_number_within_equation",
    "get_theoremSearchers",
    "Textit",
    "ProofLabel",
]
from typing import List, Tuple
from .core import *
from .splitting import *

# Maps a lower-cased theorem display name to the ``prf:*`` directive name that
# is emitted for it. Every value is simply the key with a "prf:" prefix.
PRF_TYPES = {
    name: "prf:" + name
    for name in (
        "algorithm",
        "axiom",
        "conjecture",
        "corollary",
        "criteria",
        "definition",
        "example",
        "lemma",
        "observation",
        "property",
        "proposition",
        "proof",
        "remark",
        "theorem",
    )
}

# Disabled snippet kept from the original file for reference:
#   proof_theorem_types = [
#       ("theorem", "Theorem"),
#       ("definition", "Definition"),
#       ("remark", "Remark"),
#       ("claim", "Claim"),  # <-- custom type
#   ]

# Register a naive plural alias (key + "s") for every entry. Iterate over a
# snapshot of the keys because the dict grows inside the loop.
for key in tuple(PRF_TYPES):
    PRF_TYPES[key + "s"] = PRF_TYPES[key]

    
class ProofLabel(Element):
    """Element that renders a LaTeX ``\\label`` as a ``:label:`` option line.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.
        label_ref (str): Raw label name; it is passed through
            ``label_call(label_ref, LabelType.PRF_REF)`` before being stored.

    Example:
        >>> label = ProofLabel("content", None, "mylabel")
        >>> isinstance(label, ProofLabel)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element, label_ref: str):
        super().__init__(modifiable_content, parent)
        # The stored value is the converted label, not the raw LaTeX one.
        converted = label_call(label_ref, LabelType.PRF_REF)
        self.label_ref = converted

    @staticmethod
    def position(string: str) -> int:
        """Return whatever ``position_of`` reports for ``\\label`` in ``string``."""
        marker = "\\label"
        return position_of(string, marker)

    @staticmethod
    def split_and_create(string: str, parent: Element) -> Tuple[str, 'ProofLabel', str]:
        """Split ``string`` at the next ``\\label`` and build the element.

        Args:
            string (str): Text containing a ``\\label`` command.
            parent (Element): Parent for the new element.

        Returns:
            Tuple[str, ProofLabel, str]: The text before the command, a
            ``ProofLabel`` built from the command's first brace group, and
            the text remaining after that group.
        """
        before, remainder = split_on_next(string, "\\label")
        raw_label, after = split_on_first_brace(remainder)
        element = ProofLabel("", parent, raw_label)
        return before, element, after

    def to_string(self) -> str:
        """Return a newline followed by ``:label: `` and the stripped label."""
        stripped = self.label_ref.strip()
        return "\n:label: " + stripped
class Ref(Element):
    """Element for a LaTeX ``\\ref`` reference.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.
        label_ref (str): Referenced label, stored unchanged.

    Example:
        >>> ref = Ref("content", None, "mylabel")
        >>> isinstance(ref, Ref)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element, label_ref: str):
        super().__init__(modifiable_content, parent)
        self.label_ref = label_ref

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\ref`` in ``input``."""
        marker = "\\ref"
        return position_of(input, marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Ref', str]:
        """Split ``input`` at the next ``\\ref`` and build the element.

        Returns:
            Tuple[str, Ref, str]: The text before the command, a ``Ref`` for
            the command's first brace group, and the text after that group.
        """
        before, remainder = split_on_next(input, "\\ref")
        target, after = split_on_first_brace(remainder)
        element = Ref("", parent, target)
        return before, element, after

    def to_string(self) -> str:
        """Return ``ref_call`` applied to the stored label."""
        rendered = ref_call(self.label_ref)
        return rendered
class EqRef(Element):
    """Element for a LaTeX ``\\eqref`` reference.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.
        label_ref (str): Referenced label; the result of ``ref_call(label_ref)``
            is what gets stored.

    Example:
        >>> eqref = EqRef("content", None, "eq1")
        >>> isinstance(eqref, EqRef)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element, label_ref: str):
        super().__init__(modifiable_content, parent)
        converted = ref_call(label_ref)
        self.label_ref = converted

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\eqref`` in ``input``."""
        marker = "\\eqref"
        return position_of(input, marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Ref', str]:
        """Split ``input`` at the next ``\\eqref`` and build the element.

        Returns:
            Tuple[str, Ref, str]: The text before the command, a ``Ref``
            (not an ``EqRef``) for the command's first brace group, and the
            text after that group.
        """
        before, remainder = split_on_next(input, "\\eqref")
        target, after = split_on_first_brace(remainder)
        # NOTE(review): this deliberately keeps the original behaviour of
        # creating a Ref, so EqRef.__init__/to_string are never reached from
        # here. The original carried a "TODO maybewrong" on this line; confirm
        # what ref_call returns before switching to EqRef.
        element = Ref("", parent, target)
        return before, element, after

    def to_string(self) -> str:
        """Return the stored label wrapped as ``[](#...)``."""
        target = self.label_ref
        return "[](#" + target + ")"
class Proof(Element):
    """Element for a LaTeX ``proof`` environment.

    Args:
        modifiable_content (str): Content; leading whitespace is removed
            before it is handed to the ``Element`` base class.
        parent (Element): Parent element.

    Example:
        >>> proof = Proof("content", None)
        >>> isinstance(proof, Proof)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element):
        trimmed = modifiable_content.lstrip()
        super().__init__(trimmed, parent)

    @staticmethod
    def position(input: str) -> int:
        """Return -1 when ``input`` has no ``\\begin{proof}``, else ``position_of``'s result."""
        begin_marker = "\\begin{proof}"
        if begin_marker not in input:
            return -1
        return position_of(input, begin_marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Proof', str]:
        """Split ``input`` around the next proof environment and build the element.

        The children of the created ``Proof`` are, in order: an ``Undefined``
        holding the optional ``[...]`` title (empty string when absent), a
        ``ProofLabel`` if the body then starts with ``\\label``, and an
        ``Undefined`` holding a newline plus the remaining body.

        Returns:
            Tuple[str, Proof, str]: Text before the environment, the new
            element, and text after the environment. When the environment
            body is empty after stripping, the middle item is an empty
            ``Undefined`` attached to ``parent`` instead of a ``Proof``.
        """
        before, body, after = begin_end_split(input, "\\begin{proof}", "\\end{proof}")
        body = body.strip()
        if not body:
            return before, Undefined("", parent), after

        proof = Proof("", parent)
        proof.children = []

        # Optional title in square brackets right after \begin{proof}.
        title_text = ""
        if body.startswith("["):
            title, body = split_on_first_brace(body, "[", "]")
            title_text = title.strip()
        proof.children.append(Undefined(title_text, proof))

        # Optional label immediately following the title.
        body = body.strip()
        label_marker = "\\label"
        if body.startswith(label_marker):
            label_ref, body = split_on_first_brace(body[len(label_marker):])
            body = body.strip()
            proof.children.append(ProofLabel("", proof, label_ref.strip()))

        proof.children.append(Undefined("\n" + body, proof))
        return before, proof, after

    def to_string(self) -> str:
        """Return the children rendered inside a ``:::{prf:proof}`` fenced block."""
        opening = "\n:::{prf:proof}\n"
        rendered = "".join(child.to_string() for child in self.children)
        return opening + rendered.rstrip() + "\n:::\n"
class Textbf(Element):
    """Element for the LaTeX ``\\textbf`` command.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.

    Example:
        >>> bold = Textbf("content", None)
        >>> isinstance(bold, Textbf)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element):
        super().__init__(modifiable_content, parent)

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\textbf`` in ``input``."""
        marker = "\\textbf"
        return position_of(input, marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Textbf', str]:
        """Split ``input`` at the next ``\\textbf`` and build the element.

        Returns:
            Tuple[str, Textbf, str]: The text before the command, a ``Textbf``
            whose content is the command's first brace group, and the text
            after that group.
        """
        before, remainder = split_on_next(input, "\\textbf")
        inner, after = split_on_first_brace(remainder)
        element = Textbf(inner, parent)
        return before, element, after

    def to_string(self) -> str:
        """Return the rendered children wrapped in ``**``."""
        rendered = "".join(child.to_string() for child in self.children)
        return "**" + rendered + "**"
class Cite(Element):
    """Element for the LaTeX ``\\cite`` command.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.
        citations (list[str]): Citation keys.
        rename (str): Text of the optional ``[...]`` argument; defaults to
            the empty string, meaning "no optional argument".

    Example:
        >>> cite = Cite("content", None, ["ref1", "ref2"])
        >>> isinstance(cite, Cite)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element, citations: list[str], rename: str = ""):
        # ``rename`` is optional so the three-argument form shown in the
        # example above works; four-argument callers are unaffected.
        super().__init__(modifiable_content, parent)
        self.citations = citations
        self.rename = rename

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\cite`` in ``input``."""
        return position_of(input, "\\cite")

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Cite', str]:
        """Split ``input`` at the next ``\\cite`` and build the element.

        An optional ``[...]`` group directly after the command is stored
        (stripped) as ``rename``. The following brace group is split on
        commas into citation keys after all spaces have been removed.

        Returns:
            Tuple[str, Cite, str]: The text before the command, the new
            ``Cite``, and the text after the key group.
        """
        pre, post = split_on_next(input, "\\cite")
        post = post.strip()
        rename = ""
        if post.startswith("["):
            rename, post = split_on_first_brace(post, "[", "]")
            rename = rename.strip()
        name, post = split_on_first_brace(post)
        # Drop every space character, then split the key list on commas.
        citations = name.replace(" ", "").split(",")
        return pre, Cite("", parent, citations, rename), post

    def to_string(self) -> str:
        """Return the citation as ``[@key1; @key2, rename]``.

        The ``, rename`` part is only present when ``rename`` is non-empty.
        An empty ``citations`` list yields ``[]`` (or ``[, rename]``) and
        keeps the opening bracket.
        """
        keys = "; ".join(f"@{elem.strip()}" for elem in self.citations)
        suffix = f", {self.rename.strip()}" if self.rename != "" else ""
        return f"[{keys}{suffix}]"
class Emph(Element):
    """Element for the LaTeX ``\\emph`` command.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.

    Example:
        >>> emph = Emph("content", None)
        >>> isinstance(emph, Emph)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element):
        super().__init__(modifiable_content, parent)

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\emph`` in ``input``."""
        marker = "\\emph"
        return position_of(input, marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Emph', str]:
        """Split ``input`` at the next ``\\emph`` and build the element.

        Returns:
            Tuple[str, Emph, str]: The text before the command, an ``Emph``
            whose content is the command's first brace group, and the text
            after that group.
        """
        before, remainder = split_on_next(input, "\\emph")
        inner, after = split_on_first_brace(remainder)
        element = Emph(inner, parent)
        return before, element, after

    def to_string(self) -> str:
        """Return the rendered children wrapped in ``*``."""
        rendered = "".join(child.to_string() for child in self.children)
        return "*" + rendered + "*"
class Textit(Element):
    """Element for the LaTeX ``\\textit`` command.

    Args:
        modifiable_content (str): Content handed to the ``Element`` base class.
        parent (Element): Parent element.

    Example:
        >>> textit = Textit("content", None)
        >>> isinstance(textit, Textit)
        True
    """

    def __init__(self, modifiable_content: str, parent: Element):
        super().__init__(modifiable_content, parent)

    @staticmethod
    def position(input: str) -> int:
        """Return whatever ``position_of`` reports for ``\\textit`` in ``input``."""
        marker = "\\textit"
        return position_of(input, marker)

    @staticmethod
    def split_and_create(input: str, parent: Element) -> Tuple[str, 'Emph', str]:
        """Split ``input`` at the next ``\\textit`` and build the element.

        Returns:
            Tuple[str, Emph, str]: The text before the command, an ``Emph``
            (not a ``Textit``) whose content is the command's first brace
            group, and the text after that group.
        """
        before, remainder = split_on_next(input, "\\textit")
        inner, after = split_on_first_brace(remainder)
        # NOTE(review): an Emph is created here, as in the original; Emph and
        # Textit render the same way (single asterisks around the children).
        element = Emph(inner, parent)
        return before, element, after

    def to_string(self) -> str:
        """Return the rendered children wrapped in ``*``."""
        rendered = "".join(child.to_string() for child in self.children)
        return "*" + rendered + "*"
class TheoremElement(Element):
    """Element for LaTeX theorem-like environments.

    The directive name is looked up in ``PRF_TYPES`` by the lower-cased
    ``display_name``; unknown names fall back to ``prf:theorem``.

    Args:
        parent (Element): Parent element.
        display_name (str): Display name of the environment, e.g. "Theorem".
        theorem_env_name (str): LaTeX environment name (accepted but not stored).
        enum_parent_class: Numbering parent (accepted but not stored).

    Example:
        >>> thm = TheoremElement(None, "Theorem", "theorem", "section")
        >>> isinstance(thm, TheoremElement)
        True
    """

    def __init__(self, parent: Element, display_name: str, theorem_env_name: str, enum_parent_class):
        super().__init__("", parent)
        self.display_name = display_name
        self.theorem_type = PRF_TYPES.get(display_name.lower(), "prf:theorem")

    def to_string(self) -> str:
        """Convert to Markdown theorem block.

        Returns:
            str: The rendered children (right-stripped) inside a
            ``:::{<theorem_type>}`` fenced block.
        """
        opening = "\n:::{" + self.theorem_type + "} "
        rendered = "".join(child.to_string() for child in self.children)
        return opening + rendered.rstrip() + "\n:::\n"


class TheoremSearcher(Searcher):
    """Searcher for one LaTeX theorem-like environment.

    Args:
        theorem_env_name (str): LaTeX environment name to look for.
        enum_parent_class: Numbering parent, forwarded to ``TheoremElement``.
        display_name (str): Display name, forwarded to ``TheoremElement``.

    Example:
        >>> searcher = TheoremSearcher("theorem", ["section"], "Theorem")
        >>> isinstance(searcher, TheoremSearcher)
        True
    """

    def __init__(self, theorem_env_name: str, enum_parent_class, display_name: str):
        self.display_name = display_name
        self.theorem_env_name = theorem_env_name
        self.enum_parent_class = enum_parent_class

    def position(self, input: str) -> int:
        """Return whatever ``position_of`` reports for this environment's ``\\begin{...}``."""
        begin_marker = "\\begin{" + self.theorem_env_name + "}"
        return position_of(input, begin_marker)

    def split_and_create(self, input: str, parent: Element) -> Tuple[str, TheoremElement, str]:
        """Split ``input`` around the next matching environment and build the element.

        The children of the created ``TheoremElement`` are, in order: an
        ``Undefined`` holding the optional ``[...]`` title (empty string when
        absent), a ``ProofLabel`` if the body then starts with ``\\label``,
        and an ``Undefined`` holding a newline plus the remaining body.

        Returns:
            Tuple[str, TheoremElement, str]: Text before the environment, the
            new element, and text after the environment. When the body is
            empty after stripping, the middle item is an empty ``Undefined``
            attached to ``parent`` instead.
        """
        env = self.theorem_env_name
        before, body, after = begin_end_split(input, "\\begin{" + env + "}", "\\end{" + env + "}")
        # Original note on this line: "strip only ' '" (all whitespace is
        # stripped here).
        body = body.strip()
        if not body:
            return before, Undefined("", parent), after

        theorem = TheoremElement(parent, self.display_name, self.theorem_env_name, self.enum_parent_class)
        theorem.children = []

        # Optional title in square brackets right after \begin{...}.
        title_text = ""
        if body.startswith("["):
            title, body = split_on_first_brace(body, "[", "]")
            title_text = title.strip()
        theorem.children.append(Undefined(title_text, theorem))

        # Optional label immediately following the title.
        body = body.strip()
        label_marker = "\\label"
        if body.startswith(label_marker):
            label_ref, body = split_on_first_brace(body[len(label_marker):])
            body = body.strip()
            theorem.children.append(ProofLabel("", theorem, label_ref.strip()))

        theorem.children.append(Undefined("\n" + body, theorem))
        return before, theorem, after
def get_theoremSearchers(input: str) -> list:
    """Extract theorem searchers from a LaTeX preamble.

    Two ``\\newtheorem`` shapes are handled:

    * ``\\newtheorem{env}{Display}`` optionally followed by ``[parent]``:
      the environment gets ``parent`` (or ``None``) as its numbering parent.
    * ``\\newtheorem{env}[shared]{Display}``: the environment takes over the
      numbering parent already recorded for ``shared``. These are resolved
      after the whole preamble has been scanned, in declaration order, so a
      shared environment may itself be named as ``shared`` by a later one.

    A declaration that matches neither shape, or a ``shared`` name that was
    never recorded, is reported with ``print``; in the latter case the
    environment is still returned with ``None`` as its parent.

    Args:
        input (str): LaTeX preamble string.

    Returns:
        list: ``TheoremSearcher`` instances, first those with their own
        counter, then those sharing a counter.

    Example:
        >>> result = get_theoremSearchers(r"\\newtheorem{theorem}{Theorem}")
        >>> isinstance(result, list)
        True
    """
    need_fix = []
    pending_envs = []
    # Name -> numbering parent; seeded with the two sectioning levels.
    shared_parent_fixer = {"section": "document", "subsection": "section"}

    while True:
        pre, post = split_on_next(input, "\\newtheorem")
        if input == pre:
            # The split left the text untouched: no declaration remains.
            break
        theorem_env_name, post = split_on_first_brace(post)
        if first_char_brace(post):
            display_name, post = split_on_first_brace(post)
            if first_char_brace(post, "["):
                enum_parent_class, post = split_on_first_brace(post, "[", "]")
            else:
                enum_parent_class = None
            shared_parent_fixer[theorem_env_name] = enum_parent_class
            pending_envs.append((theorem_env_name, enum_parent_class, display_name))
        elif first_char_brace(post, "["):
            shared_parent, post = split_on_first_brace(post, "[", "]")
            display_name, post = split_on_first_brace(post)
            need_fix.append((theorem_env_name, shared_parent, display_name))
        else:
            print("theoremenv error in ", theorem_env_name)
        input = post

    for theorem_env_name, shared_parent, display_name in need_fix:
        if shared_parent in shared_parent_fixer:
            enum_parent_class = shared_parent_fixer[shared_parent]
        else:
            # Unknown counter: report it and carry on instead of raising
            # KeyError for the whole preamble.
            print("theoremenv error in ", theorem_env_name)
            enum_parent_class = None
        # Record the result so later declarations can share with this one.
        shared_parent_fixer[theorem_env_name] = enum_parent_class
        pending_envs.append((theorem_env_name, enum_parent_class, display_name))

    out = []
    for elem in pending_envs:
        print(elem)  # trace output kept from the original implementation
        out.append(TheoremSearcher(*elem))
    return out
def get_number_within_equation(input: str) -> str:
    """Extract the equation numbering context from a LaTeX string.

    Args:
        input (str): LaTeX string.

    Returns:
        str: ``"document"`` when ``input`` has no
        ``\\numberwithin{equation}``; otherwise the first brace group that
        ``split_on_first_brace`` extracts from the text following the first
        occurrence of that command.

    Example:
        >>> get_number_within_equation(r"abc\\numberwithin{equation}{section}")
        'section'
    """
    marker = "\\numberwithin{equation}"
    if marker not in input:
        return "document"
    # Text between the first occurrence of the marker and the next one (or
    # the end of the string).
    following = input.split(marker)[1]
    context, _rest = split_on_first_brace(following)
    return context
def get_all_filters() -> list:
    """Return the inline filter classes of this module.

    Returns:
        list: A new list ``[Emph, Textbf, Textit, Ref, EqRef, Cite]``, in
        that order.

    Example:
        >>> filters = get_all_filters()
        >>> isinstance(filters, list)
        True
    """
    formatting_filters = [Emph, Textbf, Textit]
    reference_filters = [Ref, EqRef, Cite]
    return formatting_filters + reference_filters