Source code for pytexmd.filter.preprocessor

__all__ = ["do_commands","do_newenvironment"]

from .splitting import first_char_brace,split_on_first_brace,split_on_next,begin_end_split,position_of

def execute_on_pattern(string: str, arg_num: int, command_name: str, command_pattern: str) -> str:
    """
    Expands a LaTeX command pattern in the string.

    Args:
        string (str): The input string.
        arg_num (int): Number of arguments for the command.
        command_name (str): The command name to match.
        command_pattern (str): The pattern to replace.

    Returns:
        str: The string with expanded command patterns.

    Example:
        >>> s = r"\\foo{a}{b}"
        >>> execute_on_pattern(s, 2, "\\foo", "#1+#2")
        'a+b'
    """
    out = ""
    while(True):
        pattern_instance = command_pattern
        pre,post = split_on_next(string,command_name)
        if string == pre:    
            out += pre
            break
        for k in range(arg_num):
            
            content = ""
            if first_char_brace(post):  
                content,post = split_on_first_brace(post)
            elif first_char_brace(post,"["):
                content,post = split_on_first_brace(post,"[","]")
            pattern_instance = pattern_instance.replace("#"+str(k+1),content)

        string = post
        out += pre + pattern_instance

    return out

[docs] def do_commands(string: str) -> str: """ Processes all LaTeX \\newcommand definitions and applies them. Args: string (str): The input string. Returns: str: The string with commands expanded. Example: >>> s = r"\\newcommand{\\foo}[2]{#1+#2} \\foo{a}{b}" >>> do_commands(s) 'a+b' """ toprocess = string out = "" all_commands = [] while True: # pre,post = split_on_next(toprocess,"\\newcommand") out += pre if toprocess == pre: break command_name,post = split_on_first_brace(post) arg_num = 0 if first_char_brace(post,"["): arg_num,post = split_on_first_brace(post,"[","]") arg_num = int(arg_num) command_pattern,post = split_on_first_brace(post) toprocess = post all_commands.append((arg_num,command_name,command_pattern)) while True: tmp = string for arg_num,command_name,command_pattern in all_commands: tmp = execute_on_pattern(tmp,arg_num,command_name,command_pattern) if tmp == string: break string = tmp return string
def execute_enviroment_on_pattern(string: str, environment_name: str, arg_num: int, begin: str, end: str) -> str: """ Expands a LaTeX environment pattern in the string. Args: string (str): The input string. environment_name (str): The environment name to match. arg_num (int): Number of arguments for the environment. begin (str): Begin pattern. end (str): End pattern. Returns: str: The string with expanded environment patterns. Example: >>> s = r"\\begin{foo}{a}{b}content\\end{foo}" >>> execute_enviroment_on_pattern(s, "foo", 2, "<b>#1 #2>", "</b>") '<b>a b>content</b>' """ out = "" while(True): begin_instance = begin end_instance = end if not "\\begin{"+environment_name+"}" in string: out += string break pre,content,post = begin_end_split(string,"\\begin{"+environment_name+"}","\\end{"+environment_name+"}") for k in range(arg_num): argument = "" if first_char_brace(content): argument,content = split_on_first_brace(content) elif first_char_brace(content,"["): argument,content = split_on_first_brace(content,"[","]") end_instance = end_instance.replace("#"+str(k+1),argument) begin_instance = begin_instance.replace("#"+str(k+1),argument) string = post out += pre + begin_instance + content + end_instance return out
[docs] def do_newenvironment(string: str) -> str: """ Processes all LaTeX \\newenvironment definitions and applies them. Args: string (str): The input string. Returns: str: The string with environments expanded. Example: >>> s = r"\\newenvironment{foo}[2]{<b>#1 #2>}{</b>} \\begin{foo}{a}{b}content\\end{foo}" >>> do_newenvironment(s) '<b>a b>content</b>' """ toprocess = string out = "" all_env = [] while True: pre,post = split_on_next(toprocess,"\\newenvironment") out += pre if toprocess == pre: break environment_name,post = split_on_first_brace(post) arg_num = 0 if first_char_brace(post,"["): arg_num,post = split_on_first_brace(post,"[","]") arg_num = int(arg_num) post = "{" + split_on_next(post,"{")[1] begin,post = split_on_first_brace(post) end,post = split_on_first_brace(post) toprocess = post all_env.append((environment_name,arg_num,begin,end)) while True: tmp = string for environment_name,arg_num,begin,end in all_env: print("applying enviroment ",environment_name) tmp = execute_enviroment_on_pattern(tmp,environment_name,arg_num,begin,end) if tmp == string: break string = tmp return string
def clean_junk_safe(latex_content:str)->str: """ Cleans up LaTeX content by removing unnecessary characters and formatting issues. Args: latex_content (str): The LaTeX content to be cleaned. Returns: str: The cleaned LaTeX content. Example: >>> clean_junk_safe("foo bar\\n\\n\\n") 'foo bar\\n\\n' """ while "\\"*4 in latex_content: latex_content = latex_content.replace("\\"*4, "") while "\t" in latex_content: latex_content = latex_content.replace("\t"," ") while " " in latex_content: latex_content = latex_content.replace(" "," ") while " \n" in latex_content: latex_content = latex_content.replace(" \n","\n") while "\n " in latex_content: latex_content = latex_content.replace("\n ","\n") while "\n\n\n" in latex_content: latex_content = latex_content.replace("\n\n\n","\n\n") XXDOUBLENEWLINEXX = "XXDOUBLENEWLINEXX" latex_content = latex_content.replace("\n\n",XXDOUBLENEWLINEXX) XXNEWLINECOMMANDXX = "XXNEWLINECOMMANDXX" latex_content = latex_content.replace("\n\\",XXNEWLINECOMMANDXX) XXBRACENEWLINEXX = "XXBRACENEWLINEXX" latex_content = latex_content.replace("}\n",XXBRACENEWLINEXX) XXBRACKETNEWLINEXX = "XXBRACKETNEWLINEXX" latex_content = latex_content.replace("]\n",XXBRACKETNEWLINEXX) XXKLAMMERNEWLINEXX = "XXKLAMMERNEWLINEXX" latex_content = latex_content.replace(")\n",XXKLAMMERNEWLINEXX) latex_content = latex_content.replace("\n"," ") latex_content = latex_content.replace(XXDOUBLENEWLINEXX,"\n\n") latex_content = latex_content.replace(XXNEWLINECOMMANDXX,"\n\\") latex_content = latex_content.replace(XXBRACENEWLINEXX,"}\n") latex_content = latex_content.replace(XXBRACKETNEWLINEXX,"]\n") latex_content = latex_content.replace(XXKLAMMERNEWLINEXX,")\n") return latex_content def run_preprocessor(string: str) -> str: """ Runs the full preprocessor pipeline on the input string. Args: string (str): The input string. Returns: str: The processed string. Example: >>> s = r"\\newcommand{\\foo}[1]{#1!} \\foo{bar}" >>> run_preprocessor(s) 'bar!' """ string = clean_junk_safe(string) string = do_commands(string) string = do_newenvironment(string) return string