Source code for pytexmd.filter.splitting

__all__ = ["get_all_allchars_no_abc","save_command_split","first_char_brace","split_on_first_brace","split_rename","split_on_next","begin_end_split","position_of"]

from typing import Tuple, List, Optional, Union, Callable

[docs] def get_all_allchars_no_abc()->str: """ Returns a string of non-alphabetic ASCII characters. Returns: str: String containing non-alphabetic ASCII characters. Example: >>> chars = get_all_allchars_no_abc() >>> isinstance(chars, str) True """ return '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n ' # is removed
[docs] def save_command_split(string:str, split_on:str)->List[str]: """ Splits a string on a given substring, preserving certain patterns. Args: string (str): The input string to split. split_on (str): The substring to split on. Returns: List[str]: List of split string segments. Raises: ValueError: If input types are incorrect. Example: >>> parts = save_command_split("foo$bar$baz", "$") >>> parts ['foo', 'bar', 'baz'] """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(split_on,str): raise ValueError("split_on must be a string") if split_on == "$": return string.split("$") for appendix in get_all_allchars_no_abc(): string = string.replace(split_on + appendix,"XXXsplit_meXXX"+appendix) string = string.split("XXXsplit_meXXX") return string
def save_replace(string:str, old:str, new:str)->str: """ Replaces occurrences of a substring in a string, preserving certain patterns. Args: string (str): The input string. old (str): The substring to be replaced. new (str): The substring to replace with. Returns: str: The modified string with replacements. Raises: ValueError: If input types are incorrect. Example: >>> result = save_replace("foo$bar$baz", "$", "#") >>> result 'foo#bar#baz' """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(old,str): raise ValueError("old must be a string") if not isinstance(new,str): raise ValueError("new must be a string") for appendix in get_all_allchars_no_abc(): string = string.replace(old + appendix,"XXXsplit_meXXX"+appendix) string = string.replace("XXXsplit_meXXX",new) return string
[docs] def first_char_brace(string:str, begin_brace:str = "{")->bool: """ Checks if the first non-whitespace character of a string is a given brace. Args: string (str): The input string. begin_brace (str, optional): The brace character to check. Defaults to "{". Returns: bool: True if first character is the brace, False otherwise. Raises: ValueError: If input types are incorrect. Example: >>> is_brace = first_char_brace(" {foo}") >>> is_brace True """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(begin_brace,str): raise ValueError("begin_brace must be a string") string = string.lstrip() if len(string) == 0: return False return string[0] == begin_brace
[docs] def split_on_first_brace(string:str, begin_brace = "{",end_brace = "}", error_replacement="brace_error")->Tuple[str,str]: """ Splits a string on the first matching pair of braces. Args: string (str): The input string. begin_brace (str, optional): The opening brace. Defaults to "{". end_brace (str, optional): The closing brace. Defaults to "}". error_replacement (str, optional): Replacement string if brace not found. Defaults to "brace_error". Returns: Tuple[str, str]: Content inside braces, and the remaining string. Raises: ValueError: If input types are incorrect. Example: >>> inside, rest = split_on_first_brace("{foo}bar") >>> inside 'foo' >>> rest 'bar' """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(begin_brace,str) or not isinstance(end_brace,str): raise ValueError("begin_brace and end_brace must be strings") string = string.lstrip() if len(string) == 0: return error_replacement,string if string[0] != begin_brace: return error_replacement,string brace_count = 0 out1 = "" for elem in string: out1 += elem if elem == begin_brace: brace_count = brace_count + 1 if elem == end_brace: brace_count = brace_count - 1 if brace_count == 0: break out2 = string[len(out1):] out1 = out1[1:-1] return out1, out2
[docs] def split_rename(string: str) -> Optional[Tuple[str, str]]: """ Splits the input string into a name and the remaining string if the first character is a '['. Args: string (str): The input string. Returns: Optional[Tuple[str, str]]: A tuple containing the name and the remaining string, or None if the first character is not '['. Raises: ValueError: If input is not a string. Example: >>> name, rest = split_rename("[foo]bar") >>> name 'foo' >>> rest 'bar' """ if not isinstance(string, str): raise ValueError("Input must be a string") string = string.lstrip() if len(string) == 0: return None if string[0] == "[": name,post = split_on_first_brace(string,"[","]") return name,post else: return None
[docs] def split_on_next(string:str, split_on:str, save_split:bool = True)->Tuple[str,str]: """ Splits a string on the next occurrence of a substring. Args: string (str): The input string. split_on (str): The substring to split on. save_split (bool, optional): Whether to use save_command_split. Defaults to True. Returns: Tuple[str, str]: The part before and after the split. Raises: ValueError: If input types are incorrect. Example: >>> before, after = split_on_next("foo$bar$baz", "$") >>> before 'foo' >>> after 'bar$baz' """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(split_on,str): raise ValueError("split_on must be a string") if save_split: tmp = save_command_split(string,split_on) else: tmp = string.split(split_on) pre = tmp[0] post = string[len(pre + split_on):] return pre, post
[docs] def begin_end_split(string:str, begin_name:str, end_name:str, save_split:bool = False)->Tuple[str,str,str]: """ Splits a string into three parts: before, between, and after given begin and end substrings. Args: string (str): The input string. begin_name (str): The substring marking the beginning. end_name (str): The substring marking the end. save_split (bool, optional): Whether to use save_command_split. Defaults to False. Returns: Tuple[str, str, str]: The parts before, between, and after the delimiters. Raises: ValueError: If input types are incorrect. Example: >>> pre, mid, post = begin_end_split("a\\begin{env}b\\end{env}c", "\\begin{env}", "\\end{env}") >>> pre 'a' >>> mid 'b' >>> post 'c' """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(begin_name,str): raise ValueError("begin_name must be a string") if not isinstance(end_name,str): raise ValueError("end_name must be a string") pre,xanda = split_on_next(string,begin_name,save_split) begin_num = 1 middle = "" while True: posbegin = position_of(xanda,begin_name,save_split) posend = position_of(xanda,end_name,save_split) if posbegin!=-1 and posbegin < posend: ptmp,xtmp = split_on_next(xanda,begin_name,save_split) middle += ptmp + begin_name xanda = xtmp begin_num = begin_num + 1 else: ptmp,xtmp = split_on_next(xanda,end_name,save_split) begin_num = begin_num - 1 if begin_num == 0: middle += ptmp return pre,middle,xtmp else: middle += ptmp + end_name xanda = xtmp
[docs] def position_of(string:str, begin_name:str, save_split:bool = True)->int: """ Finds the position of a substring in a string. Args: string (str): The input string. begin_name (str): The substring to find. save_split (bool, optional): Whether to use save_command_split. Defaults to True. Returns: int: The position index, or -1 if not found. Raises: ValueError: If input types are incorrect. Example: >>> pos = position_of("foo$bar", "$") >>> pos 3 """ if not isinstance(string,str): raise ValueError("Input must be a string") if not isinstance(begin_name,str): raise ValueError("begin_name must be a string") if begin_name in string: if save_split: tmp = save_command_split(string,begin_name) else: tmp = string.split(begin_name) if len(tmp) == 1: return -1 return len(tmp[0]) else: return -1