Source code for prompt_optimizer.poptim.name_replace_optim

import re
from typing import Optional

import nltk

from prompt_optimizer.poptim.base import PromptOptim


class NameReplaceOptim(PromptOptim):
    """
    NameReplaceOptim is a prompt optimization technique based on replacing
    names in the prompt.

    Some names have lower token count (1) than others. Higher token count
    names can be replaced by such names to reduce token complexity.
    `self.opti_names` contains the pre-made list of such names for
    `tiktokenizer`. The list will need to be modified for other tokenizers.

    It inherits from the PromptOptim base class.

    Example:
        >>> from prompt_optimizer.poptim import NameReplaceOptim
        >>> p_optimizer = NameReplaceOptim()
        >>> res = p_optimizer("example prompt...")
        >>> optimized_prompt = res.content
    """

    def __init__(self, verbose: bool = False, metrics: Optional[list] = None):
        """
        Initializes the NameReplaceOptim.

        Args:
            verbose (bool, optional): Flag indicating whether to enable
                verbose output. Defaults to False.
            metrics (list, optional): A list of metric names to evaluate
                during optimization. Defaults to None, which is forwarded to
                the base class as a fresh empty list.
        """
        # A literal `[]` default would be one list object shared by every
        # instance; build a new one per call instead.
        super().__init__(verbose, [] if metrics is None else metrics)
        self.opti_names = self.get_opti_names()

    def download(self):
        """
        Downloads the required NLTK resources.

        Fetches the tokenizer, POS tagger, named-entity chunker and word list
        that `process` relies on.
        """
        for resource in (
            "punkt",
            "averaged_perceptron_tagger",
            "maxent_ne_chunker",
            "words",
        ):
            nltk.download(resource)

    def process(self, text: str) -> nltk.Tree:
        """
        Processes the text using NLTK to identify named entities.

        The text is tokenized, POS-tagged and then chunked with typed
        (non-binary) named-entity labels.

        Args:
            text (str): The text to process.

        Returns:
            nltk.Tree: The parsed sentence tree containing named entities.
        """
        tokens = nltk.tokenize.word_tokenize(text)
        tagged_tokens = nltk.pos_tag(tokens)
        return nltk.ne_chunk(tagged_tokens, binary=False)

    def get_opti_names(self) -> list:
        """
        Retrieves the list of optimized names.

        Returns:
            list: The optimized names, in priority order and without
                duplicates.
        """
        opti_names = """Rene Asa Zion Avery Gray Morgan Story Arden Kit Lux
        Sol Avery Pat Sky Arden Clair Storm Ellery Arin Sol Alpha Arie Rio
        Isa Aris Ara Adel Tam Lin Aly Bao Tru True Toy Adi Cache Chi Han Amil
        Amel Eri Truth Hoa Indy Vertis Chai Ottie Ary Aki Rei Bay Ova Shell
        Rael Gal Sher Elim Dae Zell Wen Audi"""
        # The raw list repeats a few entries (e.g. "Avery", "Sol"). Handing
        # the same replacement to two different people would merge them in
        # the optimized prompt, so drop repeats while keeping first-seen order.
        return list(dict.fromkeys(opti_names.split()))

    def gen_name_map(self, text: str) -> dict:
        """
        Generates a mapping of names in the prompt to optimized names.

        Only PERSON entities made of more than one token are mapped. Names
        are paired with `self.opti_names` in order of first appearance, so at
        most `len(self.opti_names)` names receive a replacement.

        Args:
            text (str): The prompt text.

        Returns:
            dict: The mapping of names to optimized names.
        """
        try:
            sentence_tree = self.process(text)
        except LookupError:
            # NLTK raises LookupError when a required resource is missing;
            # fetch the resources once and retry. Any other error propagates.
            self.download()
            sentence_tree = self.process(text)

        person_subtrees = sentence_tree.subtrees(
            filter=lambda tree: tree.label() == "PERSON"
        )

        # A dict doubles as an insertion-ordered set of the names found.
        found_names = {}
        for subtree in person_subtrees:
            # Each leaf is a (token, POS tag) pair; only the token is needed.
            parts = [leaf[0] for leaf in subtree.leaves()]
            if len(parts) > 1:
                found_names.setdefault(" ".join(parts), None)

        # zip stops at the shorter input, so surplus names stay unmapped.
        return dict(zip(found_names, self.opti_names))

    def opti_name_replace(self, text: str, mapping: dict) -> str:
        """
        Replaces names in the text with optimized names based on the mapping.

        All names are substituted in a single pass, so text inserted by one
        replacement is never rewritten by another. Longer names are tried
        first, and a name is only replaced when it is not embedded in a
        longer word.

        Args:
            text (str): The text to perform name replacement.
            mapping (dict): The mapping of names to optimized names. Empty
                keys are ignored.

        Returns:
            str: The text with replaced names.
        """
        # Longest first so that "John Smith Jr" wins over "John Smith".
        old_names = sorted((name for name in mapping if name), key=len, reverse=True)
        if not old_names:
            return text

        alternatives = "|".join(re.escape(name) for name in old_names)
        # Lookarounds rather than \b so names ending in punctuation
        # (e.g. "Jr.") still match when followed by whitespace.
        pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
        return pattern.sub(lambda match: mapping[match.group(0)], text)

    def optimize(self, prompt: str) -> str:
        """
        Runs the prompt optimization technique on the prompt.

        Args:
            prompt (str): The prompt text.

        Returns:
            str: The optimized prompt text.
        """
        mapping = self.gen_name_map(prompt)
        return self.opti_name_replace(prompt, mapping)