Source code for chemsource.config

"""
Configuration module for chemsource.

This module contains configuration classes and constants used throughout the chemsource package.
"""

from typing import Optional, List, Any

#: Default prompt template for chemical compound classification
BASE_PROMPT = ("You are a helpful scientist that will classify the provided compound \
COMPOUND_NAME using only the information provided as any combination of the \
following: MEDICAL, ENDOGENOUS, FOOD, PERSONAL CARE, INDUSTRIAL. Note that \
MEDICAL refers to compounds actively used as approved medications in \
humans or in late-stage clinical trials in humans. Note that ENDOGENOUS \
refers to compounds that are produced by the human body specifically. \
ENDOGENOUS excludes essential nutrients that cannot be synthesized by the \
human body. Note that FOOD refers to compounds present in natural food items \
or food additives. Note that PERSONAL CARE refers to non-medicated compounds \
typically used for activities such as skincare, beauty, and fitness. Note \
that INDUSTRIAL should be used only for synthetic compounds not used as a \
contributing ingredient in the medical, personal care, or food industries. \
Specify INFO instead if more information is needed. DO NOT MAKE ANY \
ASSUMPTIONS, USE ONLY THE INFORMATION PROVIDED AFTER THE COMPOUND NAME \
BY THE USER. A classification of INFO will also be rewarded when \
correctly applied and is strongly encouraged if information is of poor \
quality, if there is not enough information, or if you are not completely \
confident in your answer.  Provide the output as a plain text separated \
by commas, and provide only the categories listed (either list a \
combination of INDUSTRIAL, ENDOGENOUS, PERSONAL CARE, MEDICAL, FOOD or \
list INFO), with no justification. Provided Information:\n")


[docs] class Config: """ Configuration class for chemsource parameters. This class manages all configuration parameters for the chemsource system, including API keys, model settings, and output formatting options. Args: model_api_key (str, optional): API key for the language model service. model (str, optional): Name of the language model to use. Defaults to "gpt-4o". temperature (float, optional): Temperature parameter for model creativity. Defaults to 0. top_p (float, optional): Top-p parameter for nucleus sampling. Defaults to 0. ncbi_key (str, optional): API key for NCBI/PubMed access. prompt (str, optional): Custom prompt template. Defaults to BASE_PROMPT. max_tokens (int, optional): Maximum number of tokens for model context. Defaults to 250000. clean_output (bool, optional): Whether to clean and validate output. Defaults to False. explanation (bool, optional): Whether to expect explanations in model responses. Only effective when clean_output=True. Defaults to False. explanation_separator (str, optional): Delimiter separating explanation from classification. Only used when both clean_output and explanation are True. Defaults to "EXPLANATION_COMPLETE". allowed_categories (List[str], optional): List of allowed categories for filtering. Defaults to None. custom_client (Any, optional): Custom OpenAI client instance. Defaults to None. Attributes: model_api_key (str): The model API key. model (str): The language model name. temperature (float): The temperature parameter. top_p (float): The top-p parameter. ncbi_key (str): The NCBI API key. prompt (str): The prompt template. max_tokens (int): The maximum token limit. clean_output (bool): Whether output cleaning is enabled. explanation (bool): Whether to extract explanations from responses. explanation_separator (str): The delimiter for separating explanations. allowed_categories (List[str]): The allowed categories list. custom_client (Any): The custom client instance. """
[docs] def __init__(self, model_api_key: Optional[str] = None, model: str = "gpt-4o", temperature: float = 0, top_p: float = 0.0000001, ncbi_key: Optional[str] = None, prompt: str = BASE_PROMPT, max_tokens: int = 250000, clean_output: bool = False, explanation: bool = False, explanation_separator: str = "EXPLANATION_COMPLETE", output_explanation: bool = False, allowed_categories: Optional[List[str]] = None, custom_client: Optional[Any] = None) -> None: self.model_api_key = model_api_key self.model = model self.temperature = temperature self.top_p = top_p self.ncbi_key = ncbi_key self.prompt = prompt self.max_tokens = max_tokens self.clean_output = clean_output self.explanation = explanation self.explanation_separator = explanation_separator self.output_explanation = output_explanation self.allowed_categories = allowed_categories self.custom_client = custom_client
[docs] def set_ncbi_key(self, ncbi_key: Optional[str]) -> None: """ Set the NCBI API key. Args: ncbi_key (str, optional): The NCBI API key to set. """ self.ncbi_key = ncbi_key
[docs] def set_model_api_key(self, model_api_key: Optional[str]) -> None: """ Set the model API key. Args: model_api_key (str, optional): The model API key to set. """ self.model_api_key = model_api_key
[docs] def set_model(self, model: str) -> None: """ Set the language model name. Args: model (str): The name of the language model to use. """ self.model = model
[docs] def set_prompt(self, prompt: str) -> None: """ Set the prompt template. Args: prompt (str): The prompt template to use for classification. """ self.prompt = prompt
[docs] def set_token_limit(self, max_tokens: int) -> None: """ Set the maximum token limit. Args: max_tokens (int): The maximum number of tokens for model context. """ self.max_tokens = max_tokens
[docs] def set_temperature(self, temperature: float) -> None: """ Set the temperature parameter for model creativity. Args: temperature (float): The temperature value (0.0 to 1.0). """ self.temperature = temperature
[docs] def set_top_p(self, top_p: float) -> None: """ Set the top-p parameter for nucleus sampling. Args: top_p (float): The top-p value (0.0 to 1.0). """ self.top_p = top_p
[docs] def set_clean_output(self, clean_output: bool) -> None: """ Set whether to enable output cleaning and validation. Args: clean_output (bool): Whether to clean and validate output. """ self.clean_output = clean_output
[docs] def set_explanation(self, explanation: bool) -> None: """ Set whether to include explanations in the output. Args: explanation (bool): Whether to include explanations. """ self.explanation = explanation
[docs] def set_explanation_separator(self, explanation_separator: str) -> None: """ Set the explanation separator string. Args: explanation_separator (str): The string that separates explanations in the output. """ self.explanation_separator = explanation_separator
[docs] def set_explanation_output(self, output_explanation: bool) -> None: """ Set whether to output explanations along with classifications. Args: output_explanation (bool): Whether to output explanations. """ self.output_explanation = output_explanation
[docs] def set_allowed_categories(self, allowed_categories: Optional[List[str]]) -> None: """ Set the list of allowed categories for filtering. Args: allowed_categories (List[str], optional): List of allowed categories. """ self.allowed_categories = allowed_categories
[docs] def set_custom_client(self, custom_client: Optional[Any]) -> None: """ Set a custom OpenAI client instance. Args: custom_client (Any, optional): Custom OpenAI client instance. """ self.custom_client = custom_client
[docs] def configure(self, ncbi_key: Optional[str] = None, model_api_key: Optional[str] = None, model: str = "gpt-4o", temperature: float = 0, top_p: float = 0, prompt: str = BASE_PROMPT, max_tokens: int = 250000, clean_output: bool = False, explanation: bool = False, explanation_separator: str = "EXPLANATION_COMPLETE", output_explanation: bool = False, allowed_categories: Optional[List[str]] = None, custom_client: Optional[Any] = None) -> None: """ Configure all parameters at once. Args: ncbi_key (str, optional): API key for NCBI/PubMed access. model_api_key (str, optional): API key for the language model service. model (str, optional): Name of the language model to use. Defaults to "gpt-4o". temperature (float, optional): Temperature parameter for model creativity. Defaults to 0. top_p (float, optional): Top-p parameter for nucleus sampling. Defaults to 0. prompt (str, optional): Custom prompt template. Defaults to BASE_PROMPT. max_tokens (int, optional): Maximum number of tokens for model context. Defaults to 250000. clean_output (bool, optional): Whether to clean and validate output. Defaults to False. explanation (bool, optional): Whether to expect explanations in model responses. Defaults to False. explanation_separator (str, optional): Delimiter separating explanation from classification. Defaults to "EXPLANATION_COMPLETE". output_explanation (bool, optional): Whether to return the explanation text alongside classification. Defaults to False. allowed_categories (List[str], optional): List of allowed categories for filtering. Defaults to None. custom_client (Any, optional): Custom OpenAI client instance. Defaults to None. """ self.model_api_key = model_api_key self.model = model self.ncbi_key = ncbi_key self.prompt = prompt self.temperature = temperature self.top_p = top_p self.max_tokens = max_tokens self.clean_output = clean_output self.explanation = explanation self.explanation_separator = explanation_separator self.output_explanation = output_explanation self.allowed_categories = allowed_categories self.custom_client = custom_client
[docs] def configuration(self) -> dict: """ Get the current configuration as a dictionary with masked sensitive data. Returns: dict: A dictionary containing all configuration parameters with API keys masked. """ if self.model_api_key is None: model_api_key_display = None else: model_api_key_display = "*" * len(self.model_api_key) if self.ncbi_key is None: ncbi_key_display = None else: ncbi_key_display = "*" * len(self.ncbi_key) return {"model_api_key": model_api_key_display, "ncbi_key": ncbi_key_display, "model": self.model, "prompt": self.prompt, "token_limit": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p, "clean_output": self.clean_output, "explanation": self.explanation, "explanation_separator": self.explanation_separator, "output_explanation": self.output_explanation, "allowed_categories": self.allowed_categories, "custom_client": self.custom_client }