import os

ollama_api_url = os.getenv("OPENAI_URL", "http://ollama:11434")  # Default Ollama local endpoint
frontend_url = os.getenv("FRONTEND_URL", "https://backstory.ketrenos.com")

user_dir = "/opt/backstory/users"
user_info_file = "info.json"     # Relative to "{user_dir}/{user}"
default_username = "jketreno"
rag_content_dir = "rag-content"  # Relative to "{user_dir}/{user}"

# Path to candidate's full resume
resume_doc_dir = f"{rag_content_dir}/resume"  # Relative to "{user_dir}/{user}"
resume_doc = "resume.md"

persist_directory = "db"  # Relative to "{user_dir}/{user}"

# Model                      License     Notes
# model = "deepseek-r1:7b"   # MIT         Tool calls don't work
# model = "gemma3:4b"        # Gemma       Requires newer ollama -- https://ai.google.dev/gemma/terms
# model = "llama3.2"         # Llama       Good results; qwen seems slightly better -- https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/LICENSE.txt
# model = "mistral:7b"       # Apache 2.0  Tool calls don't work
# model = "qwen3:8b"         # Apache 2.0  Requires newer ollama
model = "qwen2.5:7b"         # Apache 2.0  Good results
model = os.getenv("MODEL_NAME", model)

# Embedding model for producing vectors to use in RAG
embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")

# Maximum context size to allow the LLM to use. This starts smaller and
# grows if different agents request larger contexts. Changing the context
# size requires the LLM to reload, which can take a few seconds.
max_context = 2048 * 8 * 2  # 32768 tokens

# Where to store session JSON files
context_dir = "/opt/backstory/sessions"

# Location of the frontend container's build output, mapped into this container
static_content = os.getenv("STATIC_DIRECTORY", "/opt/backstory/frontend/deployed")

logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()

# RAG and Vector DB settings

## Where to read RAG content
# Number of lines before and after each chunk, beyond the portion used in
# the embedding, to return to callers
chunk_buffer = 5

# Maximum number of entries for ChromaDB to find
default_rag_top_k = 50

# Cosine Distance   Equivalent Similarity   Retrieval Characteristics
# 0.2 - 0.3         0.85 - 0.90             Very strict, highly precise results only
# 0.3 - 0.5         0.75 - 0.85             Strong relevance, good precision
# 0.5 - 0.7         0.65 - 0.75             Balanced precision/recall
# 0.7 - 0.9         0.55 - 0.65             Higher recall, more inclusive
# 0.9 - 1.2         0.40 - 0.55             Very inclusive, may include tangential content
default_rag_threshold = 0.75

# Only used for testing; backstory-prod does not use these keys
key_path = "/opt/backstory/keys/key.pem"
cert_path = "/opt/backstory/keys/cert.pem"

host = os.getenv("BACKSTORY_HOST", "0.0.0.0")
port = int(os.getenv("BACKSTORY_PORT", "8911"))
api_prefix = "/api/1.0"

debug = os.getenv("BACKSTORY_DEBUG", "false").lower() in ("true", "1", "yes")

# Used for filtering tracebacks
app_path = "/opt/backstory/src/backend"
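
# Illustrative only: a minimal sketch of how max_context can be handed to
# Ollama via the "num_ctx" option. The ollama Python client and its "num_ctx"
# option are real; this helper and the prompt are assumptions for
# illustration, not part of the backend.
def _example_set_context() -> None:
    import ollama

    client = ollama.Client(host=ollama_api_url)
    response = client.chat(
        model=model,
        messages=[{"role": "user", "content": "Say hello."}],
        options={"num_ctx": max_context},  # Changing num_ctx triggers a model reload
    )
    print(response["message"]["content"])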
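
# Illustrative only: a minimal sketch of how default_rag_top_k and
# default_rag_threshold combine during retrieval, assuming a ChromaDB
# collection (hypothetically named "rag") exists under persist_directory and
# uses cosine distance. ChromaDB returns up to top_k nearest chunks; anything
# beyond the distance threshold is then dropped, per the table above.
def _example_rag_query(query_embedding: list[float]) -> list[str]:
    import chromadb

    client = chromadb.PersistentClient(path=persist_directory)
    collection = client.get_collection("rag")  # Hypothetical collection name
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=default_rag_top_k,
        include=["documents", "distances"],
    )
    # Keep only chunks whose cosine distance falls within the threshold
    return [
        doc
        for doc, dist in zip(results["documents"][0], results["distances"][0])
        if dist <= default_rag_threshold
    ]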