# backstory/src/backend/defines.py
# Last modified: 2025-06-18 13:53:07 -07:00
# (67 lines, 2.8 KiB, Python)

"""Central configuration constants for the Backstory backend.

Most values are static defaults; several may be overridden at runtime via
environment variables: MODEL_NAME, EMBEDDING_MODEL_NAME, STATIC_DIRECTORY,
LOGGING_LEVEL, BACKSTORY_HOST, BACKSTORY_PORT, BACKSTORY_DEBUG.
"""
import os

# Base URL of the Ollama server (the "ollama" hostname presumably resolves
# inside the deployment's container network — confirm against compose/k8s config).
ollama_api_url: str = "http://ollama:11434" # Default Ollama local endpoint
# Root directory holding per-user data; per-user paths below are relative to
# "{user_dir}/{user}".
user_dir: str = "/opt/backstory/users"
user_info_file: str = "info.json" # Relative to "{user_dir}/{user}"
# Fallback username when no user is specified.
default_username: str = "jketreno"
rag_content_dir: str = "rag-content" # Relative to "{user_dir}/{user}"
# Path to candidate full resume
resume_doc_dir: str = f"{rag_content_dir}/resume" # Relative to "{user_dir}/{user}"
resume_doc: str = "resume.md"
persist_directory: str = "db" # Relative to "{user_dir}/{user}"
# Alternative models tried previously — kept for reference.
# Model name                 License       Notes
# model = "deepseek-r1:7b" # MIT           Tool calls don't work
# model = "gemma3:4b"      # Gemma         Requires newer ollama  https://ai.google.dev/gemma/terms
# model = "llama3.2"       # Llama         Good results; qwen seems slightly better  https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/LICENSE.txt
# model = "mistral:7b"     # Apache 2.0    Tool calls don't work
model: str = "qwen2.5:7b" # Apache 2.0 Good results
# model = "qwen3:8b" # Apache 2.0 Requires newer ollama
# Environment override; falls back to the default selected above.
model = os.getenv("MODEL_NAME", model)
# Embedding model for producing vectors to use in RAG
embedding_model: str = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
# Maximum context size to allow the LLM to use. This starts
# smaller and will go up if different agents are requesting larger
# contexts. Changing context size requires the LLM to reload, which
# can take a few seconds.
max_context: int = 2048 * 8 * 2  # 32768 tokens
# Where to store session json files
context_dir: str = "/opt/backstory/sessions"
# Location of frontend container's build output mapped into the container
static_content: str = os.getenv("STATIC_DIRECTORY", "/opt/backstory/frontend/deployed")
# Log verbosity; normalized to upper case so env values like "debug" work.
logging_level: str = os.getenv("LOGGING_LEVEL", "INFO").upper()
# RAG and Vector DB settings
## Where to read RAG content
chunk_buffer: int = 5 # Number of lines before and after chunk beyond the portion used in embedding (to return to callers)
# Maximum number of entries for ChromaDB to find
default_rag_top_k: int = 50
# Cosine-distance threshold for RAG retrieval. Reference table:
# Cosine Distance    Equivalent Similarity    Retrieval Characteristics
#   0.2 - 0.3          0.85 - 0.90            Very strict, highly precise results only
#   0.3 - 0.5          0.75 - 0.85            Strong relevance, good precision
#   0.5 - 0.7          0.65 - 0.75            Balanced precision/recall
#   0.7 - 0.9          0.55 - 0.65            Higher recall, more inclusive
#   0.9 - 1.2          0.40 - 0.55            Very inclusive, may include tangential content
default_rag_threshold: float = 0.75
# TLS key/certificate paths. Only used for testing; backstory-prod does not use this.
key_path: str = "/opt/backstory/keys/key.pem"
cert_path: str = "/opt/backstory/keys/cert.pem"
# Bind address/port for the backend server.
host: str = os.getenv("BACKSTORY_HOST", "0.0.0.0")
port: int = int(os.getenv("BACKSTORY_PORT", "8911"))
api_prefix: str = "/api/1.0"
# Truthy env values: "true", "1", "yes" (case-insensitive); anything else is False.
debug: bool = os.getenv("BACKSTORY_DEBUG", "false").lower() in ("true", "1", "yes")
# Used for filtering tracebacks
app_path: str = "/opt/backstory/src/backend"