# backstory/src/backend/defines.py
# Last modified: 2025-06-18 13:53:07 -07:00
# (67 lines, 2.8 KiB, Python)

"""Central configuration constants for the Backstory backend.

Most values are static defaults; several may be overridden at runtime via
environment variables: MODEL_NAME, EMBEDDING_MODEL_NAME, STATIC_DIRECTORY,
LOGGING_LEVEL, BACKSTORY_HOST, BACKSTORY_PORT, BACKSTORY_DEBUG.
"""
import os

# Base URL of the Ollama server (the "ollama" hostname presumably resolves
# inside the deployment's container network — confirm against compose/k8s config).
ollama_api_url: str = "http://ollama:11434" # Default Ollama local endpoint
# Root directory holding per-user data; per-user paths below are relative to
# "{user_dir}/{user}".
user_dir: str = "/opt/backstory/users"
user_info_file: str = "info.json" # Relative to "{user_dir}/{user}"
# Fallback username when no user is specified.
default_username: str = "jketreno"
rag_content_dir: str = "rag-content" # Relative to "{user_dir}/{user}"
# Path to candidate full resume
resume_doc_dir: str = f"{rag_content_dir}/resume" # Relative to "{user_dir}/{user}"
resume_doc: str = "resume.md"
persist_directory: str = "db" # Relative to "{user_dir}/{user}"
# Alternative models tried previously — kept for reference.
# Model name                 License       Notes
# model = "deepseek-r1:7b" # MIT           Tool calls don't work
# model = "gemma3:4b"      # Gemma         Requires newer ollama  https://ai.google.dev/gemma/terms
# model = "llama3.2"       # Llama         Good results; qwen seems slightly better  https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/LICENSE.txt
# model = "mistral:7b"     # Apache 2.0    Tool calls don't work
model: str = "qwen2.5:7b" # Apache 2.0 Good results
# model = "qwen3:8b" # Apache 2.0 Requires newer ollama
# Environment override; falls back to the default selected above.
model = os.getenv("MODEL_NAME", model)
# Embedding model for producing vectors to use in RAG
embedding_model: str = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
# Maximum context size to allow the LLM to use. This starts
# smaller and will go up if different agents are requesting larger
# contexts. Changing context size requires the LLM to reload, which
# can take a few seconds.
max_context: int = 2048 * 8 * 2  # 32768 tokens
# Where to store session json files
context_dir: str = "/opt/backstory/sessions"
# Location of frontend container's build output mapped into the container
static_content: str = os.getenv("STATIC_DIRECTORY", "/opt/backstory/frontend/deployed")
# Log verbosity; normalized to upper case so env values like "debug" work.
logging_level: str = os.getenv("LOGGING_LEVEL", "INFO").upper()
# RAG and Vector DB settings
## Where to read RAG content
chunk_buffer: int = 5 # Number of lines before and after chunk beyond the portion used in embedding (to return to callers)
# Maximum number of entries for ChromaDB to find
default_rag_top_k: int = 50
# Cosine-distance threshold for RAG retrieval. Reference table:
# Cosine Distance    Equivalent Similarity    Retrieval Characteristics
#   0.2 - 0.3          0.85 - 0.90            Very strict, highly precise results only
#   0.3 - 0.5          0.75 - 0.85            Strong relevance, good precision
#   0.5 - 0.7          0.65 - 0.75            Balanced precision/recall
#   0.7 - 0.9          0.55 - 0.65            Higher recall, more inclusive
#   0.9 - 1.2          0.40 - 0.55            Very inclusive, may include tangential content
default_rag_threshold: float = 0.75
# TLS key/certificate paths. Only used for testing; backstory-prod does not use this.
key_path: str = "/opt/backstory/keys/key.pem"
cert_path: str = "/opt/backstory/keys/cert.pem"
# Bind address/port for the backend server.
host: str = os.getenv("BACKSTORY_HOST", "0.0.0.0")
port: int = int(os.getenv("BACKSTORY_PORT", "8911"))
api_prefix: str = "/api/1.0"
# Truthy env values: "true", "1", "yes" (case-insensitive); anything else is False.
debug: bool = os.getenv("BACKSTORY_DEBUG", "false").lower() in ("true", "1", "yes")
# Used for filtering tracebacks
app_path: str = "/opt/backstory/src/backend"