import os
ollama_api_url = "http://ollama:11434"  # Default Ollama API endpoint (the "ollama" container hostname)
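
# Example (illustrative sketch, not executed by this module): the endpoint can
# be sanity-checked by listing installed models via Ollama's /api/tags route:
#
#   import requests
#   tags = requests.get(f"{ollama_api_url}/api/tags", timeout=5).json()
#   print([m["name"] for m in tags.get("models", [])])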
user_dir = "/opt/backstory/users"
user_info_file = "info.json" # Relative to "{user_dir}/{user}"
default_username = "jketreno"
rag_content_dir = "rag-content" # Relative to "{user_dir}/{user}"
# Path to the candidate's full resume
resume_doc_dir = f"{rag_content_dir}/resume" # Relative to "{user_dir}/{user}"
resume_doc = "resume.md"
persist_directory = "db" # Relative to "{user_dir}/{user}"
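
# Example (illustrative sketch; "username" is a hypothetical variable): how the
# per-user paths above compose:
#
#   base = os.path.join(user_dir, username)                   # /opt/backstory/users/<user>
#   info = os.path.join(base, user_info_file)                 # .../info.json
#   resume = os.path.join(base, resume_doc_dir, resume_doc)   # .../rag-content/resume/resume.md
#   vector_db = os.path.join(base, persist_directory)         # .../db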
# Model name                 License       Notes
# model = "deepseek-r1:7b"   # MIT         Tool calls don't work
# model = "gemma3:4b"        # Gemma       Requires newer ollama; https://ai.google.dev/gemma/terms
# model = "llama3.2"         # Llama       Good results; qwen seems slightly better; https://huggingface.co/meta-llama/Llama-3.2-1B/blob/main/LICENSE.txt
# model = "mistral:7b"       # Apache 2.0  Tool calls don't work
model = "qwen2.5:7b"         # Apache 2.0  Good results
# model = "qwen3:8b"         # Apache 2.0  Requires newer ollama
model = os.getenv("MODEL_NAME", model)
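
# Example (illustrative sketch, not executed by this module): a minimal
# completion request against the selected model via Ollama's /api/generate:
#
#   import requests
#   resp = requests.post(f"{ollama_api_url}/api/generate",
#                        json={"model": model, "prompt": "Hello", "stream": False})
#   print(resp.json()["response"])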
# Embedding model for producing vectors to use in RAG
embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
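
# Example (illustrative sketch): producing a vector for RAG via Ollama's
# /api/embeddings route with the configured embedding model:
#
#   import requests
#   resp = requests.post(f"{ollama_api_url}/api/embeddings",
#                        json={"model": embedding_model, "prompt": "chunk text"})
#   vector = resp.json()["embedding"]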
# Maximum context size to allow the LLM to use. This starts
# smaller and will go up if different agents request larger
# contexts. Changing the context size requires the LLM to reload,
# which can take a few seconds.
max_context = 2048 * 8 * 2  # 32768 tokens
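
# Example (illustrative sketch): the context size is applied per request through
# Ollama's "num_ctx" option; requests with a different num_ctx trigger the
# model reload mentioned above:
#
#   requests.post(f"{ollama_api_url}/api/generate",
#                 json={"model": model, "prompt": "...", "stream": False,
#                       "options": {"num_ctx": max_context}})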
# Where to store session JSON files
context_dir = "/opt/backstory/sessions"
# Location where the frontend container's build output is mapped into this container
static_content = os.getenv("STATIC_DIRECTORY", "/opt/backstory/frontend/deployed")
logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
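
# Example (illustrative sketch): the standard logging module accepts the level
# name string directly:
#
#   import logging
#   logging.basicConfig(level=logging_level)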
# RAG and Vector DB settings (RAG content itself is read from rag_content_dir, above)
chunk_buffer = 5  # Extra lines returned to callers before and after each chunk, beyond the portion used for the embedding
# Maximum number of entries for ChromaDB to return
default_rag_top_k = 50
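
# Example (illustrative sketch; "collection" and "query_vector" are
# hypothetical): a ChromaDB query capped at the configured top-k:
#
#   results = collection.query(query_embeddings=[query_vector],
#                              n_results=default_rag_top_k)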
# Cosine Distance   Equivalent Similarity   Retrieval Characteristics
# 0.2 - 0.3         0.85 - 0.90             Very strict, highly precise results only
# 0.3 - 0.5         0.75 - 0.85             Strong relevance, good precision
# 0.5 - 0.7         0.65 - 0.75             Balanced precision/recall
# 0.7 - 0.9         0.55 - 0.65             Higher recall, more inclusive
# 0.9 - 1.2         0.40 - 0.55             Very inclusive, may include tangential content
default_rag_threshold = 0.75
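
# Example (illustrative sketch): dropping matches whose cosine distance exceeds
# the threshold, using the "distances" list ChromaDB returns alongside results:
#
#   keep = [doc for doc, dist in zip(results["documents"][0],
#                                    results["distances"][0])
#           if dist <= default_rag_threshold]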
# Only used for testing; backstory-prod does not use these
key_path = "/opt/backstory/keys/key.pem"
cert_path = "/opt/backstory/keys/cert.pem"
host = os.getenv("BACKSTORY_HOST", "0.0.0.0")
port = int(os.getenv("BACKSTORY_PORT", "8911"))
api_prefix = "/api/1.0"
debug = os.getenv("BACKSTORY_DEBUG", "false").lower() in ("true", "1", "yes")
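
# Example (illustrative sketch; assumes a uvicorn-served ASGI app, which this
# file does not establish; "app:app" is a hypothetical import string). TLS is
# for local testing only, per the note on key_path/cert_path:
#
#   import uvicorn
#   uvicorn.run("app:app", host=host, port=port,
#               ssl_keyfile=key_path, ssl_certfile=cert_path)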
# Used for filtering tracebacks
app_path = "/opt/backstory/src/backend"