Fixed RAG context -- it was using the wrong model for embedding

This commit is contained in:
James Ketr 2025-04-26 09:33:52 -07:00
parent 98b43d298a
commit 8fd517f2f2
6 changed files with 102 additions and 56 deletions

View File

@ -159,6 +159,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
sendMessageHistoryLength(messageHistoryLength);
}, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]);
const reset = async (types: ("rags" | "tools" | "history" | "system_prompt" | "message_history_length")[], message: string = "Update successful.") => {
try {
const response = await fetch(connectionBase + `/api/reset/${sessionId}`, {
@ -406,6 +407,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
</div>
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tunables</Typography>
@ -429,6 +431,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
/>
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tools</Typography>
@ -449,6 +452,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
}</FormGroup>
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">RAG</Typography>
@ -457,12 +461,15 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
These RAG databases can be enabled / disabled for adding additional context based on the chat request.
</AccordionDetails>
<AccordionActions>
<FormGroup sx={{ p: 1 }}>
<FormGroup sx={{ p: 1, flexGrow: 1, justifyContent: "flex-start" }}>
{
rags.map((rag, index) =>
<Box key={index}>
<Box key={index} sx={{ display: "flex", flexGrow: 1, flexDirection: "column" }}>
<Divider />
<FormControlLabel control={<Switch checked={rag.enabled} />} onChange={() => toggle("rag", index)} label={rag?.name} />
<FormControlLabel
control={<Switch checked={rag.enabled} />}
onChange={() => toggle("rag", index)} label={rag?.name}
/>
<Typography>{rag?.description}</Typography>
</Box>
)

View File

@ -0,0 +1,56 @@
import { useState } from 'react';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import CheckIcon from '@mui/icons-material/Check';
import IconButton from '@mui/material/IconButton';
import { Tooltip } from '@mui/material';
import { SxProps, Theme } from '@mui/material';
/** Props accepted by the CopyBubble component. */
interface CopyBubbleProps {
  /** Text to place on the clipboard; when `undefined`, clicking the button does nothing. */
  content: string | undefined;
  /** Optional MUI `sx` styles merged over the button's built-in styles. */
  sx?: SxProps<Theme>;
}
/**
 * Small icon button, absolutely positioned in the top-right corner of its
 * nearest positioned ancestor, that copies `content` (trimmed) to the system
 * clipboard. After a successful copy the icon switches to a check mark for
 * two seconds.
 *
 * @param content - Text to copy. When `undefined`, clicking is a no-op.
 * @param sx - Optional MUI `sx` overrides applied after the built-in styles.
 */
const CopyBubble = ({
  content,
  sx = [],
}: CopyBubbleProps) => {
  const [copied, setCopied] = useState(false);

  const handleCopy = () => {
    if (content === undefined) {
      return;
    }
    // The async Clipboard API is only exposed in secure contexts (HTTPS or
    // localhost); bail out instead of throwing a TypeError on plain HTTP.
    if (!navigator.clipboard) {
      console.warn('CopyBubble: Clipboard API is unavailable in this context.');
      return;
    }
    navigator.clipboard.writeText(content.trim()).then(
      () => {
        setCopied(true);
        setTimeout(() => setCopied(false), 2000); // Reset after 2 seconds
      },
      (error: unknown) => {
        // writeText rejects when permission is denied or the document is not
        // focused; report it rather than leaving an unhandled rejection.
        console.error('CopyBubble: failed to copy to clipboard.', error);
      },
    );
  };

  return (
    <Tooltip title="Copy to clipboard" placement="top" arrow>
      <IconButton
        onClick={handleCopy}
        sx={[
          {
            position: 'absolute',
            top: 0,
            right: 0,
            width: 24,
            height: 24,
            opacity: 0.75,
            bgcolor: 'background.paper',
            '&:hover': { bgcolor: 'action.hover', opacity: 1 },
          },
          // `SxProps` may be an object, a theme callback, or an array of
          // those, so it cannot be object-spread; append it to the sx array
          // so later entries override the defaults above.
          ...(Array.isArray(sx) ? sx : [sx]),
        ]}
        size="small"
        color={copied ? "success" : "default"}
      >
        {copied
          ? <CheckIcon sx={{ width: 16, height: 16 }} />
          : <ContentCopyIcon sx={{ width: 16, height: 16 }} />}
      </IconButton>
    </Tooltip>
  );
}
export {
CopyBubble
}

View File

@ -12,22 +12,19 @@ import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
import Box from '@mui/material/Box';
import Button from '@mui/material/Button';
import IconButton from '@mui/material/IconButton';
import CardContent from '@mui/material/CardContent';
import CardActions from '@mui/material/CardActions';
import Collapse from '@mui/material/Collapse';
import Typography from '@mui/material/Typography';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import { ExpandMore } from './ExpandMore';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import CheckIcon from '@mui/icons-material/Check';
import { ChatBubble } from './ChatBubble';
import { StyledMarkdown } from './StyledMarkdown';
import { Tooltip } from '@mui/material';
import { VectorVisualizer } from './VectorVisualizer';
import { SetSnackType } from './Snack';
import { CopyBubble } from './CopyBubble';
type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error' | 'content';
@ -127,7 +124,7 @@ const MessageMeta = ({ ...props }: MessageMetaProps) => {
</Box>
</AccordionSummary>
<AccordionDetails>
<pre>{props.full_query}</pre>
<pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={props.full_query.trim()} />{props.full_query.trim()}</pre>
</AccordionDetails>
</Accordion>
}
@ -221,20 +218,8 @@ const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, connectionBase }: MessageProps) => {
const [expanded, setExpanded] = useState<boolean>(false);
const [copied, setCopied] = useState(false);
const textFieldRef = useRef(null);
const handleCopy = () => {
if (message === undefined || message.content === undefined) {
return;
}
navigator.clipboard.writeText(message.content.trim()).then(() => {
setCopied(true);
setTimeout(() => setCopied(false), 2000); // Reset after 2 seconds
});
};
const handleExpandClick = () => {
setExpanded(!expanded);
};
@ -266,25 +251,7 @@ const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, conne
overflowX: "auto"
}}>
<CardContent ref={textFieldRef} sx={{ position: "relative", display: "flex", flexDirection: "column", overflowX: "auto", m: 0, p: 0 }}>
<Tooltip title="Copy to clipboard" placement="top" arrow>
<IconButton
onClick={handleCopy}
sx={{
position: 'absolute',
top: 0,
right: 0,
width: 24,
height: 24,
opacity: 0.75,
bgcolor: 'background.paper',
'&:hover': { bgcolor: 'action.hover', opacity: 1 },
}}
size="small"
color={copied ? "success" : "default"}
>
{copied ? <CheckIcon sx={{ width: 16, height: 16 }} /> : <ContentCopyIcon sx={{ width: 16, height: 16 }} />}
</IconButton>
</Tooltip>
<CopyBubble content={message?.content} />
{message.role !== 'user' ?
<StyledMarkdown

View File

@ -123,6 +123,7 @@ def system_info(model):
"Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info(),
"LLM Model": model,
"Embedding Model": defines.embedding_model,
"Context length": defines.max_context
}

View File

@ -1,17 +1,17 @@
import os
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" # Tool calls don't work
#model="mistral:7b" # Tool calls don't work
#model = "deepseek-r1:7b" # Tool calls don't work
#model="mistral:7b" # Tool calls don't work
#model = "llama3.2"
model = os.getenv('MODEL_NAME', 'qwen2.5:7b')
encoding_model="mxbai-embed-large"
persist_directory = os.getenv('PERSIST_DIR', "/opt/backstory/chromadb")
model = os.getenv("MODEL_NAME", "qwen2.5:7b")
embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
persist_directory = os.getenv("PERSIST_DIR", "/opt/backstory/chromadb")
max_context = 2048*8*2
doc_dir = "/opt/backstory/docs/"
session_dir = "/opt/backstory/sessions"
static_content = '/opt/backstory/frontend/deployed'
resume_doc = '/opt/backstory/docs/resume/generic.txt'
static_content = "/opt/backstory/frontend/deployed"
resume_doc = "/opt/backstory/docs/resume/generic.txt"
# Only used for testing; backstory-prod will not use this
key_path = '/opt/backstory/keys/key.pem'
cert_path = '/opt/backstory/keys/cert.pem'
key_path = "/opt/backstory/keys/key.pem"
cert_path = "/opt/backstory/keys/cert.pem"

View File

@ -16,6 +16,7 @@ import numpy as np
import chromadb
import ollama
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain.schema import Document
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
@ -34,9 +35,12 @@ __all__ = [
'start_file_watcher'
]
DEFAULT_CHUNK_SIZE=750
DEFAULT_CHUNK_OVERLAP=100
class ChromaDBFileWatcher(FileSystemEventHandler):
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
chunk_size=500, chunk_overlap=200, recreate=False):
chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP, recreate=False):
self.llm = llm
self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory
@ -45,6 +49,8 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
self.chunk_overlap = chunk_overlap
self.loop = loop
#self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Path for storing file hash state
self.hash_state_path = os.path.join(self.persist_directory, f"{collection_name}_hash_state.json")
@ -58,7 +64,9 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
# Setup text splitter
self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap
chunk_overlap=chunk_overlap,
separator="\n\n", # Respect paragraph/section breaks
length_function=len
)
# Track file hashes and processing state
@ -319,15 +327,22 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
def get_embedding(self, text, normalize=True):
"""Generate embeddings using Ollama."""
#response = self.embedding_model.encode(text) # Outputs 384-dim vectors
response = self.llm.embeddings(
model=defines.model,
prompt=text,
options={"num_ctx": self.chunk_size * 3} # No need waste ctx space
)
model=defines.embedding_model,
prompt=text)
embedding = response['embedding']
# response = self.llm.embeddings.create(
# model=defines.embedding_model,
# input=text,
# options={"num_ctx": self.chunk_size * 3} # No need to waste ctx space
# )
if normalize:
normalized = self._normalize_embeddings(response["embedding"])
normalized = self._normalize_embeddings(embedding)
return normalized
return response["embedding"]
return embedding
def add_embeddings_to_collection(self, chunks):
"""Add embeddings for chunks to the collection."""