Fixed RAG context -- it was using the wrong model for embedding

This commit is contained in:
James Ketr 2025-04-26 09:33:52 -07:00
parent 98b43d298a
commit 8fd517f2f2
6 changed files with 102 additions and 56 deletions

View File

@ -159,6 +159,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
sendMessageHistoryLength(messageHistoryLength); sendMessageHistoryLength(messageHistoryLength);
}, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]); }, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]);
const reset = async (types: ("rags" | "tools" | "history" | "system_prompt" | "message_history_length")[], message: string = "Update successful.") => { const reset = async (types: ("rags" | "tools" | "history" | "system_prompt" | "message_history_length")[], message: string = "Update successful.") => {
try { try {
const response = await fetch(connectionBase + `/api/reset/${sessionId}`, { const response = await fetch(connectionBase + `/api/reset/${sessionId}`, {
@ -406,6 +407,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
</div> </div>
</AccordionActions> </AccordionActions>
</Accordion> </Accordion>
<Accordion> <Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}> <AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tunables</Typography> <Typography component="span">Tunables</Typography>
@ -429,6 +431,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
/> />
</AccordionActions> </AccordionActions>
</Accordion> </Accordion>
<Accordion> <Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}> <AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tools</Typography> <Typography component="span">Tools</Typography>
@ -449,6 +452,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
}</FormGroup> }</FormGroup>
</AccordionActions> </AccordionActions>
</Accordion> </Accordion>
<Accordion> <Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}> <AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">RAG</Typography> <Typography component="span">RAG</Typography>
@ -457,12 +461,15 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
These RAG databases can be enabled / disabled for adding additional context based on the chat request. These RAG databases can be enabled / disabled for adding additional context based on the chat request.
</AccordionDetails> </AccordionDetails>
<AccordionActions> <AccordionActions>
<FormGroup sx={{ p: 1 }}> <FormGroup sx={{ p: 1, flexGrow: 1, justifyContent: "flex-start" }}>
{ {
rags.map((rag, index) => rags.map((rag, index) =>
<Box key={index}> <Box key={index} sx={{ display: "flex", flexGrow: 1, flexDirection: "column" }}>
<Divider /> <Divider />
<FormControlLabel control={<Switch checked={rag.enabled} />} onChange={() => toggle("rag", index)} label={rag?.name} /> <FormControlLabel
control={<Switch checked={rag.enabled} />}
onChange={() => toggle("rag", index)} label={rag?.name}
/>
<Typography>{rag?.description}</Typography> <Typography>{rag?.description}</Typography>
</Box> </Box>
) )

View File

@ -0,0 +1,56 @@
import { useEffect, useRef, useState } from 'react';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import CheckIcon from '@mui/icons-material/Check';
import IconButton from '@mui/material/IconButton';
import { Tooltip } from '@mui/material';
import { SxProps, Theme } from '@mui/material';
/** Props accepted by CopyBubble. */
interface CopyBubbleProps {
  /** Text to copy; it is trimmed before being written. When undefined, clicking does nothing. */
  content: string | undefined;
  /** Style overrides, spread after (and therefore winning over) the button's default styles. */
  sx?: SxProps<Theme>;
}

/** How long the "copied" check mark stays visible before reverting, in milliseconds. */
const COPIED_RESET_MS = 2000;

/**
 * Small icon button, absolutely positioned in the top-right corner of its
 * containing block, that copies `content` to the clipboard and briefly shows
 * a check mark on success.
 */
const CopyBubble = ({
  content,
  sx,
}: CopyBubbleProps) => {
  const [copied, setCopied] = useState(false);
  // Handle of the pending "revert to copy icon" timer, if any.
  const resetTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Cancel a pending reset when the component unmounts so the timer
  // never fires against a component that is no longer mounted.
  useEffect(() => {
    return () => {
      if (resetTimer.current !== null) {
        clearTimeout(resetTimer.current);
        resetTimer.current = null;
      }
    };
  }, []);

  const handleCopy = () => {
    if (content === undefined) {
      return;
    }
    // The async Clipboard API is only exposed in secure contexts; without
    // this guard a click on a plain-HTTP deployment throws a TypeError.
    if (!navigator.clipboard) {
      console.error('Clipboard API is unavailable (insecure context?).');
      return;
    }
    navigator.clipboard.writeText(content.trim())
      .then(() => {
        setCopied(true);
        // Restart the countdown on repeated clicks so an earlier timer
        // cannot hide the check mark prematurely.
        if (resetTimer.current !== null) {
          clearTimeout(resetTimer.current);
        }
        resetTimer.current = setTimeout(() => {
          resetTimer.current = null;
          setCopied(false);
        }, COPIED_RESET_MS);
      })
      .catch((error: unknown) => {
        // writeText rejects when permission is denied or the document is
        // not focused; report it instead of leaving an unhandled rejection.
        console.error('Failed to copy to clipboard:', error);
      });
  };

  return (
    <Tooltip title="Copy to clipboard" placement="top" arrow>
      <IconButton
        onClick={handleCopy}
        sx={{
          position: 'absolute',
          top: 0,
          right: 0,
          width: 24,
          height: 24,
          opacity: 0.75,
          bgcolor: 'background.paper',
          '&:hover': { bgcolor: 'action.hover', opacity: 1 },
          ...sx,
        }}
        size="small"
        color={copied ? "success" : "default"}
      >
        {copied ? <CheckIcon sx={{ width: 16, height: 16 }} /> : <ContentCopyIcon sx={{ width: 16, height: 16 }} />}
      </IconButton>
    </Tooltip>
  );
}

export {
  CopyBubble
}

View File

@ -12,22 +12,19 @@ import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow'; import TableRow from '@mui/material/TableRow';
import Box from '@mui/material/Box'; import Box from '@mui/material/Box';
import Button from '@mui/material/Button'; import Button from '@mui/material/Button';
import IconButton from '@mui/material/IconButton';
import CardContent from '@mui/material/CardContent'; import CardContent from '@mui/material/CardContent';
import CardActions from '@mui/material/CardActions'; import CardActions from '@mui/material/CardActions';
import Collapse from '@mui/material/Collapse'; import Collapse from '@mui/material/Collapse';
import Typography from '@mui/material/Typography'; import Typography from '@mui/material/Typography';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import { ExpandMore } from './ExpandMore'; import { ExpandMore } from './ExpandMore';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import CheckIcon from '@mui/icons-material/Check';
import { ChatBubble } from './ChatBubble'; import { ChatBubble } from './ChatBubble';
import { StyledMarkdown } from './StyledMarkdown'; import { StyledMarkdown } from './StyledMarkdown';
import { Tooltip } from '@mui/material';
import { VectorVisualizer } from './VectorVisualizer'; import { VectorVisualizer } from './VectorVisualizer';
import { SetSnackType } from './Snack'; import { SetSnackType } from './Snack';
import { CopyBubble } from './CopyBubble';
type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error' | 'content'; type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error' | 'content';
@ -127,7 +124,7 @@ const MessageMeta = ({ ...props }: MessageMetaProps) => {
</Box> </Box>
</AccordionSummary> </AccordionSummary>
<AccordionDetails> <AccordionDetails>
<pre>{props.full_query}</pre> <pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={props.full_query.trim()} />{props.full_query.trim()}</pre>
</AccordionDetails> </AccordionDetails>
</Accordion> </Accordion>
} }
@ -221,20 +218,8 @@ const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, connectionBase }: MessageProps) => { const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, connectionBase }: MessageProps) => {
const [expanded, setExpanded] = useState<boolean>(false); const [expanded, setExpanded] = useState<boolean>(false);
const [copied, setCopied] = useState(false);
const textFieldRef = useRef(null); const textFieldRef = useRef(null);
const handleCopy = () => {
if (message === undefined || message.content === undefined) {
return;
}
navigator.clipboard.writeText(message.content.trim()).then(() => {
setCopied(true);
setTimeout(() => setCopied(false), 2000); // Reset after 2 seconds
});
};
const handleExpandClick = () => { const handleExpandClick = () => {
setExpanded(!expanded); setExpanded(!expanded);
}; };
@ -266,25 +251,7 @@ const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, conne
overflowX: "auto" overflowX: "auto"
}}> }}>
<CardContent ref={textFieldRef} sx={{ position: "relative", display: "flex", flexDirection: "column", overflowX: "auto", m: 0, p: 0 }}> <CardContent ref={textFieldRef} sx={{ position: "relative", display: "flex", flexDirection: "column", overflowX: "auto", m: 0, p: 0 }}>
<Tooltip title="Copy to clipboard" placement="top" arrow> <CopyBubble content={message?.content} />
<IconButton
onClick={handleCopy}
sx={{
position: 'absolute',
top: 0,
right: 0,
width: 24,
height: 24,
opacity: 0.75,
bgcolor: 'background.paper',
'&:hover': { bgcolor: 'action.hover', opacity: 1 },
}}
size="small"
color={copied ? "success" : "default"}
>
{copied ? <CheckIcon sx={{ width: 16, height: 16 }} /> : <ContentCopyIcon sx={{ width: 16, height: 16 }} />}
</IconButton>
</Tooltip>
{message.role !== 'user' ? {message.role !== 'user' ?
<StyledMarkdown <StyledMarkdown

View File

@ -123,6 +123,7 @@ def system_info(model):
"Graphics Card": get_graphics_cards(), "Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info(), "CPU": get_cpu_info(),
"LLM Model": model, "LLM Model": model,
"Embedding Model": defines.embedding_model,
"Context length": defines.max_context "Context length": defines.max_context
} }

View File

@ -1,17 +1,17 @@
import os import os
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" # Tool calls don't work #model = "deepseek-r1:7b" # Tool calls don't work
#model="mistral:7b" # Tool calls don't work #model="mistral:7b" # Tool calls don't work
#model = "llama3.2" #model = "llama3.2"
model = os.getenv('MODEL_NAME', 'qwen2.5:7b') model = os.getenv("MODEL_NAME", "qwen2.5:7b")
encoding_model="mxbai-embed-large" embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
persist_directory = os.getenv('PERSIST_DIR', "/opt/backstory/chromadb") persist_directory = os.getenv("PERSIST_DIR", "/opt/backstory/chromadb")
max_context = 2048*8*2 max_context = 2048*8*2
doc_dir = "/opt/backstory/docs/" doc_dir = "/opt/backstory/docs/"
session_dir = "/opt/backstory/sessions" session_dir = "/opt/backstory/sessions"
static_content = '/opt/backstory/frontend/deployed' static_content = "/opt/backstory/frontend/deployed"
resume_doc = '/opt/backstory/docs/resume/generic.txt' resume_doc = "/opt/backstory/docs/resume/generic.txt"
# Only used for testing; backstory-prod will not use this # Only used for testing; backstory-prod will not use this
key_path = '/opt/backstory/keys/key.pem' key_path = "/opt/backstory/keys/key.pem"
cert_path = '/opt/backstory/keys/cert.pem' cert_path = "/opt/backstory/keys/cert.pem"

View File

@ -16,6 +16,7 @@ import numpy as np
import chromadb import chromadb
import ollama import ollama
from langchain.text_splitter import CharacterTextSplitter from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain.schema import Document from langchain.schema import Document
from watchdog.observers import Observer from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler from watchdog.events import FileSystemEventHandler
@ -34,9 +35,12 @@ __all__ = [
'start_file_watcher' 'start_file_watcher'
] ]
DEFAULT_CHUNK_SIZE=750
DEFAULT_CHUNK_OVERLAP=100
class ChromaDBFileWatcher(FileSystemEventHandler): class ChromaDBFileWatcher(FileSystemEventHandler):
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents", def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
chunk_size=500, chunk_overlap=200, recreate=False): chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP, recreate=False):
self.llm = llm self.llm = llm
self.watch_directory = watch_directory self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory self.persist_directory = persist_directory or defines.persist_directory
@ -45,6 +49,8 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
self.chunk_overlap = chunk_overlap self.chunk_overlap = chunk_overlap
self.loop = loop self.loop = loop
#self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Path for storing file hash state # Path for storing file hash state
self.hash_state_path = os.path.join(self.persist_directory, f"{collection_name}_hash_state.json") self.hash_state_path = os.path.join(self.persist_directory, f"{collection_name}_hash_state.json")
@ -58,7 +64,9 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
# Setup text splitter # Setup text splitter
self.text_splitter = CharacterTextSplitter( self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size, chunk_size=chunk_size,
chunk_overlap=chunk_overlap chunk_overlap=chunk_overlap,
separator="\n\n", # Respect paragraph/section breaks
length_function=len
) )
# Track file hashes and processing state # Track file hashes and processing state
@ -319,15 +327,22 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
def get_embedding(self, text, normalize=True): def get_embedding(self, text, normalize=True):
"""Generate embeddings using Ollama.""" """Generate embeddings using Ollama."""
#response = self.embedding_model.encode(text) # Outputs 384-dim vectors
response = self.llm.embeddings( response = self.llm.embeddings(
model=defines.model, model=defines.embedding_model,
prompt=text, prompt=text)
options={"num_ctx": self.chunk_size * 3} # No need waste ctx space embedding = response['embedding']
)
# response = self.llm.embeddings.create(
# model=defines.embedding_model,
# input=text,
# options={"num_ctx": self.chunk_size * 3} # No need waste ctx space
# )
if normalize: if normalize:
normalized = self._normalize_embeddings(response["embedding"]) normalized = self._normalize_embeddings(embedding)
return normalized return normalized
return response["embedding"] return embedding
def add_embeddings_to_collection(self, chunks): def add_embeddings_to_collection(self, chunks):
"""Add embeddings for chunks to the collection.""" """Add embeddings for chunks to the collection."""