diff --git a/src/ketr-chat/src/App.tsx b/src/ketr-chat/src/App.tsx index 3251c71..f43095f 100644 --- a/src/ketr-chat/src/App.tsx +++ b/src/ketr-chat/src/App.tsx @@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css'; const welcomeMarkdown = ` # Welcome to Ketr-Chat. -This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. +Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history. -**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context. -So, in order to use the real-time information, you need to click the Settings ![settings](settings.png) icon, open RAG, and disable JPK: ![disable JPK](disable-jpk.png). +In addition to being a RAG-enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. Ask things like: * What are the headlines from CNBC? * What is the weather in Portland, OR? * What is James Ketrenos' work history? * What are the stock value of the most traded companies? + * What programming languages has James used? 
`; const welcomeMessage = { @@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => { ); } +type ContextStatus = { + context_used: number, + max_context: number +}; + const App = () => { const [query, setQuery] = useState(''); const [conversation, setConversation] = useState([]); @@ -417,6 +422,7 @@ const App = () => { const [systemPrompt, setSystemPrompt] = useState(""); const [serverSystemPrompt, setServerSystemPrompt] = useState(""); const [systemInfo, setSystemInfo] = useState(undefined); + const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 }); // Scroll to bottom of conversation when conversation updates useEffect(() => { @@ -454,6 +460,24 @@ const App = () => { }); }, [systemInfo, setSystemInfo, loc, setSnack, sessionId]) + const updateContextStatus = useCallback(() => { + fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }) + .then(response => response.json()) + .then(data => { + console.log(`Session id: ${sessionId} -- context status returned from server: ${data.context_used}/${data.max_context}`) + setContextStatus(data); + }) + .catch(error => { + console.error('Error getting context status:', error); + setSnack("Unable to obtain context status.", "error"); + }); + }, [setContextStatus, loc, setSnack, sessionId]); + // Set the initial chat history to "loading" or the welcome message if loaded. useEffect(() => { if (sessionId === undefined) { @@ -477,8 +501,9 @@ const App = () => { console.error('Error generating session ID:', error); setSnack("Unable to obtain chat history.", "error"); }); + updateContextStatus(); } - }, [sessionId, setConversation, loc, setSnack]); + }, [sessionId, setConversation, updateContextStatus, loc, setSnack]); // Extract the sessionId from the URL if present, otherwise // request a sessionId from the server. 
@@ -835,6 +860,7 @@ const App = () => { ...prev.filter(msg => msg.id !== processingId), update.message ]); + updateContextStatus(); } else if (update.status === 'error') { // Show error setConversation(prev => [ @@ -969,7 +995,7 @@ const App = () => { /> - + {/* Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context} */} line_length - len(current_line) - (1 if current_line else 0): - # If we already have content on the line, add it to results - if current_line: - result_lines.append(current_line) - current_line = "" - - # Get hyphenation points for the word - hyphenated = h.syllables(word) - - if not hyphenated: - # If no hyphenation points found, just add the word to a new line - result_lines.append(word) - continue - - # Try to find a suitable hyphenation point - partial_word = "" - for syllable in hyphenated: - if len(partial_word) + len(syllable) + 1 > line_length: - # Add hyphen to the partial word and start a new line - if partial_word: - result_lines.append(partial_word + "-") - partial_word = syllable - else: - # If a single syllable is too long, just add it - result_lines.append(syllable) - else: - partial_word += syllable - - # Don't forget the remaining part - if partial_word: - current_line = partial_word - - else: - # Start a new line with this word - result_lines.append(current_line) - current_line = word - - # Don't forget any remaining content - if current_line: - result_lines.append(current_line) - - return result_lines - - -# %% -def total_json_length(dict_array): - total = 0 - for item in dict_array: - # Convert dictionary to minimized JSON string - json_string = json.dumps(item, separators=(',', ':')) - total += len(json_string) - return total async def AnalyzeSite(url, question): """ @@ -607,6 +505,17 @@ class WebServer: except: return JSONResponse({ "status": "error" }), 405 + @self.app.get('/api/context-status/{context_id}') + async def 
get_context_status(context_id): + if not is_valid_uuid(context_id): + logging.warning(f"Invalid context_id: {context_id}") + return JSONResponse({"error": "Invalid context_id"}, status_code=400) + context_used = 0 + context = self.upsert_context(context_id) + # TODO: Switch this to use the tokenizer values instead of 75% of character length + for message in context["llm_history"]: + context_used += round((len(message["role"]) + len(message["content"])) * 3 / 4) + return JSONResponse({"context_used": context_used, "max_context": defines.max_context}) @self.app.get('/api/health') async def health_check(): diff --git a/src/utils/defines.py b/src/utils/defines.py index 6066eff..1d0676c 100644 --- a/src/utils/defines.py +++ b/src/utils/defines.py @@ -1,7 +1,7 @@ ollama_api_url="http://ollama:11434" # Default Ollama local endpoint #model = "deepseek-r1:7b" -model = "llama3.2" -#model="qwen2.5:7b" +#model = "llama3.2" +model="qwen2.5:7b" encoding_model="mxbai-embed-large" persist_directory="./chromadb" -max_context = 2048*8 \ No newline at end of file +max_context = 2048*8*2 \ No newline at end of file