Up context to 32k

Added context-status API
2025-04-01 23:30:01 -07:00 · 2025-04-01 23:30:01 -07:00 · 8b046deb86
commit 8b046deb86
parent 5f1f641dba
3 changed files with 45 additions and 110 deletions
--- a/src/ketr-chat/src/App.tsx
+++ b/src/ketr-chat/src/App.tsx
@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css';
 const welcomeMarkdown = `
 # Welcome to Ketr-Chat.

-This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
+Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history.

-**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context.
-So, in order to use the real-time information, you need to click the Settings ![settings](settings.png) icon, open RAG, and disable JPK: ![disable JPK](disable-jpk.png).
+In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.

 Ask things like:
  * What are the headlines from CNBC?
  * What is the weather in Portland, OR?
  * What is James Ketrenos' work history?
  * What are the stock value of the most traded companies?
+  * What programming languages has James used?
 `;

 const welcomeMessage = {
@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => {
  );
 }

+type ContextStatus = { // Shape of the JSON returned by GET /api/context-status/{sessionId}; keys mirror the server response
+  context_used: number, // Server-side estimate of how much context the session's LLM history consumes
+  max_context: number // Context limit reported by the server (its defines.max_context value)
+};
+
 const App = () => {
  const [query, setQuery] = useState('');
  const [conversation, setConversation] = useState<MessageList>([]);
@ -417,6 +422,7 @@ const App = () => {
  const [systemPrompt, setSystemPrompt] = useState<string>("");
  const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
  const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
+  const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });

  // Scroll to bottom of conversation when conversation updates
  useEffect(() => {
@ -454,6 +460,24 @@ const App = () => {
      });
  }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])

+  const updateContextStatus = useCallback(() => { // Fetch the session's context usage from the server and store it in contextStatus
+    fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    })
+      .then(response => response.ok ? response.json() : Promise.reject(new Error(`HTTP ${response.status}`))) // Non-2xx bodies (e.g. 400 {"error": ...}) are not a ContextStatus; route them to .catch
+      .then((data: ContextStatus) => {
+        console.log(`Session id: ${sessionId} -- context status returned from server: ${data.context_used}/${data.max_context}`) // Payload is an object, not a list, so log its fields rather than a length
+        setContextStatus(data);
+      })
+      .catch(error => { // Network failures, HTTP errors and JSON parse errors all land here
+        console.error('Error getting context status:', error);
+        setSnack("Unable to obtain context status.", "error");
+      });
+  }, [setContextStatus, loc, setSnack, sessionId]); // Re-created only when the session or connection inputs change
+
  // Set the initial chat history to "loading" or the welcome message if loaded.
  useEffect(() => {
    if (sessionId === undefined) {
@ -477,8 +501,9 @@ const App = () => {
          console.error('Error generating session ID:', error);
          setSnack("Unable to obtain chat history.", "error");
        });
+      updateContextStatus();
    }
-  }, [sessionId, setConversation, loc, setSnack]);
+  }, [sessionId, setConversation, updateContextStatus, loc, setSnack]);

  // Extract the sessionId from the URL if present, otherwise
  // request a sessionId from the server.
@ -835,6 +860,7 @@ const App = () => {
                ...prev.filter(msg => msg.id !== processingId),
                update.message
              ]);
+              updateContextStatus();
            } else if (update.status === 'error') {
              // Show error
              setConversation(prev => [
@ -969,7 +995,7 @@ const App = () => {
              />
            </div>
          </Box>
-
+          {/* <Box sx={{ mt: "-1rem", ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", position: "sticky" }}>Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context}</Box> */}
          <Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
            <TextField
              variant="outlined"
--- a/src/server.py
+++ b/src/server.py
@ -190,110 +190,8 @@ def setup_logging(level):
    
    logging.info(f"Logging is set to {level} level.")

-# %%
-def is_words_downloaded():
-    try:
-        from nltk.corpus import words
-        words.words()  # Attempt to access the dataset
-        return True
-    except LookupError:
-        return False
-
-if not is_words_downloaded():
-    logging.info("Downloading nltk words corpus for random nick generation")
-    nltk.download('words')

 # %%
-def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
-    """
-    Split a paragraph into multiple lines with proper hyphenation.
-    
-    Args:
-        text (str): The text to split.
-        line_length (int): The maximum length of each line.
-        language (str): The language code for hyphenation rules.
-        
-    Returns:
-        [str]: The text split into multiple lines with proper hyphenation.
-    """
-    # Initialize the hyphenator for the specified language
-    h = hyphenator.Hyphenator(language)
-    
-    # First attempt: try to wrap without hyphenation
-    lines = textwrap.wrap(text, width=line_length)
-
-    # If any lines are too long, we need to apply hyphenation
-    result_lines = []
-    
-    for line in lines:
-        # If the line is already short enough, keep it as is
-        if len(line) <= line_length:
-            result_lines.append(line)
-            continue
-        
-        # Otherwise, we need to hyphenate
-        words = line.split()
-        current_line = ""
-        
-        for word in words:
-            # If adding the word doesn't exceed the limit, add it
-            if len(current_line) + len(word) + (1 if current_line else 0) <= line_length:
-                if current_line:
-                    current_line += " "
-                current_line += word
-            # If the word itself is too long, hyphenate it
-            elif len(word) > line_length - len(current_line) - (1 if current_line else 0):
-                # If we already have content on the line, add it to results
-                if current_line:
-                    result_lines.append(current_line)
-                    current_line = ""
-                
-                # Get hyphenation points for the word
-                hyphenated = h.syllables(word)
-                
-                if not hyphenated:
-                    # If no hyphenation points found, just add the word to a new line
-                    result_lines.append(word)
-                    continue
-                
-                # Try to find a suitable hyphenation point
-                partial_word = ""
-                for syllable in hyphenated:
-                    if len(partial_word) + len(syllable) + 1 > line_length:
-                        # Add hyphen to the partial word and start a new line
-                        if partial_word:
-                            result_lines.append(partial_word + "-")
-                            partial_word = syllable
-                        else:
-                            # If a single syllable is too long, just add it
-                            result_lines.append(syllable)
-                    else:
-                        partial_word += syllable
-                
-                # Don't forget the remaining part
-                if partial_word:
-                    current_line = partial_word
-                
-            else:
-                # Start a new line with this word
-                result_lines.append(current_line)
-                current_line = word
-        
-        # Don't forget any remaining content
-        if current_line:
-            result_lines.append(current_line)
-    
-    return result_lines
-
-
-# %%
-def total_json_length(dict_array):
-    total = 0
-    for item in dict_array:
-        # Convert dictionary to minimized JSON string
-        json_string = json.dumps(item, separators=(',', ':'))
-        total += len(json_string)
-    return total

 async def AnalyzeSite(url, question):
    """
@ -607,6 +505,17 @@ class WebServer:
            except:
                return JSONResponse({ "status": "error" }), 405

+        @self.app.get('/api/context-status/{context_id}')
+        async def get_context_status(context_id):  # Report the estimated context usage for one session as JSON
+            if not is_valid_uuid(context_id):  # Reject malformed ids with HTTP 400 before any context lookup
+                logging.warning(f"Invalid context_id: {context_id}")
+                return JSONResponse({"error": "Invalid context_id"}, status_code=400)
+            context_used = 0  # Running total of the per-message estimates below
+            context = self.upsert_context(context_id)  # NOTE(review): name suggests this creates the context when missing -- confirm a GET should do that
+            # TODO: Switch this to use the tokenizer values instead of 75% of character length
+            for message in context["llm_history"]:  # NOTE(review): assumes every entry has str "role" and "content" -- confirm for tool-call messages
+                context_used += round((len(message["role"]) + len(message["content"])) * 3 / 4)  # 3/4 of the role+content character count, rounded per message
+            return JSONResponse({"context_used": context_used, "max_context": defines.max_context})  # Keys match the client's ContextStatus type

        @self.app.get('/api/health')
        async def health_check():
--- a/src/utils/defines.py
+++ b/src/utils/defines.py
@ -1,7 +1,7 @@
 ollama_api_url="http://ollama:11434"  # Default Ollama local endpoint
 #model = "deepseek-r1:7b"
-model = "llama3.2"
-#model="qwen2.5:7b"
+#model = "llama3.2"
+model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"
-max_context = 2048*8
+max_context = 2048*8*2  # = 32768 (32k); returned as "max_context" by /api/context-status