Bump context to 32k

Added context-status API
This commit is contained in:
James Ketr 2025-04-01 23:30:01 -07:00
parent 5f1f641dba
commit 8b046deb86
3 changed files with 45 additions and 110 deletions

View File

@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css';
const welcomeMarkdown = ` const welcomeMarkdown = `
# Welcome to Ketr-Chat. # Welcome to Ketr-Chat.
This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history.
**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
So, in order to use the real-time information, you need to click the Settings ![settings](settings.png) icon, open RAG, and disable JPK: ![disable JPK](disable-jpk.png).
Ask things like: Ask things like:
* What are the headlines from CNBC? * What are the headlines from CNBC?
* What is the weather in Portland, OR? * What is the weather in Portland, OR?
* What is James Ketrenos' work history? * What is James Ketrenos' work history?
* What are the stock value of the most traded companies? * What are the stock value of the most traded companies?
* What programming languages has James used?
`; `;
const welcomeMessage = { const welcomeMessage = {
@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => {
); );
} }
// Shape of the payload returned by GET /api/context-status/{sessionId}:
// estimated tokens consumed so far vs. the model's maximum context window.
type ContextStatus = {
  context_used: number,
  max_context: number
};
const App = () => { const App = () => {
const [query, setQuery] = useState(''); const [query, setQuery] = useState('');
const [conversation, setConversation] = useState<MessageList>([]); const [conversation, setConversation] = useState<MessageList>([]);
@ -417,6 +422,7 @@ const App = () => {
const [systemPrompt, setSystemPrompt] = useState<string>(""); const [systemPrompt, setSystemPrompt] = useState<string>("");
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>(""); const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined); const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
// Scroll to bottom of conversation when conversation updates // Scroll to bottom of conversation when conversation updates
useEffect(() => { useEffect(() => {
@ -454,6 +460,24 @@ const App = () => {
}); });
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId]) }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
// Fetch the current context usage for this session from the server and
// store it in component state so the UI can display percent-used.
const updateContextStatus = useCallback(() => {
  fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
  })
    .then(response => {
      // Surface HTTP errors (e.g. 400 for an invalid session id) instead of
      // attempting to parse an error body as a ContextStatus.
      if (!response.ok) {
        throw new Error(`Context status request failed: ${response.status}`);
      }
      return response.json();
    })
    .then(data => {
      // Fix: previous log printed `data.length` (always undefined — the
      // response is an object, not an array; message was copied from the
      // history fetch). Log the actual status fields instead.
      console.log(`Session id: ${sessionId} -- context status: ${data.context_used}/${data.max_context}`);
      setContextStatus(data);
    })
    .catch(error => {
      console.error('Error getting context status:', error);
      setSnack("Unable to obtain context status.", "error");
    });
}, [setContextStatus, loc, setSnack, sessionId]);
// Set the initial chat history to "loading" or the welcome message if loaded. // Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => { useEffect(() => {
if (sessionId === undefined) { if (sessionId === undefined) {
@ -477,8 +501,9 @@ const App = () => {
console.error('Error generating session ID:', error); console.error('Error generating session ID:', error);
setSnack("Unable to obtain chat history.", "error"); setSnack("Unable to obtain chat history.", "error");
}); });
updateContextStatus();
} }
}, [sessionId, setConversation, loc, setSnack]); }, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
// Extract the sessionId from the URL if present, otherwise // Extract the sessionId from the URL if present, otherwise
// request a sessionId from the server. // request a sessionId from the server.
@ -835,6 +860,7 @@ const App = () => {
...prev.filter(msg => msg.id !== processingId), ...prev.filter(msg => msg.id !== processingId),
update.message update.message
]); ]);
updateContextStatus();
} else if (update.status === 'error') { } else if (update.status === 'error') {
// Show error // Show error
setConversation(prev => [ setConversation(prev => [
@ -969,7 +995,7 @@ const App = () => {
/> />
</div> </div>
</Box> </Box>
{/* <Box sx={{ mt: "-1rem", ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", position: "sticky" }}>Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context}</Box> */}
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}> <Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField <TextField
variant="outlined" variant="outlined"

View File

@ -190,110 +190,8 @@ def setup_logging(level):
logging.info(f"Logging is set to {level} level.") logging.info(f"Logging is set to {level} level.")
# %%
def is_words_downloaded():
    """Report whether the nltk 'words' corpus is already available locally."""
    try:
        from nltk.corpus import words
        words.words()  # raises LookupError when the corpus is not on disk
    except LookupError:
        return False
    return True

if not is_words_downloaded():
    logging.info("Downloading nltk words corpus for random nick generation")
    nltk.download('words')
# %% # %%
def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
    """
    Split a paragraph into multiple lines with proper hyphenation.

    Args:
        text (str): The text to split.
        line_length (int): The maximum length of each line.
        language (str): The language code for hyphenation rules.

    Returns:
        [str]: The text split into multiple lines with proper hyphenation.
    """
    # Initialize the hyphenator for the specified language.
    # NOTE(review): `hyphenator` is a module-level import not visible in this
    # chunk — presumably PyHyphen or similar; confirm against the file header.
    h = hyphenator.Hyphenator(language)

    # First attempt: try to wrap without hyphenation.
    # textwrap.wrap only breaks on whitespace, so a single word longer than
    # line_length can still produce an over-long line — handled below.
    lines = textwrap.wrap(text, width=line_length)

    # If any lines are too long, we need to apply hyphenation
    result_lines = []
    for line in lines:
        # If the line is already short enough, keep it as is
        if len(line) <= line_length:
            result_lines.append(line)
            continue
        # Otherwise, we need to hyphenate: rebuild the line word by word.
        words = line.split()
        current_line = ""
        for word in words:
            # If adding the word (plus a separating space when the line is
            # non-empty) doesn't exceed the limit, add it.
            if len(current_line) + len(word) + (1 if current_line else 0) <= line_length:
                if current_line:
                    current_line += " "
                current_line += word
            # If the word itself is too long for the remaining space, hyphenate it
            elif len(word) > line_length - len(current_line) - (1 if current_line else 0):
                # If we already have content on the line, add it to results
                if current_line:
                    result_lines.append(current_line)
                    current_line = ""
                # Get hyphenation points for the word
                hyphenated = h.syllables(word)
                if not hyphenated:
                    # If no hyphenation points found, just add the word to a new line
                    result_lines.append(word)
                    continue
                # Accumulate syllables until adding one (plus a trailing
                # hyphen) would overflow the line.
                partial_word = ""
                for syllable in hyphenated:
                    if len(partial_word) + len(syllable) + 1 > line_length:
                        # Add hyphen to the partial word and start a new line
                        if partial_word:
                            result_lines.append(partial_word + "-")
                            partial_word = syllable
                        else:
                            # If a single syllable is too long, just add it
                            result_lines.append(syllable)
                    else:
                        partial_word += syllable
                # Don't forget the remaining part — it seeds the next line.
                if partial_word:
                    current_line = partial_word
            else:
                # Word fits on a fresh line but not this one:
                # flush the current line and start a new one with this word.
                result_lines.append(current_line)
                current_line = word
        # Don't forget any remaining content from this over-long line.
        if current_line:
            result_lines.append(current_line)

    return result_lines
# %%
def total_json_length(dict_array):
    """Return the combined character count of the items in `dict_array`,
    each serialized as minimized JSON."""
    # separators=(',', ':') drops the default spaces after ',' and ':' so the
    # count reflects the most compact JSON encoding of each item.
    return sum(
        len(json.dumps(entry, separators=(',', ':')))
        for entry in dict_array
    )
async def AnalyzeSite(url, question): async def AnalyzeSite(url, question):
""" """
@ -607,6 +505,17 @@ class WebServer:
except: except:
return JSONResponse({ "status": "error" }), 405 return JSONResponse({ "status": "error" }), 405
@self.app.get('/api/context-status/{context_id}')
async def get_context_status(context_id):
    """Return the estimated context usage for a session as
    {"context_used": int, "max_context": int}."""
    # Reject malformed ids before touching server-side context state.
    if not is_valid_uuid(context_id):
        logging.warning(f"Invalid context_id: {context_id}")
        return JSONResponse({"error": "Invalid context_id"}, status_code=400)
    context_used = 0
    # upsert_context creates the context if it does not exist yet.
    context = self.upsert_context(context_id)
    # TODO: Switch this to use the tokenizer values instead of 75% of character length
    # Heuristic: ~0.75 tokens per character across each message's role + content.
    for message in context["llm_history"]:
        context_used += round((len(message["role"]) + len(message["content"])) * 3 / 4)
    return JSONResponse({"context_used": context_used, "max_context": defines.max_context})
@self.app.get('/api/health') @self.app.get('/api/health')
async def health_check(): async def health_check():

View File

@ -1,7 +1,7 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" #model = "deepseek-r1:7b"
model = "llama3.2" #model = "llama3.2"
#model="qwen2.5:7b" model="qwen2.5:7b"
encoding_model="mxbai-embed-large" encoding_model="mxbai-embed-large"
persist_directory="./chromadb" persist_directory="./chromadb"
max_context = 2048*8 max_context = 2048*8*2