Bump context to 32k

Added context-status API
This commit is contained in:
James Ketr 2025-04-01 23:30:01 -07:00
parent 5f1f641dba
commit 8b046deb86
3 changed files with 45 additions and 110 deletions

View File

@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css';
const welcomeMarkdown = ` const welcomeMarkdown = `
# Welcome to Ketr-Chat. # Welcome to Ketr-Chat.
This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history.
**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
So, in order to use the real-time information, you need to click the Settings ![settings](settings.png) icon, open RAG, and disable JPK: ![disable JPK](disable-jpk.png).
Ask things like: Ask things like:
* What are the headlines from CNBC? * What are the headlines from CNBC?
* What is the weather in Portland, OR? * What is the weather in Portland, OR?
* What is James Ketrenos' work history? * What is James Ketrenos' work history?
* What are the stock value of the most traded companies? * What are the stock value of the most traded companies?
* What programming languages has James used?
`; `;
const welcomeMessage = { const welcomeMessage = {
@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => {
); );
} }
// Shape of the payload returned by GET /api/context-status/{sessionId}:
// estimated tokens consumed so far vs. the model's maximum context window.
type ContextStatus = {
  context_used: number,
  max_context: number
};
const App = () => { const App = () => {
const [query, setQuery] = useState(''); const [query, setQuery] = useState('');
const [conversation, setConversation] = useState<MessageList>([]); const [conversation, setConversation] = useState<MessageList>([]);
@ -417,6 +422,7 @@ const App = () => {
const [systemPrompt, setSystemPrompt] = useState<string>(""); const [systemPrompt, setSystemPrompt] = useState<string>("");
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>(""); const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined); const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
// Scroll to bottom of conversation when conversation updates // Scroll to bottom of conversation when conversation updates
useEffect(() => { useEffect(() => {
@ -454,6 +460,24 @@ const App = () => {
}); });
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId]) }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
// Fetch the current context usage for this session from the server and
// store it in component state so the UI can display percent-used.
const updateContextStatus = useCallback(() => {
  fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
  })
    .then(response => {
      // Surface HTTP errors (e.g. 400 for an invalid session id) instead of
      // attempting to parse an error body as a ContextStatus.
      if (!response.ok) {
        throw new Error(`Context status request failed: ${response.status}`);
      }
      return response.json();
    })
    .then(data => {
      // Fix: previous log printed `data.length` (always undefined — the
      // response is an object, not an array; message was copied from the
      // history fetch). Log the actual status fields instead.
      console.log(`Session id: ${sessionId} -- context status: ${data.context_used}/${data.max_context}`);
      setContextStatus(data);
    })
    .catch(error => {
      console.error('Error getting context status:', error);
      setSnack("Unable to obtain context status.", "error");
    });
}, [setContextStatus, loc, setSnack, sessionId]);
// Set the initial chat history to "loading" or the welcome message if loaded. // Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => { useEffect(() => {
if (sessionId === undefined) { if (sessionId === undefined) {
@ -477,8 +501,9 @@ const App = () => {
console.error('Error generating session ID:', error); console.error('Error generating session ID:', error);
setSnack("Unable to obtain chat history.", "error"); setSnack("Unable to obtain chat history.", "error");
}); });
updateContextStatus();
} }
}, [sessionId, setConversation, loc, setSnack]); }, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
// Extract the sessionId from the URL if present, otherwise // Extract the sessionId from the URL if present, otherwise
// request a sessionId from the server. // request a sessionId from the server.
@ -835,6 +860,7 @@ const App = () => {
...prev.filter(msg => msg.id !== processingId), ...prev.filter(msg => msg.id !== processingId),
update.message update.message
]); ]);
updateContextStatus();
} else if (update.status === 'error') { } else if (update.status === 'error') {
// Show error // Show error
setConversation(prev => [ setConversation(prev => [
@ -969,7 +995,7 @@ const App = () => {
/> />
</div> </div>
</Box> </Box>
{/* <Box sx={{ mt: "-1rem", ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", position: "sticky" }}>Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context}</Box> */}
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}> <Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField <TextField
variant="outlined" variant="outlined"

View File

@ -190,110 +190,8 @@ def setup_logging(level):
logging.info(f"Logging is set to {level} level.") logging.info(f"Logging is set to {level} level.")
# %%
def is_words_downloaded():
    """Report whether the nltk 'words' corpus is already available locally."""
    try:
        from nltk.corpus import words
        words.words()  # raises LookupError when the corpus is not on disk
    except LookupError:
        return False
    return True

if not is_words_downloaded():
    logging.info("Downloading nltk words corpus for random nick generation")
    nltk.download('words')
# %% # %%
def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
    """
    Split a paragraph into multiple lines with proper hyphenation.

    Args:
        text (str): The text to split.
        line_length (int): The maximum length of each line.
        language (str): The language code for hyphenation rules.

    Returns:
        [str]: The text split into multiple lines with proper hyphenation.
    """
    # Initialize the hyphenator for the specified language.
    # NOTE(review): `hyphenator` is a module-level import not visible in this
    # chunk — presumably PyHyphen or similar; confirm against the file header.
    h = hyphenator.Hyphenator(language)

    # First attempt: try to wrap without hyphenation.
    # textwrap.wrap only breaks on whitespace, so a single word longer than
    # line_length can still produce an over-long line — handled below.
    lines = textwrap.wrap(text, width=line_length)

    # If any lines are too long, we need to apply hyphenation
    result_lines = []
    for line in lines:
        # If the line is already short enough, keep it as is
        if len(line) <= line_length:
            result_lines.append(line)
            continue
        # Otherwise, we need to hyphenate: rebuild the line word by word.
        words = line.split()
        current_line = ""
        for word in words:
            # If adding the word (plus a separating space when the line is
            # non-empty) doesn't exceed the limit, add it.
            if len(current_line) + len(word) + (1 if current_line else 0) <= line_length:
                if current_line:
                    current_line += " "
                current_line += word
            # If the word itself is too long for the remaining space, hyphenate it
            elif len(word) > line_length - len(current_line) - (1 if current_line else 0):
                # If we already have content on the line, add it to results
                if current_line:
                    result_lines.append(current_line)
                    current_line = ""
                # Get hyphenation points for the word
                hyphenated = h.syllables(word)
                if not hyphenated:
                    # If no hyphenation points found, just add the word to a new line
                    result_lines.append(word)
                    continue
                # Accumulate syllables until adding one (plus a trailing
                # hyphen) would overflow the line.
                partial_word = ""
                for syllable in hyphenated:
                    if len(partial_word) + len(syllable) + 1 > line_length:
                        # Add hyphen to the partial word and start a new line
                        if partial_word:
                            result_lines.append(partial_word + "-")
                            partial_word = syllable
                        else:
                            # If a single syllable is too long, just add it
                            result_lines.append(syllable)
                    else:
                        partial_word += syllable
                # Don't forget the remaining part — it seeds the next line.
                if partial_word:
                    current_line = partial_word
            else:
                # Word fits on a fresh line but not this one:
                # flush the current line and start a new one with this word.
                result_lines.append(current_line)
                current_line = word
        # Don't forget any remaining content from this over-long line.
        if current_line:
            result_lines.append(current_line)

    return result_lines
# %%
def total_json_length(dict_array):
    """Return the combined character count of the items in `dict_array`,
    each serialized as minimized JSON."""
    # separators=(',', ':') drops the default spaces after ',' and ':' so the
    # count reflects the most compact JSON encoding of each item.
    return sum(
        len(json.dumps(entry, separators=(',', ':')))
        for entry in dict_array
    )
async def AnalyzeSite(url, question): async def AnalyzeSite(url, question):
""" """
@ -607,6 +505,17 @@ class WebServer:
except: except:
return JSONResponse({ "status": "error" }), 405 return JSONResponse({ "status": "error" }), 405
@self.app.get('/api/context-status/{context_id}')
async def get_context_status(context_id):
    """Return the estimated context usage for a session as
    {"context_used": int, "max_context": int}."""
    # Reject malformed ids before touching server-side context state.
    if not is_valid_uuid(context_id):
        logging.warning(f"Invalid context_id: {context_id}")
        return JSONResponse({"error": "Invalid context_id"}, status_code=400)
    context_used = 0
    # upsert_context creates the context if it does not exist yet.
    context = self.upsert_context(context_id)
    # TODO: Switch this to use the tokenizer values instead of 75% of character length
    # Heuristic: ~0.75 tokens per character across each message's role + content.
    for message in context["llm_history"]:
        context_used += round((len(message["role"]) + len(message["content"])) * 3 / 4)
    return JSONResponse({"context_used": context_used, "max_context": defines.max_context})
@self.app.get('/api/health') @self.app.get('/api/health')
async def health_check(): async def health_check():

View File

@ -1,7 +1,7 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" #model = "deepseek-r1:7b"
model = "llama3.2" #model = "llama3.2"
#model="qwen2.5:7b" model="qwen2.5:7b"
encoding_model="mxbai-embed-large" encoding_model="mxbai-embed-large"
persist_directory="./chromadb" persist_directory="./chromadb"
max_context = 2048*8 max_context = 2048*8*2