Up context to 32k
Added context-status API
This commit is contained in:
parent
5f1f641dba
commit
8b046deb86
@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css';
|
||||
const welcomeMarkdown = `
|
||||
# Welcome to Ketr-Chat.
|
||||
|
||||
This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||
Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history.
|
||||
|
||||
**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context.
|
||||
So, in order to use the real-time information, you need to click the Settings  icon, open RAG, and disable JPK: .
|
||||
In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||
|
||||
Ask things like:
|
||||
* What are the headlines from CNBC?
|
||||
* What is the weather in Portland, OR?
|
||||
* What is James Ketrenos' work history?
|
||||
* What are the stock values of the most traded companies?
|
||||
* What programming languages has James used?
|
||||
`;
|
||||
|
||||
const welcomeMessage = {
|
||||
@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Context-window usage for a chat session, in the shape returned by the
 * server's `/api/context-status/{sessionId}` endpoint.
 */
type ContextStatus = {
  /** Amount of context currently consumed (presumably a token estimate — confirm against the server). */
  context_used: number;
  /** Upper bound of the context window, in the same unit as `context_used`. */
  max_context: number;
};
|
||||
|
||||
const App = () => {
|
||||
const [query, setQuery] = useState('');
|
||||
const [conversation, setConversation] = useState<MessageList>([]);
|
||||
@ -417,6 +422,7 @@ const App = () => {
|
||||
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
||||
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
|
||||
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
|
||||
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
|
||||
|
||||
// Scroll to bottom of conversation when conversation updates
|
||||
useEffect(() => {
|
||||
@ -454,6 +460,24 @@ const App = () => {
|
||||
});
|
||||
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
|
||||
|
||||
// Ask the server how much of the context window the current session has
// consumed and store the answer in `contextStatus`.
//
// Failures (network error, non-2xx response, malformed payload) are logged to
// the console and surfaced through the snack bar; `contextStatus` is left
// untouched in that case.
const updateContextStatus = useCallback(() => {
  // Without a session there is nothing to query; avoid requesting
  // `/api/context-status/undefined`.
  if (sessionId === undefined) {
    return;
  }

  fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
  })
    .then(response => {
      // fetch() only rejects on network failure; an HTTP error (e.g. the
      // server's 400 for an invalid id) must be detected explicitly.
      if (!response.ok) {
        throw new Error(`Context status request failed: ${response.status} ${response.statusText}`);
      }
      return response.json();
    })
    .then((data: unknown) => {
      // Validate the payload before it reaches state, so an error body can
      // never be stored as a ContextStatus.
      if (typeof data !== 'object' || data === null) {
        throw new Error('Malformed context status payload');
      }
      const candidate = data as Partial<ContextStatus>;
      if (typeof candidate.context_used !== 'number' || typeof candidate.max_context !== 'number') {
        throw new Error('Malformed context status payload');
      }
      console.log(`Session id: ${sessionId} -- context status returned from server: ${candidate.context_used}/${candidate.max_context}`);
      setContextStatus({
        context_used: candidate.context_used,
        max_context: candidate.max_context,
      });
    })
    .catch(error => {
      console.error('Error getting context status:', error);
      setSnack("Unable to obtain context status.", "error");
    });
}, [setContextStatus, loc, setSnack, sessionId]);
|
||||
|
||||
// Set the initial chat history to "loading" or the welcome message if loaded.
|
||||
useEffect(() => {
|
||||
if (sessionId === undefined) {
|
||||
@ -477,8 +501,9 @@ const App = () => {
|
||||
console.error('Error generating session ID:', error);
|
||||
setSnack("Unable to obtain chat history.", "error");
|
||||
});
|
||||
updateContextStatus();
|
||||
}
|
||||
}, [sessionId, setConversation, loc, setSnack]);
|
||||
}, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
|
||||
|
||||
// Extract the sessionId from the URL if present, otherwise
|
||||
// request a sessionId from the server.
|
||||
@ -835,6 +860,7 @@ const App = () => {
|
||||
...prev.filter(msg => msg.id !== processingId),
|
||||
update.message
|
||||
]);
|
||||
updateContextStatus();
|
||||
} else if (update.status === 'error') {
|
||||
// Show error
|
||||
setConversation(prev => [
|
||||
@ -969,7 +995,7 @@ const App = () => {
|
||||
/>
|
||||
</div>
|
||||
</Box>
|
||||
|
||||
{/* <Box sx={{ mt: "-1rem", ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", position: "sticky" }}>Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context}</Box> */}
|
||||
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
|
||||
<TextField
|
||||
variant="outlined"
|
||||
|
113
src/server.py
113
src/server.py
@ -190,110 +190,8 @@ def setup_logging(level):
|
||||
|
||||
logging.info(f"Logging is set to {level} level.")
|
||||
|
||||
# %%
|
||||
def is_words_downloaded():
    """Report whether the NLTK ``words`` corpus can be read.

    Returns:
        bool: True when ``nltk.corpus.words.words()`` succeeds, False when
        the access raises ``LookupError``.
    """
    try:
        from nltk.corpus import words as corpus

        # Touching the dataset is what triggers the lookup.
        corpus.words()
    except LookupError:
        available = False
    else:
        available = True
    return available
|
||||
|
||||
# Import-time bootstrap: fetch the NLTK "words" corpus only when it cannot
# already be read.
if is_words_downloaded():
    pass  # Corpus is already available; nothing to fetch.
else:
    logging.info("Downloading nltk words corpus for random nick generation")
    nltk.download('words')
|
||||
|
||||
# %%
|
||||
def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
    """
    Split a paragraph into multiple lines with proper hyphenation.

    The text is first wrapped on whitespace. Words that are longer than
    ``line_length`` are kept intact by the wrapper and then split at the
    syllable boundaries reported by the hyphenator, with a trailing "-" on
    every piece except the last.

    Args:
        text (str): The text to split.
        line_length (int): The maximum length of each line.
        language (str): The language code for hyphenation rules.

    Returns:
        [str]: The text split into multiple lines with proper hyphenation.
        A word with no hyphenation points, or a single syllable longer than
        ``line_length``, is emitted as-is and may exceed ``line_length``.
    """
    # Initialize the hyphenator for the specified language
    h = hyphenator.Hyphenator(language)

    # First attempt: wrap on whitespace only. break_long_words=False is
    # essential: with the default (True) textwrap hard-breaks long words
    # itself, no line is ever too long, and the hyphenation below never runs.
    lines = textwrap.wrap(text, width=line_length, break_long_words=False)

    result_lines = []

    for line in lines:
        # If the line is already short enough, keep it as is
        if len(line) <= line_length:
            result_lines.append(line)
            continue

        # Otherwise the line contains at least one over-long word
        current_line = ""

        for word in line.split():
            separator = 1 if current_line else 0

            # If adding the word doesn't exceed the limit, add it
            if len(current_line) + separator + len(word) <= line_length:
                if current_line:
                    current_line += " "
                current_line += word
                continue

            # The word does not fit on the current line: flush what we have
            if current_line:
                result_lines.append(current_line)
                current_line = ""

            # A word that fits on a line of its own needs no hyphenation
            if len(word) <= line_length:
                current_line = word
                continue

            # The word itself is too long: hyphenate it. The unfinished tail
            # becomes the start of the next line so later words can join it.
            finished, current_line = _hyphenate_word(h, word, line_length)
            result_lines.extend(finished)

        # Don't forget any remaining content
        if current_line:
            result_lines.append(current_line)

    return result_lines


def _hyphenate_word(h, word, line_length):
    """Split one over-long word at syllable boundaries.

    Returns a ``(finished_lines, tail)`` pair: ``finished_lines`` are complete
    output lines, ``tail`` is the last, still-open piece of the word ("" when
    there is none).
    """
    syllables = h.syllables(word)

    if not syllables:
        # No hyphenation points found: emit the word on its own line
        return [word], ""

    finished = []
    partial = ""
    for syllable in syllables:
        # The +1 reserves room for the trailing hyphen
        if len(partial) + len(syllable) + 1 > line_length:
            if partial:
                finished.append(partial + "-")
                partial = syllable
            else:
                # A single syllable is too long on its own: emit it unchanged
                finished.append(syllable)
        else:
            partial += syllable

    return finished, partial
|
||||
|
||||
|
||||
# %%
|
||||
def total_json_length(dict_array):
    """Return the combined length of every item serialized as compact JSON.

    Args:
        dict_array: Iterable of JSON-serializable items.

    Returns:
        int: Sum of the character counts of each item's minimized JSON string
        (no whitespace after "," or ":"); 0 for an empty iterable.
    """
    compact = (',', ':')
    return sum(len(json.dumps(entry, separators=compact)) for entry in dict_array)
|
||||
|
||||
async def AnalyzeSite(url, question):
|
||||
"""
|
||||
@ -607,6 +505,17 @@ class WebServer:
|
||||
except:
|
||||
return JSONResponse({ "status": "error" }), 405
|
||||
|
||||
@self.app.get('/api/context-status/{context_id}')
async def get_context_status(context_id):
    """Report estimated context usage for one session.

    Responds with HTTP 400 and an error body when ``context_id`` fails
    ``is_valid_uuid``; otherwise responds with ``context_used`` (the
    estimate) and ``max_context`` (``defines.max_context``).
    """
    if is_valid_uuid(context_id):
        context = self.upsert_context(context_id)
        # TODO: Switch this to use the tokenizer values instead of 75% of character length
        context_used = sum(
            round((len(entry["role"]) + len(entry["content"])) * 3 / 4)
            for entry in context["llm_history"]
        )
        return JSONResponse({"context_used": context_used, "max_context": defines.max_context})

    logging.warning(f"Invalid context_id: {context_id}")
    return JSONResponse({"error": "Invalid context_id"}, status_code=400)
|
||||
|
||||
@self.app.get('/api/health')
|
||||
async def health_check():
|
||||
|
@ -1,7 +1,7 @@
|
||||
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
|
||||
#model = "deepseek-r1:7b"
|
||||
model = "llama3.2"
|
||||
#model="qwen2.5:7b"
|
||||
#model = "llama3.2"
|
||||
model="qwen2.5:7b"
|
||||
encoding_model="mxbai-embed-large"
|
||||
persist_directory="./chromadb"
|
||||
max_context = 2048*8
|
||||
max_context = 2048*8*2
|
Loading…
x
Reference in New Issue
Block a user