Up context to 32k
Added context-status API
This commit is contained in:
parent
5f1f641dba
commit
8b046deb86
@ -50,16 +50,16 @@ import '@fontsource/roboto/700.css';
|
|||||||
const welcomeMarkdown = `
|
const welcomeMarkdown = `
|
||||||
# Welcome to Ketr-Chat.
|
# Welcome to Ketr-Chat.
|
||||||
|
|
||||||
This system has real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
Hi, my author is James Ketrenos. He built this LLM agent in order to provide answers to any questions you may have about his work history.
|
||||||
|
|
||||||
**NOTE**: As of right now, the LLM model being used is refusing to use enabled tools when RAG is enabled to provide context.
|
In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||||
So, in order to use the real-time information, you need to click the Settings  icon, open RAG, and disable JPK: .
|
|
||||||
|
|
||||||
Ask things like:
|
Ask things like:
|
||||||
* What are the headlines from CNBC?
|
* What are the headlines from CNBC?
|
||||||
* What is the weather in Portland, OR?
|
* What is the weather in Portland, OR?
|
||||||
* What is James Ketrenos' work history?
|
* What is James Ketrenos' work history?
|
||||||
* What are the stock value of the most traded companies?
|
* What are the stock value of the most traded companies?
|
||||||
|
* What programming languages has James used?
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const welcomeMessage = {
|
const welcomeMessage = {
|
||||||
@ -400,6 +400,11 @@ const Message = ({ message }: MessageInterface) => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Context usage figures as returned by the server's
 * `/api/context-status/{id}` endpoint.
 */
type ContextStatus = {
    /** Amount of context consumed so far (server-side estimate). */
    context_used: number;
    /** Upper bound on context reported by the server. */
    max_context: number;
};
|
||||||
|
|
||||||
const App = () => {
|
const App = () => {
|
||||||
const [query, setQuery] = useState('');
|
const [query, setQuery] = useState('');
|
||||||
const [conversation, setConversation] = useState<MessageList>([]);
|
const [conversation, setConversation] = useState<MessageList>([]);
|
||||||
@ -417,6 +422,7 @@ const App = () => {
|
|||||||
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
||||||
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
|
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
|
||||||
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
|
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
|
||||||
|
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
|
||||||
|
|
||||||
// Scroll to bottom of conversation when conversation updates
|
// Scroll to bottom of conversation when conversation updates
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@ -454,6 +460,24 @@ const App = () => {
|
|||||||
});
|
});
|
||||||
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
|
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
|
||||||
|
|
||||||
|
// Fetch the context usage for the current session from the server and
// store it in `contextStatus`. Failures are logged and surfaced via setSnack.
const updateContextStatus = useCallback(() => {
    // Without a session there is nothing to query; the endpoint would only
    // reject the request as an invalid id.
    if (sessionId === undefined) {
        return;
    }
    fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
        method: 'GET',
        headers: {
            'Content-Type': 'application/json',
        },
    })
        .then(response => {
            // fetch() only rejects on network failure, so HTTP error
            // responses (whose body is an error object, not a status)
            // must be routed to the catch handler explicitly.
            if (!response.ok) {
                throw new Error(`Context status request failed with HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            console.log(`Session id: ${sessionId} -- context status returned from server: ${data.context_used}/${data.max_context}`);
            setContextStatus(data);
        })
        .catch(error => {
            console.error('Error getting context status:', error);
            setSnack("Unable to obtain context status.", "error");
        });
}, [setContextStatus, loc, setSnack, sessionId]);
|
||||||
|
|
||||||
// Set the initial chat history to "loading" or the welcome message if loaded.
|
// Set the initial chat history to "loading" or the welcome message if loaded.
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (sessionId === undefined) {
|
if (sessionId === undefined) {
|
||||||
@ -477,8 +501,9 @@ const App = () => {
|
|||||||
console.error('Error generating session ID:', error);
|
console.error('Error generating session ID:', error);
|
||||||
setSnack("Unable to obtain chat history.", "error");
|
setSnack("Unable to obtain chat history.", "error");
|
||||||
});
|
});
|
||||||
|
updateContextStatus();
|
||||||
}
|
}
|
||||||
}, [sessionId, setConversation, loc, setSnack]);
|
}, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
|
||||||
|
|
||||||
// Extract the sessionId from the URL if present, otherwise
|
// Extract the sessionId from the URL if present, otherwise
|
||||||
// request a sessionId from the server.
|
// request a sessionId from the server.
|
||||||
@ -835,6 +860,7 @@ const App = () => {
|
|||||||
...prev.filter(msg => msg.id !== processingId),
|
...prev.filter(msg => msg.id !== processingId),
|
||||||
update.message
|
update.message
|
||||||
]);
|
]);
|
||||||
|
updateContextStatus();
|
||||||
} else if (update.status === 'error') {
|
} else if (update.status === 'error') {
|
||||||
// Show error
|
// Show error
|
||||||
setConversation(prev => [
|
setConversation(prev => [
|
||||||
@ -969,7 +995,7 @@ const App = () => {
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</Box>
|
</Box>
|
||||||
|
{/* <Box sx={{ mt: "-1rem", ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", position: "sticky" }}>Context used: {Math.round(100 * contextStatus.context_used / contextStatus.max_context)}% {contextStatus.context_used}/{contextStatus.max_context}</Box> */}
|
||||||
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
|
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
|
||||||
<TextField
|
<TextField
|
||||||
variant="outlined"
|
variant="outlined"
|
||||||
|
113
src/server.py
113
src/server.py
@ -190,110 +190,8 @@ def setup_logging(level):
|
|||||||
|
|
||||||
logging.info(f"Logging is set to {level} level.")
|
logging.info(f"Logging is set to {level} level.")
|
||||||
|
|
||||||
# %%
|
|
||||||
def is_words_downloaded():
    """Report whether the nltk ``words`` corpus can be read.

    Returns:
        bool: True when accessing the corpus succeeds, False when nltk
        raises LookupError for it.
    """
    try:
        from nltk.corpus import words as word_corpus
        # Touching the data forces nltk to locate the corpus on disk.
        word_corpus.words()
    except LookupError:
        return False
    return True
|
|
||||||
|
|
||||||
# Download the nltk words corpus when is_words_downloaded() reports that it
# is not available; the log message notes it is used for random nick generation.
if not is_words_downloaded():
    logging.info("Downloading nltk words corpus for random nick generation")
    nltk.download('words')
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
    """
    Split a paragraph into multiple lines, hyphenating words that are too
    long to fit on a line by themselves.

    Args:
        text (str): The text to split.
        line_length (int): The maximum length of each line.
        language (str): The language code for hyphenation rules.

    Returns:
        [str]: The text split into multiple lines. A line may still exceed
        line_length when a word has no hyphenation points or a single
        syllable is itself longer than line_length.
    """
    # Initialize the hyphenator for the specified language
    h = hyphenator.Hyphenator(language)

    # Wrap on whitespace only. With textwrap's default break_long_words=True
    # over-long words are chopped at an arbitrary column, no line is ever
    # longer than line_length, and the hyphenation pass below never runs.
    lines = textwrap.wrap(text, width=line_length, break_long_words=False)

    result_lines = []

    for line in lines:
        # If the line is already short enough, keep it as is
        if len(line) <= line_length:
            result_lines.append(line)
            continue

        # Otherwise the line holds at least one word needing hyphenation
        current_line = ""

        for word in line.split():
            separator = 1 if current_line else 0

            if len(current_line) + len(word) + separator <= line_length:
                # The word fits on the current line
                if current_line:
                    current_line += " "
                current_line += word
            elif len(word) > line_length:
                # The word cannot fit on any line: flush what we have and
                # break the word at syllable boundaries.
                if current_line:
                    result_lines.append(current_line)
                    current_line = ""

                hyphenated = h.syllables(word)

                if not hyphenated:
                    # No hyphenation points found; emit the word on its own line
                    result_lines.append(word)
                    continue

                partial_word = ""
                for syllable in hyphenated:
                    # The +1 reserves room for the trailing hyphen
                    if len(partial_word) + len(syllable) + 1 > line_length:
                        if partial_word:
                            result_lines.append(partial_word + "-")
                            partial_word = syllable
                        else:
                            # A single syllable is too long; emit it as is
                            result_lines.append(syllable)
                    else:
                        partial_word += syllable

                # The tail of the word starts the next line
                if partial_word:
                    current_line = partial_word
            else:
                # The word fits on a line of its own, just not on this one
                result_lines.append(current_line)
                current_line = word

        # Don't forget any remaining content
        if current_line:
            result_lines.append(current_line)

    return result_lines
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
|
||||||
def total_json_length(dict_array):
    """Return the summed length of every item serialized as compact JSON.

    Each item is dumped with ``(',', ':')`` separators (no padding
    whitespace) and the character counts are added together.
    """
    return sum(
        len(json.dumps(entry, separators=(',', ':')))
        for entry in dict_array
    )
|
|
||||||
|
|
||||||
async def AnalyzeSite(url, question):
|
async def AnalyzeSite(url, question):
|
||||||
"""
|
"""
|
||||||
@ -607,6 +505,17 @@ class WebServer:
|
|||||||
except:
|
except:
|
||||||
return JSONResponse({ "status": "error" }), 405
|
return JSONResponse({ "status": "error" }), 405
|
||||||
|
|
||||||
|
@self.app.get('/api/context-status/{context_id}')
async def get_context_status(context_id):
    """Report estimated context usage for the given context id.

    Responds with ``context_used`` and ``max_context`` on success, or a
    400 error payload when ``context_id`` fails the UUID check.
    """
    if is_valid_uuid(context_id):
        history = self.upsert_context(context_id)["llm_history"]
        # TODO: Switch this to use the tokenizer values instead of 75% of character length
        context_used = sum(
            round((len(entry["role"]) + len(entry["content"])) * 3 / 4)
            for entry in history
        )
        return JSONResponse({"context_used": context_used, "max_context": defines.max_context})

    logging.warning(f"Invalid context_id: {context_id}")
    return JSONResponse({"error": "Invalid context_id"}, status_code=400)
|
||||||
|
|
||||||
@self.app.get('/api/health')
|
@self.app.get('/api/health')
|
||||||
async def health_check():
|
async def health_check():
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
|
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
|
||||||
#model = "deepseek-r1:7b"
|
#model = "deepseek-r1:7b"
|
||||||
model = "llama3.2"
|
#model = "llama3.2"
|
||||||
#model="qwen2.5:7b"
|
model="qwen2.5:7b"
|
||||||
encoding_model="mxbai-embed-large"
|
encoding_model="mxbai-embed-large"
|
||||||
persist_directory="./chromadb"
|
persist_directory="./chromadb"
|
||||||
max_context = 2048*8
|
max_context = 2048*8*2
|
Loading…
x
Reference in New Issue
Block a user