Starting to work again
parent dc55196311
commit 5806563777
@@ -24,6 +24,23 @@ interface ConversationHandle {
     submitQuery: (query: string) => void;
 }
 
+interface BackstoryMessage {
+    prompt: string;
+    preamble: string;
+    content: string;
+    response: string;
+    metadata: {
+        rag: { documents: [] };
+        tools: string[];
+        eval_count: number;
+        eval_duration: number;
+        prompt_eval_count: number;
+        prompt_eval_duration: number;
+    };
+    actions: string[];
+    timestamp: string;
+};
+
 interface ConversationProps {
     className?: string, // Override default className
     type: ConversationMode, // Type of Conversation chat
@@ -168,7 +185,33 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
             setNoInteractions(true);
         } else {
             console.log(`History returned for ${type} from server with ${messages.length} entries:`, messages)
-            setConversation(messages);
+
+            const backstoryMessages: BackstoryMessage[] = messages;
+
+            // type MessageData = {
+            //     role: MessageRoles,
+            //     content: string,
+            //     disableCopy?: boolean,
+            //     user?: string,
+            //     title?: string,
+            //     origin?: string,
+            //     display?: string, /* Messages generated on the server for filler should not be shown */
+            //     id?: string,
+            //     isProcessing?: boolean,
+            //     metadata?: MessageMetaData
+            // };
+            setConversation(backstoryMessages.flatMap((message: BackstoryMessage) => [{
+                role: 'user',
+                content: message.prompt || "",
+            }, {
+                role: 'assistant',
+                prompt: message.prompt || "",
+                preamble: message.preamble || "",
+                full_content: message.content || "",
+                content: message.response || "",
+                metadata: message.metadata,
+                actions: message.actions,
+            }] as MessageList));
             setNoInteractions(false);
         }
         setProcessingMessage(undefined);
@@ -372,10 +415,20 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
                 update.message = onResponse(update.message);
             }
             setProcessingMessage(undefined);
+            const backstoryMessage: BackstoryMessage = update.message;
             setConversation([
-                ...conversationRef.current,
-                update.message
-            ])
+                ...conversationRef.current, {
+                    role: 'user',
+                    content: backstoryMessage.prompt || "",
+                }, {
+                    role: 'assistant',
+                    prompt: backstoryMessage.prompt || "",
+                    preamble: backstoryMessage.preamble || "",
+                    full_content: backstoryMessage.content || "",
+                    content: backstoryMessage.response || "",
+                    metadata: backstoryMessage.metadata,
+                    actions: backstoryMessage.actions,
+                }] as MessageList);
             // Add a small delay to ensure React has time to update the UI
             await new Promise(resolve => setTimeout(resolve, 0));
@@ -414,10 +467,20 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
                 update.message = onResponse(update.message);
             }
             setProcessingMessage(undefined);
+            const backstoryMessage: BackstoryMessage = update.message;
             setConversation([
-                ...conversationRef.current,
-                update.message
-            ]);
+                ...conversationRef.current, {
+                    role: 'user',
+                    content: backstoryMessage.prompt || "",
+                }, {
+                    role: 'assistant',
+                    prompt: backstoryMessage.prompt || "",
+                    preamble: backstoryMessage.preamble || "",
+                    full_content: backstoryMessage.content || "",
+                    content: backstoryMessage.response || "",
+                    metadata: backstoryMessage.metadata,
+                    actions: backstoryMessage.actions,
+                }] as MessageList);
         }
     } catch (e) {
         setSnack("Error processing query", "error")
@@ -224,8 +224,7 @@ const MessageMeta = (props: MessageMetaProps) => {
         <AccordionDetails>
             {Object.entries(props.messageProps.message)
                 .filter(([key, value]) => key !== undefined && value !== undefined)
-                .map(([key, value]) => (<>
-                    {(typeof (value) !== "string" || value?.trim() !== "") &&
+                .map(([key, value]) => (typeof (value) !== "string" || value?.trim() !== "") &&
                     <Accordion key={key}>
                         <AccordionSummary sx={{ fontSize: "1rem", fontWeight: "bold" }} expandIcon={<ExpandMoreIcon />}>
                             {key}
@@ -235,7 +234,7 @@ const MessageMeta = (props: MessageMetaProps) => {
                     Object.entries(value)
                         .filter(([key, value]) => key !== undefined && value !== undefined)
                         .map(([key, value]) => (
-                            <Accordion key={`${key}-metadata`}>
+                            <Accordion key={`metadata.${key}`}>
                                 <AccordionSummary sx={{ fontSize: "1rem", fontWeight: "bold" }} expandIcon={<ExpandMoreIcon />}>
                                     {key}
                                 </AccordionSummary>
@@ -248,11 +247,8 @@ const MessageMeta = (props: MessageMetaProps) => {
                                 <pre>{typeof (value) !== "object" ? value : JSON.stringify(value)}</pre>
                             }
                         </AccordionDetails>
                     </Accordion>
-                    }
-                    </>
-                )
-            )}
                 </Accordion>
+            )}
         </AccordionDetails>
     </Accordion>

57 src/kill-server.sh Executable file

@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Ensure input was provided
+if [[ -z "$1" ]]; then
+    echo "Usage: $0 <path/to/python_script.py>"
+    exit 1
+fi
+
+# Resolve user-supplied path to absolute path
+TARGET=$(readlink -f "$1")
+
+if [[ ! -f "$TARGET" ]]; then
+    echo "Target file '$TARGET' not found."
+    exit 1
+fi
+
+# Loop through python processes and resolve each script path
+PID=""
+for pid in $(pgrep -f python); do
+    if [[ -r "/proc/$pid/cmdline" ]]; then
+        # Get the full command line, null-separated
+        cmdline=$(tr '\0' ' ' < "/proc/$pid/cmdline")
+        # Extract the script argument (naively assumes it's the first non-option)
+        script_arg=$(echo "$cmdline" | awk '{for (i=2;i<=NF;i++) if ($i !~ /^-/) {print $i; exit}}')
+
+        # Try resolving the script path relative to the process's cwd
+        script_path=$(readlink -f "/proc/$pid/cwd/$script_arg" 2>/dev/null)
+
+        if [[ "$script_path" == "$TARGET" ]]; then
+            PID=$pid
+            break
+        fi
+    fi
+done
+
+if [[ -z "$PID" ]]; then
+    echo "No Python process found running '$TARGET'."
+    exit 1
+fi
+
+echo "Found process $PID running $TARGET"
+
+# Get times
+file_time=$(stat -c %Y "$TARGET")
+proc_time=$(stat -c %Y "/proc/$PID")
+
+# if (( file_time > proc_time )); then
+#     echo "Script '$TARGET' is newer than process $PID. Killing $PID"
+kill -9 "$PID"
+if [[ $? -ne 0 ]]; then
+    echo "Failed to kill process $PID."
+    exit 1
+fi
+echo "Process $PID killed."
+# else
+#     echo "Script '$TARGET' is older than or same age as process $PID."
+# fi
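The match above hinges on resolving each candidate's script argument against that process's /proc/<pid>/cwd, so a relative invocation like `python server.py` still compares equal to the absolute TARGET. A rough Python equivalent of that check, assuming Linux procfs (psutil would be the portable route):

```python
import os

def pid_runs_script(pid: int, target: str) -> bool:
    """Return True if `pid` is a python process whose script argument
    resolves (via the process's own cwd) to `target`."""
    target = os.path.realpath(target)
    try:
        with open(f"/proc/{pid}/cmdline", "rb") as f:
            argv = [a.decode() for a in f.read().split(b"\0") if a]
    except OSError:
        return False
    # Naively take the first non-option argument after the interpreter
    script = next((a for a in argv[1:] if not a.startswith("-")), None)
    if script is None:
        return False
    # /proc/<pid>/cwd is a symlink to the process's working directory
    return os.path.realpath(os.path.join(f"/proc/{pid}/cwd", script)) == target
```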

367 src/server.py

@@ -44,7 +44,7 @@ from sklearn.preprocessing import MinMaxScaler

 from utils import (
     rag as Rag,
-    Context, Conversation, Message, Chat, Resume, JobDescription, FactCheck,
+    Context, Conversation, Session, Message, Chat, Resume, JobDescription, FactCheck,
     defines
 )
@@ -74,8 +74,8 @@ When answering queries, follow these steps:
 - When both <|context|> and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
 - Always prioritize the most up-to-date and relevant information, whether it comes from <|context|> or tools
 - If <|context|> and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
-- If there is information in the <|context|>, <|job_description|>, or <|work_history|> sections to enhance the answer, incorporate it seamlessly and refer to it as 'the latest information' or 'recent data' instead of mentioning '<|context|>' (etc.) or quoting it directly.
-- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|work_history|> tags.
+- If there is information in the <|context|>, <|job_description|>, or <|context|> sections to enhance the answer, incorporate it seamlessly and refer to it as 'the latest information' or 'recent data' instead of mentioning '<|context|>' (etc.) or quoting it directly.
+- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|context|> tags.

 Always use tools and <|context|> when possible. Be concise, and never make up information. If you do not know the answer, say so.
 """
@@ -83,21 +83,21 @@ Always use tools and <|context|> when possible. Be concise, and never make up information. If you do not know the answer, say so.

 system_generate_resume = f"""
 Launched on {DateTime()}.

-You are a professional resume writer. Your task is to write a concise, polished, and tailored resume for a specific job based only on the individual's <|work_history|>.
+You are a professional resume writer. Your task is to write a concise, polished, and tailored resume for a specific job based only on the individual's <|context|>.

 When answering queries, follow these steps:

-- You must not invent or assume any information not explicitly present in the <|work_history|>.
+- You must not invent or assume any information not explicitly present in the <|context|>.
 - Analyze the <|job_description|> to identify skills required for the job.
-- Use the <|job_description|> provided to guide the focus, tone, and relevant skills or experience to highlight from the <|work_history|>.
-- Identify and emphasize the experiences, achievements, and responsibilities from the <|work_history|> that best align with the <|job_description|>.
-- Only provide information from <|work_history|> items if it is relevant to the <|job_description|>.
-- Do not use the <|job_description|> skills unless listed in <|work_history|>.
-- Do not include any information unless it is provided in <|work_history|>.
-- Use the <|work_history|> to create a polished, professional resume.
+- Use the <|job_description|> provided to guide the focus, tone, and relevant skills or experience to highlight from the <|context|>.
+- Identify and emphasize the experiences, achievements, and responsibilities from the <|context|> that best align with the <|job_description|>.
+- Only provide information from <|context|> items if it is relevant to the <|job_description|>.
+- Do not use the <|job_description|> skills unless listed in <|context|>.
+- Do not include any information unless it is provided in <|context|>.
+- Use the <|context|> to create a polished, professional resume.
 - Do not list any locations or mailing addresses in the resume.
-- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
-- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|work_history|> tags.
+- If there is information in the <|context|>, <|job_description|>, <|context|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
+- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|context|> tags.
 - Ensure the language is clear, concise, and aligned with industry standards for professional resumes.

 Structure the resume professionally with the following sections where applicable:
@@ -105,7 +105,7 @@ Structure the resume professionally with the following sections where applicable:
 * Name: Use full name
 * Professional Summary: A 2-4 sentence overview tailored to the job.
 * Skills: A bullet list of key skills derived from the work history and relevant to the job.
-* Professional Experience: A detailed list of roles, achievements, and responsibilities from <|work_history|> that relate to the <|job_description|>.
+* Professional Experience: A detailed list of roles, achievements, and responsibilities from <|context|> that relate to the <|job_description|>.
 * Education: Include only if available in the work history.
 * Notes: Indicate the initial draft of the resume was generated using the Backstory application.
@@ -114,17 +114,27 @@ Structure the resume professionally with the following sections where applicable:

 system_fact_check = f"""
 Launched on {DateTime()}.

-You are a professional resume fact checker. Your task is to identify any inaccuracies in the <|resume|> based on the individual's <|work_history|>.
+You are a professional resume fact checker. Your task is to identify any inaccuracies in the <|resume|> based on the individual's <|context|>.

 If there are inaccuracies, list them in a bullet point format.

 When answering queries, follow these steps:
-- You must not invent or assume any information not explicitly present in the <|work_history|>.
-- Analyze the <|resume|> to identify any discrepancies or inaccuracies based on the <|work_history|>.
-- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
-- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|work_history|> tags.
+- You must not invent or assume any information not explicitly present in the <|context|>.
+- Analyze the <|resume|> to identify any discrepancies or inaccuracies based on the <|context|>.
+- If there is information in the <|context|>, <|job_description|>, <|context|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
+- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|context|> tags.
 """.strip()

+system_fact_check_QA = f"""
+Launched on {DateTime()}.
+
+You are a professional resume fact checker.
+
+You are provided with a <|resume|> which was generated by you, the <|context|> you used to generate that <|resume|>, and a <|fact_check|> generated by you when you analyzed <|context|> against the <|resume|> to identify discrepancies.
+
+Your task is to answer questions about the <|fact_check|> you generated based on the <|resume|> and <|context|>.
+"""
+
 system_job_description = f"""
 Launched on {DateTime()}.

@@ -133,8 +143,8 @@ You are a hiring and job placing specialist. Your task is to answers about a job

 When answering queries, follow these steps:
 - Analyze the <|job_description|> to provide insights for the asked question.
 - If any financial information is requested, be sure to account for inflation.
-- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
-- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|work_history|> tags.
+- If there is information in the <|context|>, <|job_description|>, <|context|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
+- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|context|> tags.
 """.strip()

 def get_installed_ram():
@@ -263,7 +273,7 @@ def setup_logging(level):

 # %%

-async def AnalyzeSite(url, question):
+async def AnalyzeSite(llm, model: str, url : str, question : str):
     """
     Fetches content from a URL, extracts the text, and uses Ollama to summarize it.

@@ -273,7 +283,6 @@ async def AnalyzeSite(url, question):
     Returns:
         str: A summary of the website content
     """
-    global model, client
     try:
         # Fetch the webpage
         headers = {
@@ -308,7 +317,7 @@ async def AnalyzeSite(url, question):

         # Generate summary using Ollama
         prompt = f"CONTENTS:\n\n{text}\n\n{question}"
-        response = client.generate(model=model,
+        response = llm.generate(model=model,
                                    system="You are given the contents of {url}. Answer the question about the contents",
                                    prompt=prompt)

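With the `global model, client` statement removed above, AnalyzeSite now takes the LLM handle and model name explicitly. A hypothetical call-site sketch; the `ollama` client and the model name are assumptions inferred from the `llm.generate(model=..., system=..., prompt=...)` usage:

```python
import asyncio
import ollama  # assumed client library; anything exposing .generate() works

async def main() -> None:
    llm = ollama.Client()
    # AnalyzeSite as defined in server.py above
    summary = await AnalyzeSite(
        llm=llm,
        model="llama3.2",  # model name is an assumption
        url="https://example.com",
        question="What is the summary of this content?",
    )
    print(summary)

asyncio.run(main())
```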
@@ -344,68 +353,6 @@ def find_summarize_tool(tools):
 def llm_tools(tools):
     return [tool for tool in tools if tool.get("enabled", False) == True]
 
 # %%
-async def handle_tool_calls(message):
-    """
-    Process tool calls and yield status updates along the way.
-    The last yielded item will be a tuple containing (tool_result, tools_used).
-    """
-    tools_used = []
-    all_responses = []
-
-    for i, tool_call in enumerate(message["tool_calls"]):
-        arguments = tool_call["function"]["arguments"]
-        tool = tool_call["function"]["name"]
-
-        # Yield status update before processing each tool
-        yield {"status": "processing", "message": f"Processing tool {i+1}/{len(message['tool_calls'])}: {tool}..."}
-
-        # Process the tool based on its type
-        match tool:
-            case "TickerValue":
-                ticker = arguments.get("ticker")
-                if not ticker:
-                    ret = None
-                else:
-                    ret = TickerValue(ticker)
-                tools_used.append({ "tool": f"{tool}({ticker})", "result": ret})
-
-            case "AnalyzeSite":
-                url = arguments.get("url")
-                question = arguments.get("question", "what is the summary of this content?")
-
-                # Additional status update for long-running operations
-                yield {"status": "processing", "message": f"Retrieving and summarizing content from {url}..."}
-                ret = await AnalyzeSite(url, question)
-                tools_used.append({ "tool": f"{tool}('{url}', '{question}')", "result": ret })
-
-            case "DateTime":
-                tz = arguments.get("timezone")
-                ret = DateTime(tz)
-                tools_used.append({ "tool": f"{tool}('{tz}')", "result": ret })
-
-            case "WeatherForecast":
-                city = arguments.get("city")
-                state = arguments.get("state")
-
-                yield {"status": "processing", "message": f"Fetching weather data for {city}, {state}..."}
-                ret = WeatherForecast(city, state)
-                tools_used.append({ "tool": f"{tool}('{city}', '{state}')", "result": ret })
-
-            case _:
-                ret = None
-
-        # Build response for this tool
-        tool_response = {
-            "role": "tool",
-            "content": str(ret),
-            "name": tool_call["function"]["name"]
-        }
-        all_responses.append(tool_response)
-
-    # Yield the final result as the last item
-    final_result = all_responses[0] if len(all_responses) == 1 else all_responses
-    yield (final_result, tools_used)
@@ -562,11 +509,11 @@ class WebServer:
                     case "chat":
                         prompt = system_message
                     case "job_description":
-                        prompt = system_job_description
+                        prompt = system_generate_resume
                     case "resume":
                         prompt = system_generate_resume
                     case "fact_check":
-                        prompt = system_fact_check
+                        prompt = system_message

                 session.system_prompt = prompt
                 response["system_prompt"] = { "system_prompt": prompt }
@@ -592,7 +539,7 @@ class WebServer:
                     if not tmp:
                         continue
                     logging.info(f"Resetting {reset_operation} for {mode}")
-                    context.conversation = []
+                    context.conversation = Conversation()
                     context.context_tokens = round(len(str(session.system_prompt)) * 3 / 4) # Estimate context usage
                     response["history"] = []
                     response["context_used"] = session.context_tokens
@@ -695,40 +642,47 @@ class WebServer:
         @self.app.post("/api/chat/{context_id}/{session_type}")
         async def post_chat_endpoint(context_id: str, session_type: str, request: Request):
             logging.info(f"{request.method} {request.url.path}")
-            if not is_valid_uuid(context_id):
-                logging.warning(f"Invalid context_id: {context_id}")
-                return JSONResponse({"error": "Invalid context_id"}, status_code=400)
-            context = self.upsert_context(context_id)
-            session = context.get_session(session_type)
-            if not session:
-                try:
-                    session = context.create_session(session_type)
+            try:
+                if not is_valid_uuid(context_id):
+                    logging.warning(f"Invalid context_id: {context_id}")
+                    return JSONResponse({"error": "Invalid context_id"}, status_code=400)
+                context = self.upsert_context(context_id)
+
+                data = await request.json()
+                session = context.get_session(session_type)
+                if not session and session_type == "job_description":
+                    logging.info(f"Session {session_type} not found. Returning empty history.")
+                    # Create a new session if it doesn't exist
+                    session = context.get_or_create_session("job_description", system_prompt=system_generate_resume, job_description=data["content"])
             except Exception as e:
                 logging.info(f"Attempt to create session type: {session_type} failed", e)
                 return JSONResponse({ "error": f"{session_type} is not recognized", "context": context.id }, status_code=404)
-
-            data = await request.json()

             # Create a custom generator that ensures flushing
             async def flush_generator():
-                async for message in self.generate_response(context=context, session_type=session_type, content=data["content"]):
+                async for message in self.generate_response(context=context, session=session, content=data["content"]):
                     # Convert to JSON and add newline
                     yield json.dumps(message) + "\n"
                     # Save the history as its generated
                     self.save_context(context_id)
                     # Explicitly flush after each yield
                     await asyncio.sleep(0) # Allow the event loop to process the write

             # Return StreamingResponse with appropriate headers
             return StreamingResponse(
                 flush_generator(),
                 media_type="application/json",
                 headers={
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
                     "X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
                 }
             )
+            except Exception as e:
+                logging.error(f"Error in post_chat_endpoint: {e}")
+                return JSONResponse({"error": str(e)}, status_code=500)
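Because flush_generator emits one JSON object per line and asyncio.sleep(0) lets each line hit the wire immediately, clients can render updates as they arrive. A minimal consumer sketch; httpx and the localhost base URL are assumptions, while the route shape matches the endpoint above:

```python
import json
import httpx  # assumed HTTP client; any streaming-capable client works

async def stream_chat(context_id: str, session_type: str, content: str):
    # Base URL/port are assumptions; the route matches post_chat_endpoint above
    url = f"http://localhost:8000/api/chat/{context_id}/{session_type}"
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json={"content": content}) as response:
            async for line in response.aiter_lines():
                if line.strip():
                    # Each line is one update: {"status": "processing"|"done"|"error", ...}
                    yield json.loads(line)
```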

         @self.app.post("/api/context")
         async def create_context():
@@ -877,7 +831,70 @@ class WebServer:
     def get_optimal_ctx_size(self, context, messages, ctx_buffer = 4096):
         ctx = round(context + len(str(messages)) * 3 / 4)
         return max(defines.max_context, min(2048, ctx + ctx_buffer))
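One thing to watch in get_optimal_ctx_size: `max(defines.max_context, min(2048, ...))` can never return less than defines.max_context, so the buffer math is effectively ignored. If the intent is a floor of 2048 and a ceiling of defines.max_context, the conventional clamp ordering would be as below; this is a sketch of the assumed intent, not what this commit ships:

```python
def get_optimal_ctx_size(context: int, messages, ctx_buffer: int = 4096) -> int:
    # Same ~3/4 character-to-token estimate as the method above
    ctx = round(context + len(str(messages)) * 3 / 4)
    # Floor of 2048, ceiling of defines.max_context (assumed intent;
    # `defines` comes from the utils import at the top of server.py)
    return min(defines.max_context, max(2048, ctx + ctx_buffer))
```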
 
+    # %%
+    async def handle_tool_calls(self, message):
+        """
+        Process tool calls and yield status updates along the way.
+        The last yielded item will be a tuple containing (tool_result, tools_used).
+        """
+        tools_used = []
+        all_responses = []
+
+        for i, tool_call in enumerate(message["tool_calls"]):
+            arguments = tool_call["function"]["arguments"]
+            tool = tool_call["function"]["name"]
+
+            # Yield status update before processing each tool
+            yield {"status": "processing", "message": f"Processing tool {i+1}/{len(message['tool_calls'])}: {tool}..."}
+
+            # Process the tool based on its type
+            match tool:
+                case "TickerValue":
+                    ticker = arguments.get("ticker")
+                    if not ticker:
+                        ret = None
+                    else:
+                        ret = TickerValue(ticker)
+                    tools_used.append({ "tool": f"{tool}({ticker})", "result": ret})
+
+                case "AnalyzeSite":
+                    url = arguments.get("url")
+                    question = arguments.get("question", "what is the summary of this content?")
+
+                    # Additional status update for long-running operations
+                    yield {"status": "processing", "message": f"Retrieving and summarizing content from {url}..."}
+                    ret = await AnalyzeSite(llm=self.llm, model=self.model, url=url, question=question)
+                    tools_used.append({ "tool": f"{tool}('{url}', '{question}')", "result": ret })
+
+                case "DateTime":
+                    tz = arguments.get("timezone")
+                    ret = DateTime(tz)
+                    tools_used.append({ "tool": f"{tool}('{tz}')", "result": ret })
+
+                case "WeatherForecast":
+                    city = arguments.get("city")
+                    state = arguments.get("state")
+
+                    yield {"status": "processing", "message": f"Fetching weather data for {city}, {state}..."}
+                    ret = WeatherForecast(city, state)
+                    tools_used.append({ "tool": f"{tool}('{city}', '{state}')", "result": ret })
+
+                case _:
+                    ret = None
+
+            # Build response for this tool
+            tool_response = {
+                "role": "tool",
+                "content": str(ret),
+                "name": tool_call["function"]["name"]
+            }
+            all_responses.append(tool_response)
+
+        # Yield the final result as the last item
+        final_result = all_responses[0] if len(all_responses) == 1 else all_responses
+        yield (final_result, tools_used)
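The generator contract (zero or more status dicts, then a final `(tool_result, tools_used)` tuple) is what the `isinstance(item, tuple)` check in generate_response relies on further down. A standalone sketch of a conforming consumer:

```python
async def drain_tool_calls(handler):
    """Consume a handle_tool_calls-style generator: forward intermediate
    status dicts, then capture the final (tool_result, tools_used) tuple."""
    tool_result, tools_used = None, []
    async for item in handler:
        if isinstance(item, tuple) and len(item) == 2:
            tool_result, tools_used = item  # the last yielded item
        else:
            print(item["message"])  # e.g. "Processing tool 1/2: DateTime..."
    return tool_result, tools_used
```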

     def upsert_context(self, context_id = None) -> Context:
         """
         Upsert a context based on the provided context_id.
@@ -956,7 +973,7 @@ class WebServer:
     # * First message sets Fact Check and is Q&A
     # * Has content
     # * Then Q&A of Fact Check
-    async def generate_response(self, context : Context, session_type : str, content : str):
+    async def generate_response(self, context : Context, session : Session, content : str):
         if not self.file_watcher:
             return

@@ -967,21 +984,6 @@ class WebServer:

         self.processing = True

-        # Check if the session_type is valid
-        if not context.is_valid_session_type(session_type):
-            yield {"status": "error", "message": f"Session type {session_type} is not invalid."}
-            self.processing = False
-            return
-
-        session = context.get_session(session_type)
-        if session is None:
-            session = context.create_session(session_type)
-
-        if session is None:
-            yield {"status": "error", "message": f"Session type {session_type} is not invalid."}
-            self.processing = False
-            return
-
         conversation : Conversation = session.conversation

         message = Message(prompt=content)
@@ -997,17 +999,18 @@
         enable_rag = False

         # RAG is disabled when asking questions about the resume
-        if session_type == "resume":
+        if session.session_type == "resume":
             enable_rag = False

         # The first time through each session session_type a content_seed may be set for
         # future chat sessions; use it once, then clear it
         message.preamble = session.get_and_reset_content_seed()
+        system_prompt = session.system_prompt

         # After the first time a particular session session_type is used, it is handled as a chat.
         # The number of messages indicating the session is ready for chat varies based on
         # the session_type of session
-        process_type = session_type
+        process_type = session.session_type
         match process_type:
             case "job_description":
                 logging.info(f"job_description user_history len: {len(conversation.messages)}")
@@ -1029,8 +1032,7 @@
                     self.processing = False
                     return

-                logging.info("TODO: Re-enable tools...")
-                #enable_tools = True
+                enable_tools = True

         # Generate RAG content if enabled, based on the content
         rag_context = ""
@@ -1069,8 +1071,7 @@
 Use that information to respond to:"""

             # Use the mode specific system_prompt instead of 'chat'
-            logging.info("Fix this... reimplement")
-            #system_prompt = context.get_session(session_type).system_prompt
+            system_prompt = session.system_prompt

             # On first entry, a single job_description is provided ("user")
             # Generate a resume to append to RESUME history
@@ -1109,8 +1110,11 @@ Use that information to respond to:"""
 <|job_description|>
 {message.prompt}
 """

-                context.get_or_create_session("job_description").set_content_seed(message.preamble + "<|question|>\nUse the above information to respond to this prompt: ")
+                tmp = context.get_session("job_description")
+                if not tmp:
+                    raise Exception(f"Job description session not found.")
+                # Set the content seed for the job_description session
+                tmp.set_content_seed(message.preamble + "<|question|>\nUse the above information to respond to this prompt: ")

                 message.preamble += f"""
 <|rules|>
@@ -1118,24 +1122,30 @@ Use that information to respond to:"""
 2. Do not use content from the <|job_description|> in the response unless the <|context|> or <|resume|> mentions them.

 <|question|>
-Respond to the above information to respond to this prompt: "
+Use the above information to respond to this prompt:
 """

+                # For all future calls to job_description, use the system_job_description
+                session.system_prompt = system_job_description
+
                 # Seed the history for job_description
-                messages = [ {
-                    "role": "user", "content": message.prompt
-                }, {
-                    "role": "assistant", "content": "Job description stored to use in future queries.", "display": "hide"
-                } ]
-                # Strip out the 'display' key when adding to llm_history
-                conversation.extend([{**m, "origin": "job_description"} for m in messages])
+                stuffingMessage = Message(prompt=message.prompt)
+                stuffingMessage.response = "Job description stored to use in future queries."
+                stuffingMessage.metadata["origin"] = "job_description"
+                stuffingMessage.metadata["display"] = "hide"
+                conversation.add_message(stuffingMessage)

+                message.add_action("generate_resume")
+
+                logging.info("TODO: Convert these to generators, eg generate_resume() and then manually add results into session 'resume'")
+                logging.info("TODO: For subsequent runs, have the Session handler generate the follow up prompts so they can have correct context preamble")
+
                 # Switch to resume session for LLM responses
-                message.metadata["origin"] = "resume"
-                session = context.get_or_create_session("resume")
-                system_prompt = session.system_prompt
-                llm_history = session.llm_history = []
-                user_history = session.user_history = []
+                # message.metadata["origin"] = "resume"
+                # session = context.get_or_create_session("resume")
+                # system_prompt = session.system_prompt
+                # llm_history = session.llm_history = []
+                # user_history = session.user_history = []

                 # Ignore the passed in content and invoke Fact Check
             case "resume":
@@ -1203,12 +1213,22 @@ Use the above <|resume|> and <|job_description|> to answer this query:
-                    "role": "assistant", "content": "Resume fact checked.", "origin": "resume", "display": "hide"
-                } ]
-                # Do not add this to the LLM history; it is only used for UI presentation
-                conversation.extend(messages)

+                stuffingMessage = Message(prompt="Fact check resume")
+                stuffingMessage.response = "Resume fact checked."
+                stuffingMessage.metadata["origin"] = "resume"
+                stuffingMessage.metadata["display"] = "hide"
+                stuffingMessage.actions = [ "fact_check" ]
+                logging.info("TODO: Switch this to use actions to keep the UI from showing it")
+                conversation.add_message(stuffingMessage)
+
+                # For all future calls to job_description, use the system_job_description
+                logging.info("TODO: Create a system_resume_QA prompt to use for the resume session")
+                session.system_prompt = system_prompt
+
                 # Switch to fact_check session for LLM responses
                 message.metadata["origin"] = "fact_check"
-                session = context.get_or_create_session("fact_check")
-                system_prompt = session.system_prompt
+                session = context.get_or_create_session("fact_check", system_prompt=system_fact_check)

                 llm_history = session.llm_history = []
                 user_history = session.user_history = []

@@ -1220,7 +1240,8 @@ Use the above <|resume|> and <|job_description|> to answer this query:
             # user_history.append({"role": "user", "content": content, "origin": message.metadata["origin"]})
             # message.metadata["full_query"] = llm_history[-1]["content"]

-            messages = create_system_message(session.system_prompt)
+            # Uses cached system_prompt as session.system_prompt may have been updated for follow up questions
+            messages = create_system_message(system_prompt)
             if context.message_history_length:
                 to_add = conversation.messages[-context.message_history_length:]
             else:
@@ -1233,18 +1254,30 @@ Use the above <|resume|> and <|job_description|> to answer this query:
                     "role": "assistant",
                     "content": m.response,
                 } ])

+            message.content = message.preamble + message.prompt
+
             # To send to the LLM
             messages.append({
                 "role": "user",
-                "content": message.preamble + message.prompt,
+                "content": message.content
             })

+            # Add the system message to the beginning of the messages list
+            message.content = f"""
+<|system_prompt|>
+{system_prompt}
+
+{message.preamble}
+{message.prompt}"""
+
             # Estimate token length of new messages
             ctx_size = self.get_optimal_ctx_size(context.get_or_create_session(process_type).context_tokens, messages=message.prompt)

             if len(conversation.messages) > 2:
                 processing_message = f"Processing {'RAG augmented ' if enable_rag else ''}query..."
             else:
-                match session_type:
+                match session.session_type:
                     case "job_description":
                         processing_message = f"Generating {'RAG augmented ' if enable_rag else ''}resume..."
                     case "resume":
@@ -1283,7 +1316,7 @@ Use the above <|resume|> and <|job_description|> to answer this query:
                 tool_result = None

                 # Process all yielded items from the handler
-                async for item in handle_tool_calls(tool_message):
+                async for item in self.handle_tool_calls(tool_message):
                     if isinstance(item, tuple) and len(item) == 2:
                         # This is the final result tuple (tool_result, tools_used)
                         tool_result, tools_used = item
@@ -1326,7 +1359,7 @@ Use the above <|resume|> and <|job_description|> to answer this query:

             reply = response["message"]["content"]
             message.response = reply
-            message.metadata["origin"] = session_type
+            message.metadata["origin"] = session.session_type
             # final_message = {"role": "assistant", "content": reply }

             # # history is provided to the LLM and should not have additional metadata
@@ -1341,9 +1374,7 @@ Use the above <|resume|> and <|job_description|> to answer this query:
             yield {
                 "status": "done",
                 "message": {
-                    "role": "assistant",
-                    "content": message.response,
-                    "metadata": message.metadata
+                    **message.model_dump(mode='json'),
                 }
             }

@@ -54,7 +54,7 @@ class Context(BaseModel):

         # Find the matching subclass
         for session_cls in Session.__subclasses__():
-            if session_cls.__fields__["session_type"].default == session_type:
+            if session_cls.model_fields["session_type"].default == session_type:
                 # Create the session instance with provided kwargs
                 session = session_cls(session_type=session_type, **kwargs)
                 self.sessions.append(session)
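This hunk is the Pydantic v2 migration point: the class-level `__fields__` mapping is deprecated in v2 in favor of `model_fields`. A self-contained repro of the subclass lookup (class names here are illustrative, not from the commit):

```python
from typing import Literal
from pydantic import BaseModel

class Session(BaseModel):
    session_type: str
    system_prompt: str

class ChatSession(Session):
    session_type: Literal["chat"] = "chat"

# Pydantic v2: model_fields replaces the v1 __fields__ mapping
cls = next(
    c for c in Session.__subclasses__()
    if c.model_fields["session_type"].default == "chat"
)
assert cls is ChatSession
```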
@@ -1,6 +1,5 @@
 from pydantic import BaseModel
-from typing import List
 from datetime import datetime, timezone
 from .message import Message

 class Conversation(BaseModel):
@@ -18,6 +18,13 @@ class Message(BaseModel):
     actions: List[str] = []
     timestamp: datetime = datetime.now(timezone.utc)

+    def add_action(self, action: str | list[str]) -> None:
+        """Add an action (or list of actions) to the message."""
+        if isinstance(action, str):
+            self.actions.append(action)
+        else:
+            self.actions.extend(action)
+
     def get_summary(self) -> str:
         """Return a summary of the message."""
         response_summary = (
@@ -4,7 +4,7 @@ from .conversation import Conversation

 class Session(BaseModel):
     session_type: Literal["resume", "job_description", "fact_check", "chat"]
-    system_prompt: str = "You are a helpful assistant."
+    system_prompt: str # Mandatory
     conversation: Conversation = Conversation()
     context_tokens: int = 0
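Dropping the default means session construction now fails fast instead of silently falling back to a generic assistant prompt. A minimal sketch of the new behavior, with the model trimmed to the two fields shown above:

```python
from typing import Literal
from pydantic import BaseModel, ValidationError

class Session(BaseModel):
    session_type: Literal["resume", "job_description", "fact_check", "chat"]
    system_prompt: str  # no default, so it must now be supplied

try:
    Session(session_type="chat")
except ValidationError as err:
    print(err)  # reports: system_prompt: Field required
```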