Compare commits

..

No commits in common. "6923132655ef0e9fd5acbc797cd465f57534787c" and "538caba9f4d9f24d96f6c4995f6807460cbfff7d" have entirely different histories.

9 changed files with 169 additions and 312 deletions

View File

@ -5,7 +5,8 @@ Backstory is an AI Resume agent that provides context into a diverse career narr
* WIP: Through the use of several custom Language Processing Modules (LPM), develop a comprehensive set of test and validation data based on the input documents. While manual review of content should be performed to ensure accuracy, several LLM techniques are employed in the LPM in order to isolate and remove hallucinations and inaccuracies in the test and validation data. * WIP: Through the use of several custom Language Processing Modules (LPM), develop a comprehensive set of test and validation data based on the input documents. While manual review of content should be performed to ensure accuracy, several LLM techniques are employed in the LPM in order to isolate and remove hallucinations and inaccuracies in the test and validation data.
* WIP: Utilizing quantized low-rank adaptation (QLoRA) and parameter-efficient fine-tuning (PEFT), provide a hyperparameter-tuned and customized LLM for use in chat and content creation scenarios with expert knowledge about the individual. * WIP: Utilizing quantized low-rank adaptation (QLoRA) and parameter-efficient fine-tuning (PEFT), provide a hyperparameter-tuned and customized LLM for use in chat and content creation scenarios with expert knowledge about the individual.
* Post-training, utilize additional RAG content to further enhance the information domain used in conversations and content generation. * Post-training, utilize additional RAG content to further enhance the information domain used in conversations and content generation.
* An integrated document publishing workflow that will transform a "Job Description" into a customized "Resume" for the person the LLM has been trained on, incorporating a multi-stage "Fact Check" to reduce hallucination. * An integrated document publishing workflow that will transform a "Job Description" into a customized "Resume" for the person the LLM has been trained on.
* "Fact Check" the resulting resume against the RAG content directly provided by the user in order to remove hallucinations.
While it can run a variety of LLM models, Backstory is currently running Qwen2.5:7b. In addition to the standard model, the chat pipeline also exposes several utility tools for the LLM to use to obtain real-time data. While it can run a variety of LLM models, Backstory is currently running Qwen2.5:7b. In addition to the standard model, the chat pipeline also exposes several utility tools for the LLM to use to obtain real-time data.
@ -19,7 +20,7 @@ Before you spend too much time learning how to customize Backstory, you may want
The `./docs` directory has been seeded with an AI generated persona. That directory is only used during development; actual content should be put into the `./docs-prod` directory. The `./docs` directory has been seeded with an AI generated persona. That directory is only used during development; actual content should be put into the `./docs-prod` directory.
Launching with the defaults (which includes the AI generated persona), you can ask things like `Who is Eliza Morgan?` Launching with the defaults, you can ask things like `Who is Eliza Morgan?`
If you want to seed your own data: If you want to seed your own data:

View File

@ -24,24 +24,20 @@ flowchart TD
end end
subgraph "Stage 1B: Candidate Analysis" subgraph "Stage 1B: Candidate Analysis"
B1[Resume Input] --> B5[Candidate Analysis LLM] B1[Resume & Context Input] --> B2[Candidate Analysis LLM]
B5 --> B4[Candidate Qualifications JSON] B2 --> B3[Candidate Qualifications JSON]
B2[Candidate Info] --> B3[RAG]
B3[RAG] --> B2[Candidate Info]
A3[Job Requirements JSON] --> B3[RAG]
B3[RAG] --> B5
end end
subgraph "Stage 1C: Mapping Analysis" subgraph "Stage 1C: Mapping Analysis"
C1[Job Requirements JSON] --> C3[Mapping Analysis LLM] C1[Job Requirements JSON] --> C2[Candidate Qualifications JSON]
C2[Candidate Qualifications JSON] --> C3 C2 --> C3[Mapping Analysis LLM]
C3 --> C4[Skills Mapping JSON] C3 --> C4[Skills Mapping JSON]
end end
end end
subgraph "Stage 2: Resume Generation" subgraph "Stage 2: Resume Generation"
D1[Skills Mapping JSON] --> D3[Resume Generation LLM] D1[Skills Mapping JSON] --> D2[Original Resume Reference]
D2[Original Resume Reference] --> D3 D2 --> D3[Resume Generation LLM]
D3 --> D4[Tailored Resume Draft] D3 --> D4[Tailored Resume Draft]
end end
@ -56,13 +52,12 @@ flowchart TD
end end
A3 --> C1 A3 --> C1
B4 --> C2 B3 --> C2
C4 --> D1 C4 --> D1
C4 --> E1
D4 --> E3 D4 --> E3
style A2 fill:#f9d77e,stroke:#333,stroke-width:2px style A2 fill:#f9d77e,stroke:#333,stroke-width:2px
style B5 fill:#f9d77e,stroke:#333,stroke-width:2px style B2 fill:#f9d77e,stroke:#333,stroke-width:2px
style C3 fill:#f9d77e,stroke:#333,stroke-width:2px style C3 fill:#f9d77e,stroke:#333,stroke-width:2px
style D3 fill:#f9d77e,stroke:#333,stroke-width:2px style D3 fill:#f9d77e,stroke:#333,stroke-width:2px
style E4 fill:#f9d77e,stroke:#333,stroke-width:2px style E4 fill:#f9d77e,stroke:#333,stroke-width:2px

View File

@ -15,6 +15,7 @@ import Box from '@mui/material/Box';
import CssBaseline from '@mui/material/CssBaseline'; import CssBaseline from '@mui/material/CssBaseline';
import MenuIcon from '@mui/icons-material/Menu'; import MenuIcon from '@mui/icons-material/Menu';
import { Snack, SeverityType } from './Snack'; import { Snack, SeverityType } from './Snack';
import { ConversationHandle } from './Conversation'; import { ConversationHandle } from './Conversation';
import { QueryOptions } from './ChatQuery'; import { QueryOptions } from './ChatQuery';
@ -115,8 +116,8 @@ const App = () => {
children: <AboutPage {...{ sessionId, setSnack, submitQuery: handleSubmitChatQuery, route: subRoute, setRoute: setSubRoute }} /> children: <AboutPage {...{ sessionId, setSnack, submitQuery: handleSubmitChatQuery, route: subRoute, setRoute: setSubRoute }} />
}; };
const controlsTab: BackstoryTabProps = { const settingsTab: BackstoryTabProps = {
path: "controls", path: "settings",
tabProps: { tabProps: {
sx: { flexShrink: 1, flexGrow: 0, fontSize: '1rem' }, sx: { flexShrink: 1, flexGrow: 0, fontSize: '1rem' },
icon: <SettingsIcon /> icon: <SettingsIcon />
@ -144,7 +145,7 @@ const App = () => {
resumeBuilderTab, resumeBuilderTab,
contextVisualizerTab, contextVisualizerTab,
aboutTab, aboutTab,
controlsTab, settingsTab,
]; ];
}, [sessionId, setSnack, subRoute]); }, [sessionId, setSnack, subRoute]);
@ -213,7 +214,7 @@ const App = () => {
const path_session = pathParts.length < 2 ? pathParts[0] : pathParts[1]; const path_session = pathParts.length < 2 ? pathParts[0] : pathParts[1];
if (!isValidUUIDv4(path_session)) { if (!isValidUUIDv4(path_session)) {
console.log(`Invalid session id ${path_session}-- creating new session`); console.log(`Invalid session id ${path_session}-- creating new session`);
fetchSession(); fetchSession([pathParts[0]]);
} else { } else {
let tabIndex = tabs.findIndex((tab) => tab.path === currentPath); let tabIndex = tabs.findIndex((tab) => tab.path === currentPath);
if (-1 === tabIndex) { if (-1 === tabIndex) {

View File

@ -1,4 +1,4 @@
import React, { useRef, useEffect, ChangeEvent, KeyboardEvent, useState } from 'react'; import React, { useRef, useEffect, ChangeEvent, KeyboardEvent } from 'react';
import { useTheme } from '@mui/material/styles'; import { useTheme } from '@mui/material/styles';
import './BackstoryTextField.css'; import './BackstoryTextField.css';
@ -7,7 +7,8 @@ interface BackstoryTextFieldProps {
disabled?: boolean; disabled?: boolean;
multiline?: boolean; multiline?: boolean;
placeholder?: string; placeholder?: string;
onEnter: (value: string) => void; onChange?: (e: ChangeEvent<HTMLTextAreaElement | HTMLInputElement>) => void;
onKeyDown?: (e: KeyboardEvent<HTMLTextAreaElement | HTMLInputElement>) => void;
} }
const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({ const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({
@ -15,12 +16,12 @@ const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({
disabled = false, disabled = false,
multiline = false, multiline = false,
placeholder, placeholder,
onEnter onChange,
onKeyDown,
}) => { }) => {
const theme = useTheme(); const theme = useTheme();
const textareaRef = useRef<HTMLTextAreaElement>(null); const textareaRef = useRef<HTMLTextAreaElement>(null);
const shadowRef = useRef<HTMLTextAreaElement>(null); const shadowRef = useRef<HTMLTextAreaElement>(null);
const [editValue, setEditValue] = useState<string>(value);
useEffect(() => { useEffect(() => {
if (multiline && textareaRef.current && shadowRef.current) { if (multiline && textareaRef.current && shadowRef.current) {
@ -42,10 +43,15 @@ const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({
} }
}, [value, multiline, textareaRef, shadowRef, placeholder]); }, [value, multiline, textareaRef, shadowRef, placeholder]);
const handleKeyDown = (event: KeyboardEvent<HTMLTextAreaElement | HTMLInputElement>) => { const handleChange = (e: ChangeEvent<HTMLTextAreaElement | HTMLInputElement>) => {
if (event.key === 'Enter' && (!multiline || !event.shiftKey)) { if (onChange) {
setEditValue(''); onChange(e);
onEnter(event.currentTarget.value); }
};
const handleKeyDown = (e: KeyboardEvent<HTMLTextAreaElement | HTMLInputElement>) => {
if (onKeyDown) {
onKeyDown(e);
} }
}; };
@ -68,10 +74,10 @@ const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({
<input <input
className="BackstoryTextField" className="BackstoryTextField"
type="text" type="text"
value={editValue} value={value}
disabled={disabled} disabled={disabled}
placeholder={placeholder} placeholder={placeholder}
onChange={(e) => { setEditValue(e.target.value); }} onChange={handleChange}
onKeyDown={handleKeyDown} onKeyDown={handleKeyDown}
style={sharedStyle} style={sharedStyle}
/> />
@ -83,10 +89,10 @@ const BackstoryTextField: React.FC<BackstoryTextFieldProps> = ({
<textarea <textarea
className="BackstoryTextField" className="BackstoryTextField"
ref={textareaRef} ref={textareaRef}
value={editValue} value={value}
disabled={disabled} disabled={disabled}
placeholder={placeholder} placeholder={placeholder}
onChange={(e) => { setEditValue(e.target.value); }} onChange={handleChange}
onKeyDown={handleKeyDown} onKeyDown={handleKeyDown}
style={{ style={{
...sharedStyle, ...sharedStyle,

View File

@ -246,8 +246,10 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
} }
}; };
const handleEnter = (value: string) => { const handleKeyPress = (event: any) => {
sendQuery(value); if (event.key === 'Enter' && !event.shiftKey) {
sendQuery(query);
}
}; };
useImperativeHandle(ref, () => ({ useImperativeHandle(ref, () => ({
@ -551,7 +553,8 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
disabled={processing} disabled={processing}
multiline={multiline ? true : false} multiline={multiline ? true : false}
value={query} value={query}
onEnter={handleEnter} onChange={(e: any) => setQuery(e.target.value)}
onKeyDown={handleKeyPress}
placeholder={prompt} placeholder={prompt}
/> />
</div> </div>

View File

@ -287,7 +287,7 @@ const Message = (props: MessageProps) => {
overflow: "auto", /* Handles scrolling for the div */ overflow: "auto", /* Handles scrolling for the div */
}} }}
> >
<StyledMarkdown streaming={message.role === "streaming"} {...{ content: formattedContent, submitQuery, sessionId, setSnack }} /> <StyledMarkdown {...{ content: formattedContent, submitQuery, sessionId, setSnack }} />
</Scrollable> </Scrollable>
: :
<Typography <Typography

View File

@ -84,8 +84,7 @@ const ResumeBuilderPage: React.FC<BackstoryPageProps> = ({
} }
/* Filter out the 2nd and 3rd (0-based) */ /* Filter out the 2nd and 3rd (0-based) */
const filtered = messages;//.filter((m, i) => i !== 1 && i !== 2); const filtered = messages.filter((m, i) => i !== 1 && i !== 2);
console.warn("Set filtering back on");
return filtered; return filtered;
}, [setHasResume, setHasFacts]); }, [setHasResume, setHasFacts]);

View File

@ -17,11 +17,10 @@ interface StyledMarkdownProps extends BackstoryElementProps {
className?: string, className?: string,
content: string, content: string,
sx?: SxProps, sx?: SxProps,
streaming?: boolean,
}; };
const StyledMarkdown: React.FC<StyledMarkdownProps> = (props: StyledMarkdownProps) => { const StyledMarkdown: React.FC<StyledMarkdownProps> = (props: StyledMarkdownProps) => {
const { className, content, submitQuery, sx, streaming } = props; const { className, content, submitQuery, sx } = props;
const theme = useTheme(); const theme = useTheme();
const overrides: any = { const overrides: any = {
@ -29,16 +28,16 @@ const StyledMarkdown: React.FC<StyledMarkdownProps> = (props: StyledMarkdownProp
component: (element: any) => { component: (element: any) => {
const { className } = element.children.props; const { className } = element.children.props;
const content = element.children?.props?.children || ""; const content = element.children?.props?.children || "";
if (className === "lang-mermaid" && !streaming) { if (className === "lang-mermaid") {
return <Mermaid className="Mermaid" chart={content} />; return <Mermaid className="Mermaid" chart={content} />;
} }
if (className === "lang-markdown") { if (className === "lang-markdown") {
return <MuiMarkdown children={content} />; return <MuiMarkdown children={content} />;
} }
if (className === "lang-json" && !streaming) { if (className === "lang-json") {
try { try {
const fixed = jsonrepair(content); const fixed = jsonrepair(content);
return <Scrollable className="JsonViewScrollable"> return <Scrollable autoscroll className="JsonViewScrollable">
<JsonView <JsonView
className="JsonView" className="JsonView"
style={{ style={{

View File

@ -8,7 +8,6 @@ import json
import traceback import traceback
import asyncio import asyncio
import time import time
from collections import defaultdict
from . base import Agent, agent_registry, LLMMessage from . base import Agent, agent_registry, LLMMessage
from .. conversation import Conversation from .. conversation import Conversation
@ -167,9 +166,9 @@ class JobDescription(Agent):
if not self.context: if not self.context:
raise ValueError("Context is not set for this agent.") raise ValueError("Context is not set for this agent.")
# async for message in super().prepare_message(message): async for message in super().prepare_message(message):
# if message.status != "done": if message.status != "done":
# yield message yield message
# Always add the job description, user resume, and question # Always add the job description, user resume, and question
message.preamble["job_description"] = self.job_description message.preamble["job_description"] = self.job_description
message.preamble["resume"] = self.context.user_resume message.preamble["resume"] = self.context.user_resume
@ -186,48 +185,12 @@ class JobDescription(Agent):
original_prompt = message.prompt original_prompt = message.prompt
logger.info("TODO: Implement delay queing; busy for same agent, otherwise return queue size and estimated wait time") async for message in super().process_message(llm=llm, model=model, message=message):
spinner: List[str] = ['\\', '|', '/', '-']
tick : int = 0
while self.context.processing:
message.status = "waiting"
message.response = f"Busy processing another request. Please wait. {spinner[tick]}"
tick = (tick + 1) % len(spinner)
yield message
await asyncio.sleep(1) # Allow the event loop to process the write
self.context.processing = True
original_message = message.model_copy()
self.llm = llm
self.model = model
self.metrics.generate_count.labels(agent=self.agent_type).inc()
with self.metrics.generate_duration.labels(agent=self.agent_type).time():
job_description = message.preamble["job_description"]
resume = message.preamble["resume"]
try:
async for message in self.generate_factual_tailored_resume(message=message, job_description=job_description, resume=resume):
if message.status != "done": if message.status != "done":
yield message yield message
message.prompt = original_message.prompt if message.status == "error":
yield message
except Exception as e:
message.status = "error"
logger.error(message.response)
message.response = f"Error in resume generation process: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
yield message
return return
# Done processing, add message to conversation
message.status = "done"
self.conversation.add(message)
self.context.processing = False
# Add the "Job requirements" message # Add the "Job requirements" message
if "generate_factual_tailored_resume" in message.metadata and "job_requirements" in message.metadata["generate_factual_tailored_resume"]: if "generate_factual_tailored_resume" in message.metadata and "job_requirements" in message.metadata["generate_factual_tailored_resume"]:
new_message = Message(prompt="Show job requirements") new_message = Message(prompt="Show job requirements")
@ -420,60 +383,32 @@ class JobDescription(Agent):
metadata["error"] = message.response metadata["error"] = message.response
raise raise
def format_rag_context(self, rag_results: List[Dict[str, Any]]) -> str:
    """
    Format RAG results from process_job_requirements into a structured string.

    Results are grouped under a "category/subcategory" heading; leading and
    trailing slashes are stripped from the heading, so an empty subcategory
    leaves just the category. Each document's content is shown as a preview
    of at most 100 characters, with "..." appended when it was cut.

    Args:
        rag_results: List of dictionaries from process_job_requirements.
            Every entry must provide the "category", "subcategory",
            "context" and "content" keys.

    Returns:
        A formatted string for inclusion in the prompt, or a fixed
        placeholder sentence when rag_results is empty.

    Raises:
        KeyError: If an entry lacks one of the keys listed above.
    """
    if not rag_results:
        return "No additional context available."

    preview_limit = 100

    # Group results by category and subcategory
    grouped_context = defaultdict(list)
    for result in rag_results:
        key = f"{result['category']}/{result['subcategory']}".strip("/")
        content = result["content"]
        if len(content) > preview_limit:
            content = content[:preview_limit] + "..."
        grouped_context[key].append({
            "query": result["context"],
            "content": content,
        })

    # Format as a structured string
    context_lines = ["Additional Context from Document Retrieval:"]
    for category, items in grouped_context.items():
        context_lines.append(f"\nCategory: {category}")
        for item in items:
            context_lines.append(f"- Query: {item['query']}")
            context_lines.append(f" Relevant Document: {item['content']}")
    return "\n".join(context_lines)
# Stage 1B: Candidate Analysis Implementation # Stage 1B: Candidate Analysis Implementation
def create_candidate_analysis_prompt(self, resume: str, rag_results: List[Dict[str, Any]]) -> tuple[str, str]: def create_candidate_analysis_prompt(self, resume: str, context: str) -> tuple[str, str]:
"""Create the prompt for candidate qualifications analysis.""" """Create the prompt for candidate qualifications analysis."""
# system_prompt = """ # system_prompt = """
# You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials. # You are an objective resume analyzer. Your task is to catalog ALL skills, experiences, and qualifications
# present in a candidate's materials WITHOUT any reference to any job description.
# CORE PRINCIPLES: # ## INSTRUCTIONS:
# - Analyze ONLY the candidate's resume and provided context
# - Focus ONLY on the candidate's actual qualifications # 1. Analyze ONLY the candidate's resume and context provided.
# - Do not reference any job requirements # 2. Create a comprehensive inventory of the candidate's actual qualifications.
# - Include only explicitly mentioned information # 3. DO NOT consider any job requirements - this is a pure candidate analysis task.
# 4. For each qualification, cite exactly where in the materials it appears.
# 5. DO NOT duplicate or repeat time periods or skills once listed.
# ## OUTPUT FORMAT:
# OUTPUT FORMAT:
# ```json # ```json
# { # {
# "candidate_qualifications": { # "candidate_qualifications": {
# "technical_skills": [ # "technical_skills": [
# { # {
# "skill": "skill name", # "skill": "skill name",
# "evidence_location": "where in resume this appears", # "evidence": "exact quote from materials",
# "expertise_level": "stated level or 'unspecified'" # "source": "resume or context",
# "expertise_level": "explicit level mentioned or 'unspecified'"
# } # }
# ], # ],
# "work_experience": [ # "work_experience": [
@ -491,34 +426,41 @@ class JobDescription(Agent):
# "degree": "degree name", # "degree": "degree name",
# "institution": "institution name", # "institution": "institution name",
# "completed": true/false, # "completed": true/false,
# "graduation_date": "date or 'ongoing'" # "evidence": "exact quote from materials"
# } # }
# ], # ],
# "projects": [ # "projects": [
# { # {
# "name": "project name", # "name": "project name",
# "description": "brief description", # "description": "brief description",
# "technologies_used": ["tech1", "tech2"] # "technologies_used": ["tech1", "tech2"],
# "evidence": "exact quote from materials"
# } # }
# ], # ],
# "soft_skills": [ # "soft_skills": [
# { # {
# "skill": "skill name", # "skill": "skill name",
# "context": "brief mention of where this appears" # "evidence": "exact quote or inference basis",
# "source": "resume or context"
# } # }
# ] # ]
# } # }
# } # }
# ```
# Be thorough and precise. Include ONLY skills and experiences explicitly mentioned in the materials.
# For each entry, provide the exact text evidence from the materials that supports its inclusion.
# Do not make assumptions about skills based on job titles or project names - only include skills explicitly mentioned.
# """ # """
system_prompt = """\
system_prompt = """
You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials. You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials.
CORE PRINCIPLES: CORE PRINCIPLES:
- Analyze ONLY the candidate's resume and provided context. - Analyze ONLY the candidate's resume and provided context
- Focus ONLY on the candidate's actual qualifications explicitly mentioned in the resume. - Focus ONLY on the candidate's actual qualifications
- Use the additional context to clarify or provide background for terms, skills, or experiences mentioned in the resume (e.g., to understand the scope of a skill like 'Python' or a role's responsibilities). - Do not reference any job requirements
- Do NOT treat the context as job requirements or infer qualifications not explicitly stated in the resume. - Include only explicitly mentioned information
- Include only explicitly mentioned information from the resume, supplemented by context where relevant.
OUTPUT FORMAT: OUTPUT FORMAT:
```json ```json
@ -565,7 +507,7 @@ OUTPUT FORMAT:
} }
} }
""" """
context = self.format_rag_context(rag_results)
prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}" prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}"
return system_prompt, prompt return system_prompt, prompt
@ -624,10 +566,10 @@ OUTPUT FORMAT:
message.status = "done" message.status = "done"
yield message yield message
async def analyze_candidate_qualifications(self, message: Message, resume: str, rag_context: List[Dict[str, Any]], metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]: async def analyze_candidate_qualifications(self, message: Message, resume: str, additional_context: str, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Analyze candidate qualifications from resume and context.""" """Analyze candidate qualifications from resume and context."""
try: try:
system_prompt, prompt = self.create_candidate_analysis_prompt(resume, rag_context) system_prompt, prompt = self.create_candidate_analysis_prompt(resume, additional_context)
metadata["system_prompt"] = system_prompt metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt metadata["prompt"] = prompt
async for message in self.call_llm(message, system_prompt, prompt): async for message in self.call_llm(message, system_prompt, prompt):
@ -1022,116 +964,7 @@ Based on the reference data above, please create a corrected version of the resu
metadata["results"] = message.response metadata["results"] = message.response
yield message yield message
def process_job_requirements(self, job_requirements: Dict[str, Any]) -> List[Dict[str, Any]]: async def generate_factual_tailored_resume(self, message: Message, job_description: str, resume: str, additional_context: str = "") -> AsyncGenerator[Message, None]:
"""
Process job requirements JSON, gather RAG documents using find_similar, remove duplicates,
and return top 20 results.
Args:
job_requirements: Dictionary containing job requirements.
retriever: Instance of RagRetriever with find_similar method.
Returns:
List of up to 20 ChromaDB documents, sorted by combined importance and similarity score.
"""
if self.context is None or self.context.file_watcher is None:
raise ValueError(f"context or file_watcher is None on {self.agent_type}")
retriever = self.context.file_watcher
# Importance weights for each category
importance_weights = {
("technical_skills", "required"): 1.0,
("technical_skills", "preferred"): 0.8,
("experience_requirements", "required"): 0.95,
("experience_requirements", "preferred"): 0.75,
("education_requirements", ""): 0.7,
("soft_skills", ""): 0.6,
("industry_knowledge", ""): 0.65,
("responsibilities", ""): 0.85,
("company_values", ""): 0.5
}
# Store all RAG results with metadata
all_results = []
def traverse_requirements(data: Any, category: str = "", subcategory: str = ""):
"""
Recursively traverse the job requirements and gather RAG documents.
"""
if isinstance(data, dict):
for key, value in data.items():
new_subcategory = key if category else ""
traverse_requirements(value, category or key, new_subcategory)
elif isinstance(data, list):
for item in data:
# Determine the weight key
weight_key = (category, subcategory) if subcategory else (category, "")
weight = importance_weights.get(weight_key, 0.5) # Default weight
# Call find_similar for the item
try:
rag_results = retriever.find_similar(item, top_k=20, threshold=0.4) # Strict matching
# Process each result
for doc_id, content, distance, metadata in zip(
rag_results["ids"],
rag_results["documents"],
rag_results["distances"],
rag_results["metadatas"]
):
# Convert cosine distance to similarity score (higher is better)
similarity_score = 1 - distance # Cosine distance to similarity
all_results.append({
"id": doc_id,
"content": content,
"score": similarity_score,
"weight": weight,
"context": item,
"category": category,
"subcategory": subcategory,
"metadata": metadata
})
except Exception as e:
logger.error(f"Error processing context '{item}': {e}")
# Start traversal
traverse_requirements(job_requirements)
# Remove duplicates based on document ID
unique_results = []
seen_ids = set()
for result in all_results:
if result["id"] not in seen_ids:
seen_ids.add(result["id"])
unique_results.append(result)
# Sort by combined score (weight * similarity score)
sorted_results = sorted(
unique_results,
key=lambda x: x["weight"] * x["score"],
reverse=True
)
# Return top 10 results
return sorted_results[:10]
async def generate_rag_content(self, message: Message, job_requirements: Dict[str, Any]) -> AsyncGenerator[Message, None]:
    """
    Retrieve RAG documents for the job requirements and report them on the message.

    Calls process_job_requirements, stores the returned list in
    message.metadata["rag_context"], sets message.response to a one-line
    count of the retrieved documents, marks the message "done" and yields
    it exactly once.

    Args:
        message: Message that is updated in place and then yielded.
        job_requirements: Job requirements dictionary, passed through to
            process_job_requirements unchanged.

    Yields:
        The same message object, with status "done".

    Raises:
        Anything process_job_requirements raises; no errors are caught here.
    """
    results = self.process_job_requirements(job_requirements=job_requirements)
    message.response = f"Retrieved {len(results)} documents."
    message.metadata["rag_context"] = results
    message.status = "done"
    yield message
async def generate_factual_tailored_resume(self, message: Message, job_description: str, resume: str) -> AsyncGenerator[Message, None]:
""" """
Main function to generate a factually accurate tailored resume. Main function to generate a factually accurate tailored resume.
@ -1143,9 +976,6 @@ Based on the reference data above, please create a corrected version of the resu
Returns: Returns:
Dict containing the generated resume and supporting analysis Dict containing the generated resume and supporting analysis
""" """
if self.context is None:
raise ValueError(f"context is None in {self.agent_type}")
message.status = "thinking" message.status = "thinking"
logger.info(message.response) logger.info(message.response)
yield message yield message
@ -1169,21 +999,13 @@ Based on the reference data above, please create a corrected version of the resu
message.response = "Multi-stage RAG resume generation process: Stage 1B: Analyzing candidate qualifications" message.response = "Multi-stage RAG resume generation process: Stage 1B: Analyzing candidate qualifications"
logger.info(message.response) logger.info(message.response)
yield message yield message
async for message in self.generate_rag_content(message, job_requirements):
if message.status != "done":
yield message
if message.status == "error":
return
rag_context = message.metadata["rag_context"]
metadata["analyze_candidate_qualifications"] = { metadata["analyze_candidate_qualifications"] = {
"rag_context": rag_context "additional_context": additional_context
} }
async for message in self.analyze_candidate_qualifications( async for message in self.analyze_candidate_qualifications(
message=message, message=message,
resume=resume, resume=resume,
rag_context=rag_context, additional_context=additional_context,
metadata=metadata["analyze_candidate_qualifications"]): metadata=metadata["analyze_candidate_qualifications"]):
if message.status != "done": if message.status != "done":
yield message yield message
@ -1301,6 +1123,37 @@ Based on the reference data above, please create a corrected version of the resu
logger.info("Resume generation process completed successfully") logger.info("Resume generation process completed successfully")
return return
# Main orchestration function
async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature=0.7) -> AsyncGenerator[Message, None]:
    """
    Run the resume-generation pipeline for one message and stream its progress.

    Stores llm and model on the agent, updates the generate metrics, then
    relays every message produced by generate_factual_tailored_resume whose
    status is not "done". The last message has the caller's original prompt
    restored and is yielded once at the end.

    Args:
        llm: LLM client; stored on self.llm.
        model: Model name; stored on self.model.
        message: Message whose preamble provides "job_description" and
            "resume", and optionally "context" (defaults to "").
        temperature: Not used in this method.

    Yields:
        Intermediate messages, followed by either the final message or a
        message with status "error" whose response describes the failure.
    """
    logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
    original_message = message.model_copy()

    self.llm = llm
    self.model = model

    self.metrics.generate_count.labels(agent=self.agent_type).inc()
    with self.metrics.generate_duration.labels(agent=self.agent_type).time():
        try:
            # The preamble lookups sit inside the try so that a missing key
            # is reported through the same "error" message as any other
            # failure instead of escaping the generator.
            job_description = message.preamble["job_description"]
            resume = message.preamble["resume"]
            # "context" is optional: generate_factual_tailored_resume
            # already defaults additional_context to "".
            additional_context = message.preamble.get("context", "")

            async for message in self.generate_factual_tailored_resume(
                    message=message,
                    job_description=job_description,
                    resume=resume,
                    additional_context=additional_context):
                if message.status != "done":
                    yield message

            message.prompt = original_message.prompt
            yield message
        except Exception as e:
            message.status = "error"
            # NOTE(review): this logs the response as it stood before the
            # failure, presumably for context; confirm it is still wanted.
            logger.error(message.response)
            message.response = f"Error in resume generation process: {str(e)}"
            logger.error(message.response)
            logger.error(traceback.format_exc())
            yield message
# Register the base agent # Register the base agent
agent_registry.register(JobDescription._agent_type, JobDescription) agent_registry.register(JobDescription._agent_type, JobDescription)