Switching to one-call per skill

This commit is contained in:
James Ketr 2025-05-12 16:57:20 -07:00
parent 4e772ab8ea
commit a1798b58ac
3 changed files with 78 additions and 125 deletions

View File

@ -4,13 +4,12 @@ The system follows a carefully designed pipeline with isolated stages to prevent
The system uses a pipeline of isolated analysis and generation steps:
1. **Stage 1: Isolated Analysis** (three sub-stages)
1. **Stage 1: Isolated Analysis**
- **1A: Job Analysis** - Extracts requirements from job description only
- **1B: Candidate Analysis** - Catalogs qualifications from resume/context only
- **1C: Mapping Analysis** - Identifies legitimate matches between requirements and qualifications
- **1B: Skill-Based Assessment** - For each required skill, determine an Individual Skill Assessment and add it to the Skill Assessments Collection.
2. **Stage 2: Resume Generation**
- Uses mapping output to create a tailored resume with evidence-based content
- Uses the Skill Assessments Collection to generate a tailored resume.
3. **Stage 3: Verification**
- Performs fact-checking to catch any remaining fabrications
@ -23,63 +22,62 @@ flowchart TD
A2 --> A3[Job Requirements JSON]
end
subgraph "Stage 1B: Candidate Analysis"
B1[Resume Input] --> B5[Candidate Analysis LLM]
B5 --> B4[Candidate Qualifications JSON]
B2[Candidate Info] --> B3[RAG]
B3[RAG] --> B2[Candidate Info]
A3[Job Requirements JSON] --> B3[RAG]
B3[RAG] --> B5
end
subgraph "Stage 1C: Mapping Analysis"
C1[Job Requirements JSON] --> C3[Mapping Analysis LLM]
C2[Candidate Qualifications JSON] --> C3
C3 --> C4[Skills Mapping JSON]
subgraph "Stage 1B: Skill-Based Assessment"
B1[Resume Input] --> B2[Candidate Info]
B2 --> B3[RAG System]
A3 --> B4[Skill Assessment Generator]
B3 --> B4
B4 --> B5{For Each Required Skill}
B5 --> B6[Skill-Focused LLM Query]
B6 --> B7[Individual Skill Assessment]
B7 --> B8[Skill Assessments Collection]
end
end
subgraph "Stage 2: Resume Generation"
D1[Skills Mapping JSON] --> D3[Resume Generation LLM]
D2[Original Resume Reference] --> D3
D3 --> D4[Tailored Resume Draft]
C1[Skill Assessments Collection] --> C2[Resume Generator]
C3[Original Resume Reference] --> C2
C4[Candidate Information] --> C2
C2 --> C5[Resume Generation Prompt]
C5 --> C6[Resume Generation LLM]
C6 --> C7[Tailored Resume Draft]
end
subgraph "Stage 3: Verification"
E1[Skills Mapping JSON] --> E2[Original Materials]
E2 --> E3[Tailored Resume Draft]
E3 --> E4[Verification LLM]
E4 --> E5{Verification Check}
E5 -->|PASS| E6[Approved Resume]
E5 -->|FAIL| E7[Correction Instructions]
E7 --> D3
subgraph "Stage 3: Statistics & Verification"
D1[Job Requirements JSON] --> D2[Match Statistics Calculator]
D3[Skill Assessments Collection] --> D2
D2 --> D4[Match Statistics]
D4 --> D5[Verification LLM]
C7 --> D5
D5 --> D6{Verification Check}
D6 -->|PASS| D7[Approved Resume]
D6 -->|FAIL| D8[Correction Instructions]
D8 --> C2
end
A3 --> C1
B4 --> C2
C4 --> D1
C4 --> E1
D4 --> E3
A3 --> B4
B8 --> C1
B8 --> D3
B1 --> C3
style A2 fill:#f9d77e,stroke:#333,stroke-width:2px
style B5 fill:#f9d77e,stroke:#333,stroke-width:2px
style C3 fill:#f9d77e,stroke:#333,stroke-width:2px
style D3 fill:#f9d77e,stroke:#333,stroke-width:2px
style E4 fill:#f9d77e,stroke:#333,stroke-width:2px
style E5 fill:#a3e4d7,stroke:#333,stroke-width:2px
style E6 fill:#aed6f1,stroke:#333,stroke-width:2px
style E7 fill:#f5b7b1,stroke:#333,stroke-width:2px
style B6 fill:#f9d77e,stroke:#333,stroke-width:2px
style C6 fill:#f9d77e,stroke:#333,stroke-width:2px
style D5 fill:#f9d77e,stroke:#333,stroke-width:2px
style B5 fill:#a3e4d7,stroke:#333,stroke-width:2px
style D6 fill:#a3e4d7,stroke:#333,stroke-width:2px
style D7 fill:#aed6f1,stroke:#333,stroke-width:2px
style D8 fill:#f5b7b1,stroke:#333,stroke-width:2px
```
## Stage 1: Isolated Analysis (three separate sub-stages)
## Stage 1: Isolated Analysis
1. **Job Analysis**: Extracts requirements from just the job description
2. **Candidate Analysis**: Catalogs qualifications from just the resume/context
3. **Mapping Analysis**: Identifies legitimate matches between requirements and qualifications
2. **Candidate Analysis**: Catalogs qualifications for each job requirement from just the resume/context
## Stage 2: Resume Generation
Creates a tailored resume using only verified information from the mapping
Creates a tailored resume using the skills collection and candidate information.
## Stage 3: Verification
@ -90,7 +88,7 @@ Creates a tailored resume using only verified information from the mapping
The system uses several techniques to prevent fabrication:
* **Isolation of Analysis Stages**: By analyzing the job and candidate separately, the system prevents the LLM from prematurely creating connections that might lead to fabrication.
* **Isolation of Analysis Stages**: By analyzing the job and candidate separately, and having the LLM only provide evidence of a single skill per pass, the system prevents the LLM from prematurely creating connections that might lead to fabrication.
* **Evidence Requirements**: Each qualification included must have explicit evidence from the original materials.
* **Conservative Transferability**: The system is instructed to be conservative when claiming skills are transferable.
* **Verification Layer**: A dedicated verification step acts as a safety check to catch any remaining fabrications.

View File

@ -436,11 +436,11 @@ class JobDescription(Agent):
# Group results by category and subcategory
grouped_context = defaultdict(list)
for result in rag_results:
key = f"{result['category']}/{result['subcategory']}".strip("/")
grouped_context[key].append({
"query": result["context"],
"content": result["content"][:100] + "..." if len(result["content"]) > 100 else result["content"]
})
key = f"{result['category']}/{result['subcategory']}".strip("/")
grouped_context[key].append({
"query": result["context"],
"content": result["content"][:100] + "..." if len(result["content"]) > 100 else result["content"]
})
# Format as a structured string
context_lines = ["Additional Context from Document Retrieval:"]
@ -454,120 +454,70 @@ class JobDescription(Agent):
# Stage 1B: Candidate Analysis Implementation
def create_candidate_analysis_prompt(self, resume: str, rag_results: List[Dict[str, Any]]) -> tuple[str, str]:
"""Create the prompt for candidate qualifications analysis."""
# system_prompt = """
# You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials.
# CORE PRINCIPLES:
# - Analyze ONLY the candidate's resume and provided context
# - Focus ONLY on the candidate's actual qualifications
# - Do not reference any job requirements
# - Include only explicitly mentioned information
# OUTPUT FORMAT:
# ```json
# {
# "candidate_qualifications": {
# "technical_skills": [
# {
# "skill": "skill name",
# "evidence_location": "where in resume this appears",
# "expertise_level": "stated level or 'unspecified'"
# }
# ],
# "work_experience": [
# {
# "role": "job title",
# "company": "company name",
# "duration": "time period",
# "responsibilities": ["resp1", "resp2"],
# "technologies_used": ["tech1", "tech2"],
# "achievements": ["achievement1", "achievement2"]
# }
# ],
# "education": [
# {
# "degree": "degree name",
# "institution": "institution name",
# "completed": true/false,
# "graduation_date": "date or 'ongoing'"
# }
# ],
# "projects": [
# {
# "name": "project name",
# "description": "brief description",
# "technologies_used": ["tech1", "tech2"]
# }
# ],
# "soft_skills": [
# {
# "skill": "skill name",
# "context": "brief mention of where this appears"
# }
# ]
# }
# }
# """
system_prompt = """\
You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials.
"""Create the prompt for candidate qualifications analysis."""
system_prompt = """\
You are an objective resume analyzer. Create a concise inventory of the candidate's key skills, experiences, and qualifications based on their resume.
CORE PRINCIPLES:
- Analyze ONLY the candidate's resume and provided context.
- Focus ONLY on the candidate's actual qualifications explicitly mentioned in the resume.
- Use the additional context to clarify or provide background for terms, skills, or experiences mentioned in the resume (e.g., to understand the scope of a skill like 'Python' or a role's responsibilities).
- Do NOT treat the context as job requirements or infer qualifications not explicitly stated in the resume.
- Include only explicitly mentioned information from the resume, supplemented by context where relevant.
- Focus on the most significant and relevant qualifications explicitly mentioned.
- Limit your analysis to the most important items in each category.
- Prioritize brevity and completeness over exhaustiveness.
- Complete the entire analysis in one response without getting stuck on any section.
OUTPUT FORMAT:
```json
{
"candidate_qualifications": {
"technical_skills": [
// Include MAX 10 most important technical skills
{
"skill": "skill name",
"evidence_location": "where in resume this appears",
"evidence_location": "brief reference",
"expertise_level": "stated level or 'unspecified'"
}
],
"work_experience": [
// Include MAX 5 most recent or relevant positions
{
"role": "job title",
"company": "company name",
"duration": "time period",
"responsibilities": ["resp1", "resp2"],
"technologies_used": ["tech1", "tech2"],
"achievements": ["achievement1", "achievement2"]
"responsibilities": ["resp1", "resp2"], // MAX 3 key responsibilities
"technologies_used": ["tech1", "tech2"], // MAX 5 technologies
"achievements": ["achievement1"] // MAX 2 achievements
}
],
"education": [
// Include ALL education entries (typically 1-3)
{
"degree": "degree name",
"institution": "institution name",
"completed": true/false,
"graduation_date": "date or 'ongoing'"
"completed": true/false
}
],
"projects": [
// Include MAX 3 most significant projects
{
"name": "project name",
"description": "brief description",
"technologies_used": ["tech1", "tech2"]
"description": "one sentence description",
"technologies_used": ["tech1", "tech2"] // MAX 3 technologies
}
],
"soft_skills": [
// Include MAX 5 most prominent soft skills
{
"skill": "skill name",
"context": "brief mention of where this appears"
"context": "brief mention"
}
]
}
}
IMPORTANT: If at any point you find yourself repeating items or getting stuck, STOP that section and move to the next. It's better to provide a partial analysis than to get stuck in a loop.
"""
context = self.format_rag_context(rag_results)
prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}"
return system_prompt, prompt
context = self.format_rag_context(rag_results)
prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}"
return system_prompt, prompt
async def call_llm(self, message: Message, system_prompt, prompt, temperature=0.7):
logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")

View File

@ -167,6 +167,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
if os.path.isfile(file_path):
# Do not put the Resume in RAG as it is provided with all queries.
if file_path == defines.resume_doc:
logging.info(f"Not adding {file_path} to RAG -- primary resume")
continue
files_checked += 1
current_hash = self._get_file_hash(file_path)
@ -217,6 +218,10 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
if file_path in self.processing_files:
logging.info(f"{file_path} already in queue. Not adding.")
return
if file_path == defines.resume_doc:
logging.info(f"Not adding {file_path} to RAG -- primary resume")
return
try:
logging.info(f"{file_path} not in queue. Adding.")