From a1798b58acaae1238d2e488e95f808fdec760a58 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Mon, 12 May 2025 16:57:20 -0700 Subject: [PATCH] Switching to one-call per skill --- frontend/public/docs/resume-generation.md | 88 +++++++++-------- src/utils/agents/job_description.py | 110 ++++++---------------- src/utils/rag.py | 5 + 3 files changed, 78 insertions(+), 125 deletions(-) diff --git a/frontend/public/docs/resume-generation.md b/frontend/public/docs/resume-generation.md index 426b8b2..c93217f 100644 --- a/frontend/public/docs/resume-generation.md +++ b/frontend/public/docs/resume-generation.md @@ -4,13 +4,12 @@ The system follows a carefully designed pipeline with isolated stages to prevent The system uses a pipeline of isolated analysis and generation steps: -1. **Stage 1: Isolated Analysis** (three sub-stages) +1. **Stage 1: Isolated Analysis** - **1A: Job Analysis** - Extracts requirements from job description only - - **1B: Candidate Analysis** - Catalogs qualifications from resume/context only - - **1C: Mapping Analysis** - Identifies legitimate matches between requirements and qualifications + - **1B: Skill-Based Assessment** - For each required skill, determine an Individual Skill Assessment, adding it to a Skill Assessments Collection. 2. **Stage 2: Resume Generation** - - Uses mapping output to create a tailored resume with evidence-based content + - Uses the Skill Assessments Collection to generate a tailored resume. 3. 
**Stage 3: Verification** - Performs fact-checking to catch any remaining fabrications @@ -23,63 +22,62 @@ flowchart TD A2 --> A3[Job Requirements JSON] end - subgraph "Stage 1B: Candidate Analysis" - B1[Resume Input] --> B5[Candidate Analysis LLM] - B5 --> B4[Candidate Qualifications JSON] - B2[Candidate Info] --> B3[RAG] - B3[RAG] --> B2[Candidate Info] - A3[Job Requirements JSON] --> B3[RAG] - B3[RAG] --> B5 - end - - subgraph "Stage 1C: Mapping Analysis" - C1[Job Requirements JSON] --> C3[Mapping Analysis LLM] - C2[Candidate Qualifications JSON] --> C3 - C3 --> C4[Skills Mapping JSON] + subgraph "Stage 1B: Skill-Based Assessment" + B1[Resume Input] --> B2[Candidate Info] + B2 --> B3[RAG System] + A3 --> B4[Skill Assessment Generator] + B3 --> B4 + B4 --> B5{For Each Required Skill} + B5 --> B6[Skill-Focused LLM Query] + B6 --> B7[Individual Skill Assessment] + B7 --> B8[Skill Assessments Collection] end end subgraph "Stage 2: Resume Generation" - D1[Skills Mapping JSON] --> D3[Resume Generation LLM] - D2[Original Resume Reference] --> D3 - D3 --> D4[Tailored Resume Draft] + C1[Skill Assessments Collection] --> C2[Resume Generator] + C3[Original Resume Reference] --> C2 + C4[Candidate Information] --> C2 + C2 --> C5[Resume Generation Prompt] + C5 --> C6[Resume Generation LLM] + C6 --> C7[Tailored Resume Draft] end - subgraph "Stage 3: Verification" - E1[Skills Mapping JSON] --> E2[Original Materials] - E2 --> E3[Tailored Resume Draft] - E3 --> E4[Verification LLM] - E4 --> E5{Verification Check} - E5 -->|PASS| E6[Approved Resume] - E5 -->|FAIL| E7[Correction Instructions] - E7 --> D3 + subgraph "Stage 3: Statistics & Verification" + D1[Job Requirements JSON] --> D2[Match Statistics Calculator] + D3[Skill Assessments Collection] --> D2 + D2 --> D4[Match Statistics] + D4 --> D5[Verification LLM] + C7 --> D5 + D5 --> D6{Verification Check} + D6 -->|PASS| D7[Approved Resume] + D6 -->|FAIL| D8[Correction Instructions] + D8 --> C2 end - A3 --> C1 - B4 --> C2 - C4 --> 
D1 - C4 --> E1 - D4 --> E3 + A3 --> B4 + B8 --> C1 + B8 --> D3 + B1 --> C3 style A2 fill:#f9d77e,stroke:#333,stroke-width:2px - style B5 fill:#f9d77e,stroke:#333,stroke-width:2px - style C3 fill:#f9d77e,stroke:#333,stroke-width:2px - style D3 fill:#f9d77e,stroke:#333,stroke-width:2px - style E4 fill:#f9d77e,stroke:#333,stroke-width:2px - style E5 fill:#a3e4d7,stroke:#333,stroke-width:2px - style E6 fill:#aed6f1,stroke:#333,stroke-width:2px - style E7 fill:#f5b7b1,stroke:#333,stroke-width:2px + style B6 fill:#f9d77e,stroke:#333,stroke-width:2px + style C6 fill:#f9d77e,stroke:#333,stroke-width:2px + style D5 fill:#f9d77e,stroke:#333,stroke-width:2px + style B5 fill:#a3e4d7,stroke:#333,stroke-width:2px + style D6 fill:#a3e4d7,stroke:#333,stroke-width:2px + style D7 fill:#aed6f1,stroke:#333,stroke-width:2px + style D8 fill:#f5b7b1,stroke:#333,stroke-width:2px ``` -## Stage 1: Isolated Analysis (three separate sub-stages) +## Stage 1: Isolated Analysis 1. **Job Analysis**: Extracts requirements from just the job description -2. **Candidate Analysis**: Catalogs qualifications from just the resume/context -3. **Mapping Analysis**: Identifies legitimate matches between requirements and qualifications +2. **Candidate Analysis**: Catalogs qualifications for each job requirement from just the resume/context ## Stage 2: Resume Generation -Creates a tailored resume using only verified information from the mapping +Creates a tailored resume using the skills collection and candidate information. ## Stage 3: Verification @@ -90,7 +88,7 @@ Creates a tailored resume using only verified information from the mapping The system uses several techniques to prevent fabrication: -* **Isolation of Analysis Stages**: By analyzing the job and candidate separately, the system prevents the LLM from prematurely creating connections that might lead to fabrication. 
+* **Isolation of Analysis Stages**: By analyzing the job and candidate separately, and having the LLM only provide evidence of a single skill per pass, the system prevents the LLM from prematurely creating connections that might lead to fabrication. * **Evidence Requirements**: Each qualification included must have explicit evidence from the original materials. * **Conservative Transferability**: The system is instructed to be conservative when claiming skills are transferable. * **Verification Layer**: A dedicated verification step acts as a safety check to catch any remaining fabrications. diff --git a/src/utils/agents/job_description.py b/src/utils/agents/job_description.py index 5e6eb87..cd9f428 100644 --- a/src/utils/agents/job_description.py +++ b/src/utils/agents/job_description.py @@ -436,11 +436,11 @@ class JobDescription(Agent): # Group results by category and subcategory grouped_context = defaultdict(list) for result in rag_results: - key = f"{result['category']}/{result['subcategory']}".strip("/") - grouped_context[key].append({ - "query": result["context"], - "content": result["content"][:100] + "..." if len(result["content"]) > 100 else result["content"] - }) + key = f"{result['category']}/{result['subcategory']}".strip("/") + grouped_context[key].append({ + "query": result["context"], + "content": result["content"][:100] + "..." if len(result["content"]) > 100 else result["content"] + }) # Format as a structured string context_lines = ["Additional Context from Document Retrieval:"] @@ -454,120 +454,70 @@ class JobDescription(Agent): # Stage 1B: Candidate Analysis Implementation def create_candidate_analysis_prompt(self, resume: str, rag_results: List[Dict[str, Any]]) -> tuple[str, str]: - """Create the prompt for candidate qualifications analysis.""" - - # system_prompt = """ - # You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials. 
- - # CORE PRINCIPLES: - # - Analyze ONLY the candidate's resume and provided context - # - Focus ONLY on the candidate's actual qualifications - # - Do not reference any job requirements - # - Include only explicitly mentioned information - - # OUTPUT FORMAT: - # ```json - # { - # "candidate_qualifications": { - # "technical_skills": [ - # { - # "skill": "skill name", - # "evidence_location": "where in resume this appears", - # "expertise_level": "stated level or 'unspecified'" - # } - # ], - # "work_experience": [ - # { - # "role": "job title", - # "company": "company name", - # "duration": "time period", - # "responsibilities": ["resp1", "resp2"], - # "technologies_used": ["tech1", "tech2"], - # "achievements": ["achievement1", "achievement2"] - # } - # ], - # "education": [ - # { - # "degree": "degree name", - # "institution": "institution name", - # "completed": true/false, - # "graduation_date": "date or 'ongoing'" - # } - # ], - # "projects": [ - # { - # "name": "project name", - # "description": "brief description", - # "technologies_used": ["tech1", "tech2"] - # } - # ], - # "soft_skills": [ - # { - # "skill": "skill name", - # "context": "brief mention of where this appears" - # } - # ] - # } - # } - # """ - system_prompt = """\ -You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials. + """Create the prompt for candidate qualifications analysis.""" + system_prompt = """\ +You are an objective resume analyzer. Create a concise inventory of the candidate's key skills, experiences, and qualifications based on their resume. CORE PRINCIPLES: - Analyze ONLY the candidate's resume and provided context. -- Focus ONLY on the candidate's actual qualifications explicitly mentioned in the resume. 
-- Use the additional context to clarify or provide background for terms, skills, or experiences mentioned in the resume (e.g., to understand the scope of a skill like 'Python' or a role's responsibilities). -- Do NOT treat the context as job requirements or infer qualifications not explicitly stated in the resume. -- Include only explicitly mentioned information from the resume, supplemented by context where relevant. +- Focus on the most significant and relevant qualifications explicitly mentioned. +- Limit your analysis to the most important items in each category. +- Prioritize brevity and completeness over exhaustiveness. +- Complete the entire analysis in one response without getting stuck on any section. OUTPUT FORMAT: -```json { "candidate_qualifications": { "technical_skills": [ + // Include MAX 10 most important technical skills { "skill": "skill name", - "evidence_location": "where in resume this appears", + "evidence_location": "brief reference", "expertise_level": "stated level or 'unspecified'" } ], "work_experience": [ + // Include MAX 5 most recent or relevant positions { "role": "job title", "company": "company name", "duration": "time period", - "responsibilities": ["resp1", "resp2"], - "technologies_used": ["tech1", "tech2"], - "achievements": ["achievement1", "achievement2"] + "responsibilities": ["resp1", "resp2"], // MAX 3 key responsibilities + "technologies_used": ["tech1", "tech2"], // MAX 5 technologies + "achievements": ["achievement1"] // MAX 2 achievements } ], "education": [ + // Include ALL education entries (typically 1-3) { "degree": "degree name", "institution": "institution name", - "completed": true/false, - "graduation_date": "date or 'ongoing'" + "completed": true/false } ], "projects": [ + // Include MAX 3 most significant projects { "name": "project name", - "description": "brief description", - "technologies_used": ["tech1", "tech2"] + "description": "one sentence description", + "technologies_used": ["tech1", "tech2"] // 
MAX 3 technologies } ], "soft_skills": [ + // Include MAX 5 most prominent soft skills { "skill": "skill name", - "context": "brief mention of where this appears" + "context": "brief mention" } ] } } + +IMPORTANT: If at any point you find yourself repeating items or getting stuck, STOP that section and move to the next. It's better to provide a partial analysis than to get stuck in a loop. """ - context = self.format_rag_context(rag_results) - prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}" - return system_prompt, prompt + context = self.format_rag_context(rag_results) + prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}" + return system_prompt, prompt async def call_llm(self, message: Message, system_prompt, prompt, temperature=0.7): logger.info(f"{self.agent_type} - {inspect.stack()[0].function}") diff --git a/src/utils/rag.py b/src/utils/rag.py index 5c48615..d9b335b 100644 --- a/src/utils/rag.py +++ b/src/utils/rag.py @@ -167,6 +167,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler): if os.path.isfile(file_path): # Do not put the Resume in RAG as it is provideded with all queries. if file_path == defines.resume_doc: + logging.info(f"Not adding {file_path} to RAG -- primary resume") continue files_checked += 1 current_hash = self._get_file_hash(file_path) @@ -217,6 +218,10 @@ class ChromaDBFileWatcher(FileSystemEventHandler): if file_path in self.processing_files: logging.info(f"{file_path} already in queue. Not adding.") return + + if file_path == defines.resume_doc: + logging.info(f"Not adding {file_path} to RAG -- primary resume") + return try: logging.info(f"{file_path} not in queue. Adding.")