backstory/src/resume-flow/impl-full.md

26 KiB

import json
import logging
import re
from typing import Dict, List, Any, Optional, Union

# Configure logging

# Emit timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Define LLM interface

def call_llm(prompt: str, temperature: float = 0.2) -> str:
    """
    Call your LLM of choice with the given prompt and parameters.

    Implement connection to your specific LLM provider here. Until that is
    done this placeholder fails loudly instead of returning ``None``, which
    would otherwise break callers that pass the reply straight to regex
    parsing.

    Args:
        prompt: Full prompt text to send to the model.
        temperature: Sampling temperature for the provider call; not used by
            this placeholder.

    Returns:
        The model's text reply (once implemented).

    Raises:
        NotImplementedError: Always, until a provider call is wired in.
    """
    # Replace with your actual LLM call implementation
    raise NotImplementedError(
        "call_llm is a placeholder; implement the connection to your LLM provider"
    )

# Helper functions

def extract_json_from_text(text: str) -> str:
    """Extract JSON string from text that may contain other content.

    Two strategies are tried in order:

    1. The contents of a Markdown code fence tagged ``json``.
    2. The span from the first ``{`` to the last ``}`` anywhere in the text.

    Args:
        text: Raw text, expected to embed a JSON object.

    Returns:
        The extracted JSON text, stripped of surrounding whitespace. It is
        not parsed or validated here.

    Raises:
        ValueError: If neither a fenced block nor a brace-delimited span is
            found.
    """
    # The fence delimiters are required: without them the lazy group matches
    # the empty string right after any occurrence of the word "json".
    fenced = re.search(r'```json\s*([\s\S]*?)\s*```', text)
    if fenced:
        return fenced.group(1).strip()

    # Try to find JSON without the markdown code block (greedy: first "{"
    # through last "}").
    bare = re.search(r'(\{[\s\S]*\})', text)
    if bare:
        return bare.group(1).strip()

    raise ValueError("No JSON found in the response")

def validate_job_requirements(job_requirements: Dict) -> None:
    """Validate the structure of job requirements.

    Args:
        job_requirements: Parsed job-analysis payload; must be a dict that
            contains the top-level "job_requirements" key.

    Raises:
        ValueError: If the payload is not a dict or a required key is missing.
    """
    required_keys = ["job_requirements"]

    # Reject non-dict payloads explicitly: a list or string would otherwise be
    # membership-tested by element/substring and could slip through.
    if not isinstance(job_requirements, dict):
        raise ValueError(
            f"Job requirements must be a JSON object, got {type(job_requirements).__name__}"
        )

    missing = [key for key in required_keys if key not in job_requirements]
    if missing:
        raise ValueError(f"Missing required keys in job requirements: {missing}")

    # Additional validation can be added here

def validate_candidate_qualifications(candidate_qualifications: Dict) -> None:
    """Validate the structure of candidate qualifications.

    Args:
        candidate_qualifications: Parsed candidate-analysis payload; must be a
            dict that contains the top-level "candidate_qualifications" key.

    Raises:
        ValueError: If the payload is not a dict or a required key is missing.
    """
    required_keys = ["candidate_qualifications"]

    # Reject non-dict payloads explicitly: a list or string would otherwise be
    # membership-tested by element/substring and could slip through.
    if not isinstance(candidate_qualifications, dict):
        raise ValueError(
            f"Candidate qualifications must be a JSON object, got {type(candidate_qualifications).__name__}"
        )

    missing = [key for key in required_keys if key not in candidate_qualifications]
    if missing:
        raise ValueError(f"Missing required keys in candidate qualifications: {missing}")

    # Additional validation can be added here

def validate_skills_mapping(skills_mapping: Dict) -> None:
    """Validate the structure of skills mapping.

    Args:
        skills_mapping: Parsed mapping payload; must be a dict that contains
            the top-level "skills_mapping" and "resume_recommendations" keys.

    Raises:
        ValueError: If the payload is not a dict or a required key is missing.
    """
    required_keys = ["skills_mapping", "resume_recommendations"]

    # Reject non-dict payloads explicitly: a list or string would otherwise be
    # membership-tested by element/substring and could slip through.
    if not isinstance(skills_mapping, dict):
        raise ValueError(
            f"Skills mapping must be a JSON object, got {type(skills_mapping).__name__}"
        )

    missing = [key for key in required_keys if key not in skills_mapping]
    if missing:
        raise ValueError(f"Missing required keys in skills mapping: {missing}")

    # Additional validation can be added here

def extract_header_from_resume(resume: str) -> str:
    """Return the leading non-blank lines of a resume as its header.

    Heuristic only: the first 10 lines of the stripped text are scanned and
    at most 4 non-blank ones are kept, in order, joined with newlines.
    """
    search_window = 10   # Arbitrarily choose first 10 lines to search
    max_header_lines = 4  # Assume header is no more than 4 lines

    leading = resume.strip().split("\n")[:search_window]
    non_blank = [entry for entry in leading if entry.strip()]
    return "\n".join(non_blank[:max_header_lines])

# Stage 1A: Job Analysis Implementation

def create_job_analysis_prompt(job_description: str) -> str:
    """Build the job-requirements analysis prompt.

    The fixed analyzer instructions (including the expected JSON output
    schema) are followed by a "Job Description:" section holding the supplied
    text.
    """
    instructions = """ You are an objective job requirements analyzer. Your task is to extract and categorize the specific skills, experiences, and qualifications required in a job description WITHOUT any reference to any candidate.

## INSTRUCTIONS:

1. Analyze ONLY the job description provided.
2. Extract and categorize all requirements and preferences.
3. DO NOT consider any candidate information - this is a pure job analysis task.

## OUTPUT FORMAT:

```json
{
  "job_requirements": {
    "technical_skills": {
      "required": ["skill1", "skill2"],
      "preferred": ["skill1", "skill2"]
    },
    "experience_requirements": {
      "required": ["exp1", "exp2"],
      "preferred": ["exp1", "exp2"]
    },
    "education_requirements": ["req1", "req2"],
    "soft_skills": ["skill1", "skill2"],
    "industry_knowledge": ["knowledge1", "knowledge2"],
    "responsibilities": ["resp1", "resp2"],
    "company_values": ["value1", "value2"]
  }
}
```

Be specific and detailed in your extraction. Break down compound requirements into individual components.
For example, "5+ years experience with React, Node.js and MongoDB" should be separated into:
- Experience: "5+ years software development"
- Technical skills: "React", "Node.js", "MongoDB"

Avoid vague categorizations and be precise about whether skills are explicitly required or just preferred.
"""

    # Instructions first, then the job posting under its own label.
    return "{}\n\nJob Description:\n{}".format(instructions, job_description)

def analyze_job_requirements(job_description: str) -> Dict:
    """Run the job-analysis stage and return the parsed, validated result.

    Builds the job-analysis prompt, sends it to the LLM, extracts the JSON
    from the reply, parses it and validates its top-level structure. Any
    failure is logged and re-raised unchanged.
    """
    try:
        llm_reply = call_llm(create_job_analysis_prompt(job_description))

        # Pull the JSON payload out of the reply and parse it
        parsed = json.loads(extract_json_from_text(llm_reply))

        # Structural check before handing the result back
        validate_job_requirements(parsed)
    except Exception as err:
        logger.error(f"Error in job requirements analysis: {err}")
        raise
    else:
        return parsed

# Stage 1B: Candidate Analysis Implementation

def create_candidate_analysis_prompt(resume: str, context: str) -> str:
    """Build the candidate-qualifications analysis prompt.

    The fixed analyzer instructions (including the expected JSON output
    schema) are followed by a "Resume:" section and an "Additional Context:"
    section holding the supplied texts.
    """
    instructions = """ You are an objective resume analyzer. Your task is to catalog ALL skills, experiences, and qualifications present in a candidate's materials WITHOUT any reference to any job description.

## INSTRUCTIONS:

1. Analyze ONLY the candidate's resume and context provided.
2. Create a comprehensive inventory of the candidate's actual qualifications.
3. DO NOT consider any job requirements - this is a pure candidate analysis task.
4. For each qualification, cite exactly where in the materials it appears.

## OUTPUT FORMAT:

```json
{
  "candidate_qualifications": {
    "technical_skills": [
      {
        "skill": "skill name",
        "evidence": "exact quote from materials",
        "source": "resume or context",
        "expertise_level": "explicit level mentioned or 'unspecified'"
      }
    ],
    "work_experience": [
      {
        "role": "job title",
        "company": "company name",
        "duration": "time period",
        "responsibilities": ["resp1", "resp2"],
        "technologies_used": ["tech1", "tech2"],
        "achievements": ["achievement1", "achievement2"]
      }
    ],
    "education": [
      {
        "degree": "degree name",
        "institution": "institution name",
        "completed": true/false,
        "evidence": "exact quote from materials"
      }
    ],
    "projects": [
      {
        "name": "project name",
        "description": "brief description",
        "technologies_used": ["tech1", "tech2"],
        "evidence": "exact quote from materials"
      }
    ],
    "soft_skills": [
      {
        "skill": "skill name",
        "evidence": "exact quote or inference basis",
        "source": "resume or context"
      }
    ]
  }
}
```

Be thorough and precise. Include ONLY skills and experiences explicitly mentioned in the materials.
For each entry, provide the exact text evidence from the materials that supports its inclusion.
Do not make assumptions about skills based on job titles or project names - only include skills explicitly mentioned.
"""

    # Instructions first, then the resume and the extra context, each labeled.
    return "{}\n\nResume:\n{}\n\nAdditional Context:\n{}".format(
        instructions, resume, context
    )

def analyze_candidate_qualifications(resume: str, context: str) -> Dict:
    """Run the candidate-analysis stage and return the parsed, validated result.

    Builds the candidate-analysis prompt from the resume and context, sends
    it to the LLM, extracts the JSON from the reply, parses it and validates
    its top-level structure. Any failure is logged and re-raised unchanged.
    """
    try:
        llm_reply = call_llm(create_candidate_analysis_prompt(resume, context))

        # Pull the JSON payload out of the reply and parse it
        parsed = json.loads(extract_json_from_text(llm_reply))

        # Structural check before handing the result back
        validate_candidate_qualifications(parsed)
    except Exception as err:
        logger.error(f"Error in candidate qualifications analysis: {err}")
        raise
    else:
        return parsed

# Stage 1C: Mapping Analysis Implementation

def create_mapping_analysis_prompt(job_requirements: Dict, candidate_qualifications: Dict) -> str:
    """Build the skills-mapping prompt.

    The fixed mapper instructions (including the expected JSON output schema)
    are followed by the job requirements and the candidate qualifications,
    each serialized as indented JSON under its own label.
    """
    instructions = """ You are an objective skills mapper. Your task is to identify legitimate matches between job requirements and candidate qualifications WITHOUT fabricating or stretching the truth.

## INSTRUCTIONS:

1. Use ONLY the structured job requirements and candidate qualifications provided.
2. Create a mapping that shows where the candidate's actual skills and experiences align with job requirements.
3. Identify gaps where the candidate lacks required qualifications.
4. Suggest legitimate transferable skills ONLY when there is reasonable evidence.

## OUTPUT FORMAT:

```json
{
  "skills_mapping": {
    "direct_matches": [
      {
        "job_requirement": "required skill",
        "candidate_qualification": "matching skill",
        "evidence": "exact quote from candidate materials"
      }
    ],
    "transferable_skills": [
      {
        "job_requirement": "required skill",
        "candidate_qualification": "transferable skill",
        "reasoning": "explanation of legitimate transferability",
        "evidence": "exact quote from candidate materials"
      }
    ],
    "gap_analysis": {
      "missing_required_skills": ["skill1", "skill2"],
      "missing_preferred_skills": ["skill1", "skill2"],
      "missing_experience": ["exp1", "exp2"]
    }
  },
  "resume_recommendations": {
    "highlight_points": [
      {
        "qualification": "candidate's qualification",
        "relevance": "why this is highly relevant to the job"
      }
    ],
    "transferable_narratives": [
      {
        "from": "candidate's actual experience",
        "to": "job requirement",
        "suggested_framing": "how to honestly present the transfer"
      }
    ],
    "honest_limitations": [
      "frank assessment of major qualification gaps"
    ]
  }
}
```

CRITICAL RULES:
1. A "direct match" requires the EXACT SAME skill in both job requirements and candidate qualifications
2. A "transferable skill" must have legitimate, defensible connection - do not stretch credibility
3. All "missing_required_skills" MUST be acknowledged - do not ignore major gaps
4. Every match or transfer claim must cite specific evidence from the candidate materials
5. Be conservative in claiming transferability - when in doubt, list as missing
"""

    # Serialize both inputs up front (job requirements first).
    job_json = json.dumps(job_requirements, indent=2)
    candidate_json = json.dumps(candidate_qualifications, indent=2)

    return (
        f"{instructions}\n\nJob Requirements:\n{job_json}\n\n"
        f"Candidate Qualifications:\n{candidate_json}"
    )

def create_skills_mapping(job_requirements: Dict, candidate_qualifications: Dict) -> Dict:
    """Run the mapping stage and return the parsed, validated skills mapping.

    Builds the mapping prompt from the two structured inputs, sends it to the
    LLM, extracts the JSON from the reply, parses it and validates its
    top-level structure. Any failure is logged and re-raised unchanged.
    """
    try:
        llm_reply = call_llm(
            create_mapping_analysis_prompt(job_requirements, candidate_qualifications)
        )

        # Pull the JSON payload out of the reply and parse it
        parsed = json.loads(extract_json_from_text(llm_reply))

        # Structural check before handing the result back
        validate_skills_mapping(parsed)
    except Exception as err:
        logger.error(f"Error in skills mapping analysis: {err}")
        raise
    else:
        return parsed

# Stage 2: Resume Generation Implementation

def create_resume_generation_prompt(skills_mapping: Dict, candidate_qualifications: Dict, original_header: str) -> str:
    """Build the resume-generation prompt.

    The fixed writer instructions are followed by the skills mapping and the
    candidate qualifications (each as indented JSON) and finally the original
    resume header text, each under its own label.
    """
    instructions = """ You are a professional resume writer whose primary concern is FACTUAL ACCURACY. Your task is to create a tailored resume that presents the candidate's actual qualifications in the most relevant way for this job, using ONLY information that has been verified in the skills mapping.

## INSTRUCTIONS:

1. Use ONLY the information provided in the skills mapping JSON
2. Each skill, experience, or achievement you include MUST appear in either "direct_matches" or "transferable_skills"
3. DO NOT include skills listed in "missing_required_skills" or "missing_preferred_skills"
4. Format a professional resume with these sections:
   - Header with name and contact information (exactly as provided in original resume)
   - Professional Summary (focused on verified matching and transferable skills)
   - Skills (ONLY from "direct_matches" and "transferable_skills" sections)
   - Professional Experience (highlighting experiences referenced in the mapping)
   - Education (exactly as listed in the candidate qualifications)

5. Follow these principles:
   - Use the exact wording from "highlight_points" and "transferable_narratives" when describing experiences
   - Maintain original job titles, companies, and dates exactly as provided
   - Use achievement-oriented language that emphasizes results and impact
   - Prioritize experiences that directly relate to the job requirements

## EVIDENCE REQUIREMENT:

For each skill or qualification you include in the resume, you MUST be able to trace it to:
1. A specific entry in "direct_matches" or "transferable_skills", AND
2. The original evidence citation in the candidate qualifications

If you cannot meet both these requirements for any content, DO NOT include it.

## FORMAT REQUIREMENTS:

- Create a clean, professional resume format
- Use consistent formatting for similar elements
- Ensure readability with appropriate white space
- Use bullet points for skills and achievements
- Include a final note: "Note: Initial draft of the resume was generated using the Backstory application written by James Ketrenos."

## FINAL VERIFICATION:

Before completing the resume:
1. Check that EVERY skill listed appears in either "direct_matches" or "transferable_skills"
2. Verify that no skills from "missing_required_skills" are included
3. Ensure all experience descriptions can be traced to evidence in candidate qualifications
4. Confirm that transferable skills are presented honestly without exaggeration
"""

    # Serialize both structured inputs up front (skills mapping first).
    mapping_json = json.dumps(skills_mapping, indent=2)
    candidate_json = json.dumps(candidate_qualifications, indent=2)

    return (
        f"{instructions}\n\nSkills Mapping:\n{mapping_json}\n\n"
        f"Candidate Qualifications:\n{candidate_json}\n\n"
        f"Original Resume Header:\n{original_header}"
    )

def generate_tailored_resume(skills_mapping: Dict, candidate_qualifications: Dict, original_header: str) -> str:
    """Ask the LLM to write the tailored resume and return its reply as-is.

    The resume-generation prompt is built from the skills mapping, the
    candidate qualifications and the original header. Any failure is logged
    and re-raised unchanged.
    """
    try:
        # Slightly higher temperature for better writing
        return call_llm(
            create_resume_generation_prompt(skills_mapping, candidate_qualifications, original_header),
            temperature=0.4,
        )
    except Exception as err:
        logger.error(f"Error in resume generation: {err}")
        raise

# Stage 3: Verification Implementation

def create_verification_prompt(generated_resume: str, skills_mapping: Dict, candidate_qualifications: Dict) -> str:
    """Build the resume-verification prompt.

    The fixed fact-checker instructions (including the expected JSON report
    schema) are followed by the tailored resume text, the skills mapping and
    the candidate qualifications (both as indented JSON), each under its own
    label.
    """
    instructions = """ You are a critical resume fact-checker responsible for verifying the accuracy of a tailored resume. Your task is to identify and flag any fabricated or embellished information that does not appear in the candidate's original materials.

## INSTRUCTIONS:

1. Compare the tailored resume against:
   - The structured skills mapping
   - The candidate's original qualifications
   
2. Perform a line-by-line verification focusing on:
   - Skills claimed vs. skills verified in original materials
   - Experience descriptions vs. actual documented experience
   - Projects and achievements vs. documented accomplishments
   - Technical knowledge claims vs. verified technical background

3. Create a verification report with these sections:

## OUTPUT FORMAT:

```json
{
  "verification_results": {
    "factual_accuracy": {
      "status": "PASS/FAIL",
      "issues": [
        {
          "claim": "The specific claim in the resume",
          "issue": "Why this is problematic",
          "source_check": "Result of checking against source materials",
          "suggested_correction": "How to fix this issue"
        }
      ]
    },
    "skill_verification": {
      "status": "PASS/FAIL",
      "unverified_skills": ["skill1", "skill2"]
    },
    "experience_verification": {
      "status": "PASS/FAIL",
      "problematic_statements": [
        {
          "statement": "The problematic experience statement",
          "issue": "Why this is problematic",
          "suggested_correction": "How to fix this issue"
        }
      ]
    },
    "overall_assessment": "APPROVED/NEEDS REVISION",
    "correction_instructions": "Specific instructions for correcting the resume"
  }
}
```

## CRITICAL VERIFICATION CRITERIA:

1. Any skill mentioned in the resume MUST appear verbatim in the skills mapping
2. Any technology experience claimed MUST be explicitly documented in original materials
3. Role descriptions must not imply expertise with technologies not listed in original materials
4. "Transferable skills" must be reasonably transferable, not stretches or fabrications
5. Job titles, dates, and companies must match exactly with original materials
6. Professional summary must not imply experience with technologies from the job description that aren't in the candidate's background

## SPECIAL ATTENTION:

Pay particular attention to subtle fabrications such as:
- Vague wording that implies experience ("worked with", "familiar with", "utilized") with technologies not in original materials
- Reframing unrelated experience to falsely imply relevance to the job requirements
- Adding technologies to project descriptions that weren't mentioned in the original materials
- Exaggerating level of involvement or responsibility in projects or roles
- Creating achievements that weren't documented in the original materials
"""

    # Serialize both structured inputs up front (skills mapping first).
    mapping_json = json.dumps(skills_mapping, indent=2)
    candidate_json = json.dumps(candidate_qualifications, indent=2)

    return (
        f"{instructions}\n\nTailored Resume:\n{generated_resume}\n\n"
        f"Skills Mapping:\n{mapping_json}\n\n"
        f"Candidate Qualifications:\n{candidate_json}"
    )

def verify_resume(generated_resume: str, skills_mapping: Dict, candidate_qualifications: Dict) -> Dict:
    """Verify the generated resume for accuracy against original materials.

    Builds the verification prompt, sends it to the LLM, extracts the JSON
    report from the reply and parses it. The report's shape is checked here,
    as the other stages do for their outputs, because callers index
    ``["verification_results"]["overall_assessment"]`` directly.

    Args:
        generated_resume: Resume text to fact-check.
        skills_mapping: Skills-mapping data embedded in the prompt.
        candidate_qualifications: Candidate data embedded in the prompt.

    Returns:
        The parsed verification report (a dict with a "verification_results"
        entry that contains "overall_assessment").

    Raises:
        ValueError: If no JSON is found, it cannot be parsed, or the report
            lacks the required fields.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        prompt = create_verification_prompt(generated_resume, skills_mapping, candidate_qualifications)
        response = call_llm(prompt)

        # Extract JSON from response
        json_str = extract_json_from_text(response)
        verification_results = json.loads(json_str)

        # Validate structure: fail here with a clear message rather than
        # letting a malformed report surface later as a bare KeyError.
        report = (
            verification_results.get("verification_results")
            if isinstance(verification_results, dict)
            else None
        )
        if not isinstance(report, dict) or "overall_assessment" not in report:
            raise ValueError(
                "Missing required keys in verification results: "
                "expected 'verification_results' containing 'overall_assessment'"
            )

        return verification_results
    except Exception as e:
        logger.error(f"Error in resume verification: {str(e)}")
        raise

def correct_resume_issues(generated_resume: str, verification_results: Dict, skills_mapping: Dict, candidate_qualifications: Dict, original_header: str) -> str:
    """Return the resume unchanged if approved, else an LLM-corrected version.

    When the report's overall assessment is "APPROVED" the input resume is
    returned immediately and no LLM call is made. Otherwise a correction
    prompt is assembled from the resume, the verification report, the skills
    mapping, the candidate qualifications and the original header, and the
    LLM's reply is returned as-is. LLM-call failures are logged and re-raised.
    """
    assessment = verification_results["verification_results"]["overall_assessment"]
    if assessment == "APPROVED":
        return generated_resume

    instructions = """
You are a professional resume editor with a focus on factual accuracy. Your task is to correct
the identified issues in a tailored resume according to the verification report.

## INSTRUCTIONS:

1. Make ONLY the changes specified in the verification report
2. Ensure all corrections maintain factual accuracy based on the skills mapping
3. Do not introduce any new claims or skills not present in the verification data
4. Maintain the original format and structure of the resume

## PROCESS:

1. For each issue in the verification report:
   - Identify the problematic text in the resume
   - Replace it with the suggested correction
   - Ensure the correction is consistent with the rest of the resume

2. After making all corrections:
   - Review the revised resume for consistency
   - Ensure no factual inaccuracies have been introduced
   - Check that all formatting remains professional

Return the fully corrected resume.
"""

    # Assemble the labeled sections in order; prompt construction stays
    # outside the try so only the LLM call itself is logged on failure.
    sections = [
        f"{instructions}\n\nOriginal Resume:\n{generated_resume}\n\n",
        f"Verification Results:\n{json.dumps(verification_results, indent=2)}\n\n",
        f"Skills Mapping:\n{json.dumps(skills_mapping, indent=2)}\n\n",
        f"Candidate Qualifications:\n{json.dumps(candidate_qualifications, indent=2)}\n\n",
        f"Original Resume Header:\n{original_header}",
    ]
    prompt = "".join(sections)

    try:
        return call_llm(prompt, temperature=0.3)
    except Exception as err:
        logger.error(f"Error in resume correction: {err}")
        raise

# Main orchestration function

def generate_factual_tailored_resume(job_description: str, resume: str, additional_context: str = "") -> Dict:
    """
    Run the full pipeline that produces a factually grounded tailored resume.

    Stages, in order: job analysis, candidate analysis, skills mapping,
    header extraction, resume generation, verification. If the verification
    report's overall assessment is "NEEDS REVISION", one correction pass is
    made and the corrected resume is verified again.

    Args:
        job_description: The job description text
        resume: The candidate's original resume text
        additional_context: Any additional context about the candidate (optional)

    Returns:
        Dict with the keys "job_requirements", "candidate_qualifications",
        "skills_mapping", "generated_resume" and "verification_results".

    Raises:
        Exception: Any stage failure is logged here and re-raised unchanged.
    """
    try:
        logger.info("Starting multi-stage RAG resume generation process")

        # Stage 1A: job posting -> structured requirements
        logger.info("Stage 1A: Analyzing job requirements")
        requirements = analyze_job_requirements(job_description)

        # Stage 1B: resume + context -> structured qualifications
        logger.info("Stage 1B: Analyzing candidate qualifications")
        qualifications = analyze_candidate_qualifications(resume, additional_context)

        # Stage 1C: requirements x qualifications -> mapping
        logger.info("Stage 1C: Creating skills mapping")
        mapping = create_skills_mapping(requirements, qualifications)

        # Header is taken from the original resume text
        header = extract_header_from_resume(resume)

        # Stage 2: write the tailored resume
        logger.info("Stage 2: Generating tailored resume")
        draft = generate_tailored_resume(mapping, qualifications, header)

        # Stage 3: fact-check the draft
        logger.info("Stage 3: Verifying resume for accuracy")
        report = verify_resume(draft, mapping, qualifications)

        # One correction pass, followed by a fresh verification
        needs_revision = (
            report["verification_results"]["overall_assessment"] == "NEEDS REVISION"
        )
        if needs_revision:
            logger.info("Correcting issues found in verification")
            draft = correct_resume_issues(draft, report, mapping, qualifications, header)

            logger.info("Re-verifying corrected resume")
            report = verify_resume(draft, mapping, qualifications)

        # Bundle the resume with every intermediate artifact
        outcome = {
            "job_requirements": requirements,
            "candidate_qualifications": qualifications,
            "skills_mapping": mapping,
            "generated_resume": draft,
            "verification_results": report,
        }

        logger.info("Resume generation process completed successfully")
        return outcome

    except Exception as err:
        logger.error(f"Error in resume generation process: {err}")
        raise

# Command-line interface

# Script entry point: runs only when the file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate a factually accurate tailored resume")
    parser.add_argument("--job", required=True, help="Path to job description file")
    parser.add_argument("--resume", required=True, help="Path to resume file")
    parser.add_argument("--context", help="Path to additional context file")
    parser.add_argument("--output", default="output.json", help="Path to output file")

    args = parser.parse_args()

    # Read input files (explicit UTF-8 so results do not depend on the
    # platform's default encoding)
    with open(args.job, 'r', encoding='utf-8') as f:
        job_description = f.read()

    with open(args.resume, 'r', encoding='utf-8') as f:
        resume = f.read()

    # The context file is optional; default to no extra context
    additional_context = ""
    if args.context:
        with open(args.context, 'r', encoding='utf-8') as f:
            additional_context = f.read()

    # Generate resume
    result = generate_factual_tailored_resume(job_description, resume, additional_context)

    # Write output
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    # Write the resume to a separate file for convenience; UTF-8 avoids
    # encode errors if the generated text contains non-ASCII characters
    with open("generated_resume.txt", 'w', encoding='utf-8') as f:
        f.write(result["generated_resume"])

    print(f"Resume generation complete. Results saved to {args.output}")
    print("Generated resume saved to generated_resume.txt")