# backstory/src/backend/agents/skill_match.py
from __future__ import annotations

from typing import (
    Literal,
    ClassVar,
    Any,
    AsyncGenerator,
    Optional,
    # override
)  # NOTE: You must import Optional for late binding to work
import json
from database.core import RedisDatabase

from .base import Agent, agent_registry
from models import (
    ApiActivityType,
    ApiMessage,
    ChatMessage,
    ChatMessageError,
    ChatMessageRagSearch,
    ChatMessageSkillAssessment,
    ApiStatusType,
    ChatMessageStatus,
    EvidenceDetail,
    SkillAssessment,
    Tunables,
)
from logger import logger
import backstory_traceback as traceback


class SkillMatchAgent(Agent):
    agent_type: Literal["skill_match"] = "skill_match"  # type: ignore
    _agent_type: ClassVar[str] = agent_type  # Add this for registration

    def generate_skill_assessment_prompt(self, skill: str, rag_context: str) -> tuple[str, str]:
        """
        Generate the prompts used to query the LLM for evidence of a specific skill.

        Parameters:
        - skill (str): The specific skill to assess from job requirements
        - rag_context (str): Additional RAG content queried from candidate documents

        Returns:
        - tuple[str, str]: A system prompt and a user prompt tailored to assess the specific skill
        """
        system_prompt = f"""You are an objective skill assessor. Your task is to determine if a candidate possesses
a SPECIFIC skill based solely on their resume and supporting evidence.

## SKILL TO ASSESS:
"{skill}"

## INSTRUCTIONS:
1. Focus exclusively on assessing the candidate's proficiency with the skill: "{skill}".
2. Examine the resume and supporting documents for both explicit mentions and clearly demonstrated applications of the skill.
3. Do NOT infer the skill unless it is:
   - Directly mentioned in association with experience,
   - Clearly evidenced through relevant tools, technologies, responsibilities, or outcomes, or
   - Referenced as being used or listed as part of a broader initiative.
4. Evaluate each piece of evidence and assign a confidence rating:
   - STRONG: Explicit use with clear context or repeated/prolonged experience
   - MODERATE: Inferred through tools, environments, or outcomes (e.g., Python used in a listed project/tool)
   - WEAK: Mentioned in a list or indirectly implied without context
   - NONE: No relevant evidence
5. Provide up to 10 evidence_details. Each should include:
   - source: where the evidence appears
   - quote: an exact snippet from the document(s)
   - context: a short rationale explaining how this supports the skill claim
6. When no supporting evidence exists, output a "NONE" rating with an empty evidence_details list.
7. IMPORTANT: Even if the skill is only listed in a technologies or languages section, consider this valid evidence.

## OUTPUT FORMAT:
```json
{{
  "skill_assessment": {{
    "skill": "{skill}",
    "evidence_found": true/false,
    "evidence_strength": "STRONG/MODERATE/WEAK/NONE",
    "assessment": "short (one to two sentence) assessment of the candidate's proficiency with {skill}",
    "description": "short (two to three sentence) description of what {skill} is, independent of whether the candidate has that skill",
    "evidence_details": [
      {{
        "source": "resume section/position/project",
        "quote": "exact text from resume showing evidence",
        "context": "brief explanation of how this demonstrates the skill"
      }}
    ]
  }}
}}
```

IMPORTANT: Be factual and precise. If you cannot find strong evidence for this specific skill, it is better to report "evidence_strength": "WEAK" than to stretch for connections.
Focus only on "{skill}" and not similar skills unless they directly demonstrate the required skill.
Remember that a skill listed in a "Languages" or "Technologies" section should be considered valid evidence.
Adhere strictly to the JSON output format requested. Do not include any additional text or commentary outside the JSON structure.
"""

        prompt = f"""Analyze the candidate information below for evidence of "{skill}".
RESPOND WITH ONLY VALID JSON USING THE EXACT FORMAT SPECIFIED.

<candidate_info>
{rag_context}
</candidate_info>

JSON RESPONSE:"""
        return system_prompt, prompt
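
    # Both returned strings feed the llm_one_shot() call in generate() below: the
    # system prompt pins the assessor role and the JSON output schema, while the
    # user prompt carries the RAG-retrieved candidate context for the skill.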
    async def generate(
        self,
        llm: Any,
        model: str,
        session_id: str,
        prompt: str,
        database: RedisDatabase,
        extra_context: Optional[dict[str, str | int | float | bool]] = None,
        tunables: Optional[Tunables] = None,
        temperature: float = 0.7,
    ) -> AsyncGenerator[ApiMessage, None]:
        if not self.user:
            error_message = ChatMessageError(
                session_id=session_id,
                content="Agent not attached to user. Attach the agent to a user before generating responses.",
            )
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return

        skill = prompt.strip()
        if not skill:
            error_message = ChatMessageError(session_id=session_id, content="Skill cannot be empty.")
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return
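
        # Retrieve supporting RAG context for the skill. Intermediate (non-DONE)
        # messages are streamed through to the caller for progress reporting; after
        # the loop, generated_message holds the final message from the generator.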
        generated_message = None
        async for generated_message in self.generate_rag_results(session_id=session_id, prompt=skill):
            if generated_message.status == ApiStatusType.ERROR:
                yield generated_message
                return
            if generated_message.status != ApiStatusType.DONE:
                yield generated_message

        if generated_message is None:
            error_message = ChatMessageError(
                session_id=session_id, content="RAG search did not return a valid response."
            )
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return

        if not isinstance(generated_message, ChatMessageRagSearch):
            logger.error(f"Expected ChatMessageRagSearch, got {type(generated_message)}")
            error_message = ChatMessageError(
                session_id=session_id, content="RAG search did not return a valid response."
            )
            yield error_message
            return

        rag_message: ChatMessageRagSearch = generated_message
        rag_context = self.get_rag_context(rag_message)
        logger.info(f"🔍 RAG retrieval returned {len(rag_context)} characters of context")

        system_prompt, prompt = self.generate_skill_assessment_prompt(skill=skill, rag_context=rag_context)
        status_message = ChatMessageStatus(
            session_id=session_id, activity=ApiActivityType.GENERATING, content="Generating skill assessment..."
        )
        yield status_message
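
        # One-shot LLM call with the skill-specific prompts; progress and error
        # messages are forwarded the same way as for the RAG search above.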
        generated_message = None
        async for generated_message in self.llm_one_shot(
            llm=llm,
            model=model,
            session_id=session_id,
            prompt=prompt,
            system_prompt=system_prompt,
            temperature=temperature,
        ):
            if generated_message.status == ApiStatusType.ERROR:
                logger.error(f"⚠️ {generated_message.content}")
                yield generated_message
                return
            if generated_message.status != ApiStatusType.DONE:
                yield generated_message

        if generated_message is None:
            error_message = ChatMessageError(
                session_id=session_id, content="Skill assessment failed to generate a response."
            )
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return

        if not isinstance(generated_message, ChatMessage):
            error_message = ChatMessageError(
                session_id=session_id, content="Skill assessment did not return a valid message."
            )
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return
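
        # Pull the JSON payload out of the raw LLM reply before parsing, in case
        # the model wrapped it in extra text despite the instructions.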
        json_str = self.extract_json_from_text(generated_message.content)
        skill_assessment_data: dict[str, Any] = {}
        skill_assessment = None
        try:
            skill_assessment_data = json.loads(json_str).get("skill_assessment", {})
            skill_assessment = SkillAssessment(
                candidate_id=self.user.id,
                skill=skill,
                skill_modified=skill_assessment_data.get("skill", ""),
                evidence_found=skill_assessment_data.get("evidence_found", False),
                evidence_strength=skill_assessment_data.get("evidence_strength", "NONE").lower(),
                assessment=skill_assessment_data.get("assessment", ""),
                description=skill_assessment_data.get("description", ""),
                rag_results=rag_message.content,
                evidence_details=[
                    EvidenceDetail(
                        source=evidence.get("source", ""),
                        quote=evidence.get("quote", ""),
                        context=evidence.get("context", ""),
                    )
                    for evidence in skill_assessment_data.get("evidence_details", [])
                ],
            )
        except Exception as e:
            error_message = ChatMessageError(
                session_id=session_id,
                content=f"Failed to parse skill assessment JSON: {str(e)}\n\n{generated_message.content}\n\nJSON:\n{json_str}\n\n",
            )
            logger.error(traceback.format_exc())
            logger.error(f"⚠️ {error_message.content}")
            yield error_message
            return

        # if skill_assessment.evidence_strength == "none":
        #     logger.info("⚠️ No evidence found for skill assessment, returning NONE.")
        #     with open("src/tmp.txt", "w") as f:
        #         f.write(f"Skill: {skill}\n\n")
        #         f.write(f"System prompt:\n{system_prompt}\n\n")
        #         f.write(f"Prompt:\n{prompt}\n\n")
        #         f.write(f"LLM response:\n{generated_message.content}\n\n")
        #         f.write(f"JSON portion:\n{json_str}\n\n")
        #         f.write(f"JSON parsed:\n{json.dumps(skill_assessment_data, indent=2)}\n\n")
        #         f.write("Skill assessment data:\n")
        #         f.write(skill_assessment.model_dump_json(indent=2))

        skill_assessment_message = ChatMessageSkillAssessment(
            session_id=session_id,
            status=ApiStatusType.DONE,
            content=json.dumps(skill_assessment_data),
            metadata=generated_message.metadata,
            skill_assessment=skill_assessment,
        )
        yield skill_assessment_message
        logger.info("✅ Skill assessment completed successfully.")
        return

# Register the skill match agent
agent_registry.register(SkillMatchAgent._agent_type, SkillMatchAgent)
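
# Usage sketch (illustrative only): one plausible way a caller could drive the
# streaming protocol above. `build_llm_client`, `build_database`, and `candidate`
# are hypothetical placeholders for however the host application wires these up,
# and the model name is an assumption, not a project default.
#
#   import asyncio
#
#   async def assess(skill: str) -> None:
#       agent = SkillMatchAgent(user=candidate)  # hypothetical construction
#       async for message in agent.generate(
#           llm=build_llm_client(),
#           model="qwen2.5:7b",
#           session_id="session-123",
#           prompt=skill,
#           database=build_database(),
#       ):
#           if message.status == ApiStatusType.ERROR:
#               print(f"error: {message.content}")
#           elif isinstance(message, ChatMessageSkillAssessment) and message.skill_assessment:
#               print(message.skill_assessment.model_dump_json(indent=2))
#
#   asyncio.run(assess("Python"))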