from datetime import UTC, datetime
import logging
from typing import Any, Dict, List, Optional

from .protocols import DatabaseProtocol
from ..constants import KEY_PREFIXES

logger = logging.getLogger(__name__)


class DocumentMixin(DatabaseProtocol):
    """Mixin for document-related database operations"""

    async def get_document(self, document_id: str) -> Optional[Dict]:
        """Get document metadata by ID"""
        key = f"document:{document_id}"
        data = await self.redis.get(key)
        return self._deserialize(data) if data else None

    async def set_document(self, document_id: str, document_data: Dict):
        """Set document metadata"""
        key = f"document:{document_id}"
        await self.redis.set(key, self._serialize(document_data))

    async def delete_document(self, document_id: str):
        """Delete document metadata"""
        key = f"document:{document_id}"
        await self.redis.delete(key)

    async def delete_all_candidate_documents(self, candidate_id: str) -> int:
        """Delete all documents for a specific candidate and return count of deleted documents"""
        try:
            # Get all document IDs for this candidate
            key = f"{KEY_PREFIXES['candidate_documents']}{candidate_id}"
            document_ids = await self.redis.lrange(key, 0, -1)  # type: ignore
            if not document_ids:
                logger.info(f"No documents found for candidate {candidate_id}")
                return 0

            deleted_count = 0
            # Use pipeline for efficient batch operations
            pipe = self.redis.pipeline()

            # Delete each document's metadata and its associated job requirements
            for doc_id in document_ids:
                pipe.delete(f"document:{doc_id}")
                pipe.delete(f"{KEY_PREFIXES['job_requirements']}{doc_id}")
                deleted_count += 1

            # Delete the candidate's document list
            pipe.delete(key)

            # Execute all operations
            await pipe.execute()

            logger.info(f"Successfully deleted {deleted_count} documents for candidate {candidate_id}")
            return deleted_count
        except Exception as e:
            logger.error(f"Error deleting all documents for candidate {candidate_id}: {e}")
            raise

    async def get_candidate_documents(self, candidate_id: str) -> List[Dict]:
        """Get all documents for a specific candidate"""
        key = f"{KEY_PREFIXES['candidate_documents']}{candidate_id}"
        document_ids = await self.redis.lrange(key, 0, -1)  # type: ignore
        if not document_ids:
            return []

        # Get all document metadata in a single pipelined round trip
        pipe = self.redis.pipeline()
        for doc_id in document_ids:
            pipe.get(f"document:{doc_id}")
        values = await pipe.execute()

        documents = []
        for doc_id, value in zip(document_ids, values):
            if value:
                doc_data = self._deserialize(value)
                if doc_data:
                    documents.append(doc_data)
            else:
                # Clean up orphaned document IDs whose metadata no longer exists
                await self.redis.lrem(key, 0, doc_id)  # type: ignore
                logger.warning(f"Removed orphaned document ID {doc_id} for candidate {candidate_id}")

        return documents

    async def add_document_to_candidate(self, candidate_id: str, document_id: str):
        """Add a document ID to a candidate's document list"""
        key = f"{KEY_PREFIXES['candidate_documents']}{candidate_id}"
        await self.redis.rpush(key, document_id)  # type: ignore

    async def remove_document_from_candidate(self, candidate_id: str, document_id: str):
        """Remove a document ID from a candidate's document list"""
        key = f"{KEY_PREFIXES['candidate_documents']}{candidate_id}"
        await self.redis.lrem(key, 0, document_id)  # type: ignore

    async def update_document(self, document_id: str, updates: Dict) -> Dict[Any, Any] | None:
        """Merge updates into document metadata; return the updated document, or None if it does not exist"""
        document_data = await self.get_document(document_id)
        if document_data:
            document_data.update(updates)
            await self.set_document(document_id, document_data)
            return document_data
        return None
    async def get_documents_by_rag_status(self, candidate_id: str, include_in_rag: bool = True) -> List[Dict]:
        """Get candidate documents filtered by RAG inclusion status"""
        all_documents = await self.get_candidate_documents(candidate_id)
        return [doc for doc in all_documents if doc.get("include_in_rag", False) == include_in_rag]

    async def bulk_update_document_rag_status(self, candidate_id: str, document_ids: List[str], include_in_rag: bool):
        """Bulk update RAG status for multiple documents"""
        pipe = self.redis.pipeline()
        for doc_id in document_ids:
            doc_data = await self.get_document(doc_id)
            # Only update documents that exist and belong to this candidate
            if doc_data and doc_data.get("candidate_id") == candidate_id:
                doc_data["include_in_rag"] = include_in_rag
                doc_data["updatedAt"] = datetime.now(UTC).isoformat()
                pipe.set(f"document:{doc_id}", self._serialize(doc_data))
        await pipe.execute()

    async def get_document_count_for_candidate(self, candidate_id: str) -> int:
        """Get total number of documents for a candidate"""
        key = f"{KEY_PREFIXES['candidate_documents']}{candidate_id}"
        return await self.redis.llen(key)  # type: ignore

    async def search_candidate_documents(self, candidate_id: str, query: str) -> List[Dict]:
        """Case-insensitively search a candidate's documents by filename"""
        all_documents = await self.get_candidate_documents(candidate_id)
        query_lower = query.lower()
        return [
            doc
            for doc in all_documents
            if (
                query_lower in doc.get("filename", "").lower()
                or query_lower in doc.get("originalName", "").lower()
            )
        ]
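
# Usage sketch (illustrative only): one way to compose DocumentMixin into a
# concrete database class. The names below (RedisDatabase, the Redis URL, and
# the JSON-based _serialize/_deserialize helpers) are assumptions for the
# example; the real project may wire these up differently elsewhere.
#
#     import json
#     import redis.asyncio as aioredis
#
#     class RedisDatabase(DocumentMixin):
#         def __init__(self, url: str = "redis://localhost:6379/0"):
#             # decode_responses=True so get/lrange return str, matching the
#             # f-string key and ID handling in the mixin above
#             self.redis = aioredis.from_url(url, decode_responses=True)
#
#         def _serialize(self, data: Dict) -> str:
#             return json.dumps(data)
#
#         def _deserialize(self, raw: str) -> Optional[Dict]:
#             try:
#                 return json.loads(raw)
#             except (TypeError, json.JSONDecodeError):
#                 return None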