From 699acf9313e8a9cb6d569f770c72890dbf9f7792 Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Mon, 2 Jun 2025 17:26:07 -0700
Subject: [PATCH] Works

---
 frontend/src/components/CandidateInfo.tsx    |   2 +-
 frontend/src/components/VectorVisualizer.tsx |   2 +-
 frontend/src/pages/GenerateCandidate.tsx     |   8 +-
 frontend/src/pages/candidate/Profile.tsx     |  17 +-
 frontend/src/services/api-client.ts          |  20 +-
 frontend/src/types/types.ts                  |   4 +-
 src/backend/entities/candidate_entity.py     |  78 ------
 src/backend/main.py                          | 241 +++++++++++++++++--
 src/backend/models.py                        |   1 -
 9 files changed, 250 insertions(+), 123 deletions(-)

diff --git a/frontend/src/components/CandidateInfo.tsx b/frontend/src/components/CandidateInfo.tsx
index 4b0531b..02f138c 100644
--- a/frontend/src/components/CandidateInfo.tsx
+++ b/frontend/src/components/CandidateInfo.tsx
@@ -57,7 +57,7 @@ const CandidateInfo: React.FC = (props: CandidateInfoProps)
             maxWidth: "80px"
           }}>
diff --git a/frontend/src/components/VectorVisualizer.tsx b/frontend/src/components/VectorVisualizer.tsx
= (props: VectorVisualiz
   };
 };
-  if (!plotData) return (
+  if (!result) return (
Loading visualization...
diff --git a/frontend/src/pages/GenerateCandidate.tsx b/frontend/src/pages/GenerateCandidate.tsx
index b39c53b..e1c838d 100644
--- a/frontend/src/pages/GenerateCandidate.tsx
+++ b/frontend/src/pages/GenerateCandidate.tsx
@@ -45,7 +45,7 @@ const emptyUser: CandidateAI = {
   languages: [],
   certifications: [],
   isAdmin: false,
-  hasProfile: false,
+  profileImage: undefined,
   ragContentSize: 0
 };
 
@@ -381,7 +381,7 @@ const GenerateCandidate = (props: BackstoryElementProps) => {
       }}>
       {
       {processing && }
-
+
diff --git a/frontend/src/pages/candidate/Profile.tsx b/frontend/src/pages/candidate/Profile.tsx
index d138e3e..d54367a 100644
--- a/frontend/src/pages/candidate/Profile.tsx
+++ b/frontend/src/pages/candidate/Profile.tsx
@@ -204,15 +204,12 @@ const CandidateProfilePage: React.FC = (props: BackstoryPage
   };
 
   // Handle profile image upload
-  const handleImageUpload = (e: React.ChangeEvent) => {
+  const handleImageUpload = async (e: React.ChangeEvent) => {
     if (e.target.files && e.target.files[0]) {
-      const reader = new FileReader();
-      reader.onload = (event) => {
-        if (event.target?.result) {
-          setProfileImage(event.target.result.toString());
-        }
-      };
-      reader.readAsDataURL(e.target.files[0]);
+      if (await apiClient.uploadCandidateProfile(e.target.files[0])) {
+        candidate.profileImage = e.target.files[0].name;
+        updateUserData(candidate);
+      }
     }
   };
 
@@ -301,7 +298,7 @@ const CandidateProfilePage: React.FC = (props: BackstoryPage
= (props: BackstoryPage
                   border: `2px solid ${theme.palette.primary.main}`
                 }}
               >
-                {!profileImage && !candidate.profileImage && }
+                {!profileImage && }
               {editMode.basic && (
                 <>
diff --git a/frontend/src/services/api-client.ts b/frontend/src/services/api-client.ts
index 1fe2e39..c5b2246 100644
--- a/frontend/src/services/api-client.ts
+++ b/frontend/src/services/api-client.ts
@@ -528,6 +528,25 @@ class ApiClient {
     return this.handleApiResponseWithConversion(response, 'Candidate');
   }
 
+  async uploadCandidateProfile(file: File): Promise {
+    const formData = new FormData()
+    formData.append('file', file);
+    formData.append('filename', file.name);
+
+    const response = await fetch(`${this.baseUrl}/candidates/profile/upload`, {
+      method: 'POST',
+      headers: {
+        // Don't set Content-Type - browser will set it automatically with boundary
+        'Authorization': this.defaultHeaders['Authorization']
+      },
+      body: formData
+    });
+
+    const result = await handleApiResponse(response);
+
+    return result;
+  }
+
   async getCandidates(request: Partial = {}): Promise> {
     const paginatedRequest = createPaginatedRequest(request);
     const params = toUrlParams(formatApiRequest(paginatedRequest));
@@ -780,7 +799,6 @@ class ApiClient {
    */
   async uploadCandidateDocument(file: File, includeInRag: boolean = true): Promise {
     const formData = new FormData()
-    console.log(file);
     formData.append('file', file);
     formData.append('filename', file.name);
     formData.append('include_in_rag', includeInRag.toString());
diff --git a/frontend/src/types/types.ts b/frontend/src/types/types.ts
index 1fd9b1c..9dde1b7 100644
--- a/frontend/src/types/types.ts
+++ b/frontend/src/types/types.ts
@@ -1,6 +1,6 @@
 // Generated TypeScript types from Pydantic models
 // Source: src/backend/models.py
-// Generated on: 2025-06-02T23:04:30.814624
+// Generated on: 2025-06-02T23:24:36.213957
 // DO NOT EDIT MANUALLY - This file is auto-generated
 
 // ============================
@@ -196,7 +196,6 @@ export interface Candidate {
   languages?: Array;
   certifications?: Array;
   jobApplications?: Array;
-  hasProfile: boolean;
   rags?: Array;
   ragContentSize: number;
 }
@@ -230,7 +229,6 @@ export interface CandidateAI {
   languages?: Array;
   certifications?: Array;
   jobApplications?: Array;
-  hasProfile: boolean;
   rags?: Array;
   ragContentSize: number;
   isAI: boolean;
diff --git a/src/backend/entities/candidate_entity.py b/src/backend/entities/candidate_entity.py
index 260ed63..0cf9bd4 100644
--- a/src/backend/entities/candidate_entity.py
+++ b/src/backend/entities/candidate_entity.py
@@ -107,82 +107,6 @@ class CandidateEntity(Candidate):
             raise ValueError("initialize() has not been called.")
         return self.CandidateEntity__observer
 
-    @classmethod
-    def sanitize(cls, user: Dict[str, Any]):
-        sanitized : Dict[str, Any] = {}
-        sanitized["username"] = user.get("username", "default")
-        sanitized["first_name"] = user.get("first_name", sanitized["username"])
-        sanitized["last_name"] = user.get("last_name", "")
-        sanitized["title"] = user.get("title", "")
-        sanitized["phone"] = user.get("phone", "")
-        sanitized["location"] = user.get("location", "")
-        sanitized["email"] = user.get("email", "")
-        sanitized["full_name"] = user.get("full_name", f"{sanitized["first_name"]} {sanitized["last_name"]}")
-        sanitized["description"] = user.get("description", "")
-        profile_image = os.path.join(defines.user_dir, sanitized["username"], "profile.png")
-        sanitized["has_profile"] = os.path.exists(profile_image)
-        contact_info = user.get("contact_info", {})
-        sanitized["contact_info"] = {}
-        for key in contact_info:
-            if not isinstance(contact_info[key], (str, int, float, complex)):
-                continue
-            sanitized["contact_info"][key] = contact_info[key]
-        questions = user.get("questions", [ f"Tell me about {sanitized['first_name']}.", f"What are {sanitized['first_name']}'s professional strengths?"])
-        sanitized["user_questions"] = []
-        for question in questions:
-            if type(question) == str:
-                sanitized["user_questions"].append({"question": question})
-            else:
-                try:
-                    tmp = CandidateQuestion.model_validate(question)
-                    sanitized["user_questions"].append({"question": tmp.question})
-                except Exception as e:
-                    continue
-        return sanitized
-
-    @classmethod
-    def get_users(cls):
-        # Initialize an empty list to store parsed JSON data
-        user_data = []
-
-        # Define the users directory path
-        users_dir = os.path.join(defines.user_dir)
-
-        # Check if the users directory exists
-        if not os.path.exists(users_dir):
-            return user_data
-
-        # Iterate through all items in the users directory
-        for item in os.listdir(users_dir):
-            # Construct the full path to the item
-            item_path = os.path.join(users_dir, item)
-
-            # Check if the item is a directory
-            if os.path.isdir(item_path):
-                # Construct the path to info.json
-                info_path = os.path.join(item_path, "info.json")
-
-                # Check if info.json exists
-                if os.path.exists(info_path):
-                    try:
-                        # Read and parse the JSON file
-                        with open(info_path, 'r') as file:
-                            data = json.load(file)
-                        data["username"] = item
-                        profile_image = os.path.join(defines.user_dir, item, "profile.png")
-                        data["has_profile"] = os.path.exists(profile_image)
-                        user_data.append(data)
-                    except json.JSONDecodeError as e:
-                        # Skip files that aren't valid JSON
-                        logger.info(f"Invalid JSON for {info_path}: {str(e)}")
-                        continue
-                    except Exception as e:
-                        # Skip files that can't be read
-                        logger.info(f"Exception processing {info_path}: {str(e)}")
-                        continue
-
-        return user_data
-
     async def initialize(self, prometheus_collector: CollectorRegistry):
         if self.CandidateEntity__initialized:
             # Initialization can only be attempted once; if there are multiple attempts, it means
@@ -200,8 +124,6 @@ class CandidateEntity(Candidate):
         vector_db_dir=os.path.join(user_dir, defines.persist_directory)
         rag_content_dir=os.path.join(user_dir, defines.rag_content_dir)
 
-        logger.info(f"CandidateEntity(username={self.username}, user_dir={user_dir} persist_directory={vector_db_dir}, rag_content_dir={rag_content_dir}")
-
         os.makedirs(vector_db_dir, exist_ok=True)
         os.makedirs(rag_content_dir, exist_ok=True)
 
diff --git a/src/backend/main.py b/src/backend/main.py
index 96e82b8..3e31ebb 100644
--- a/src/backend/main.py
+++ b/src/backend/main.py
@@ -406,14 +406,6 @@ def get_candidate_files_dir(username: str) -> pathlib.Path:
     files_dir.mkdir(parents=True, exist_ok=True)
     return files_dir
 
-def get_document_file_path(username: str, document_id: str, filename: str) -> pathlib.Path:
-    """Get the full file path for a document"""
-    files_dir = get_candidate_files_dir(username)
-    # Use document ID + original extension to avoid filename conflicts
-    file_extension = pathlib.Path(filename).suffix
-    safe_filename = f"{document_id}{file_extension}"
-    return files_dir / safe_filename
-
 def get_document_type_from_filename(filename: str) -> DocumentType:
     """Determine document type from filename extension"""
     extension = pathlib.Path(filename).suffix.lower()
@@ -1501,7 +1493,23 @@ async def upload_candidate_document(
             )
 
         logger.info(f"📁 Received file upload: filename='{file.filename}', content_type='{file.content_type}', size estimate='{file.size if hasattr(file, 'size') else 'unknown'}'")
-
+
+        # Ensure the file does not already exist either in 'files' or in 'rag-content'
+        file_path = os.path.join(defines.user_dir, candidate.username, "rag-content", file.filename)
+        if os.path.exists(file_path):
+            logger.warning(f"⚠️ File already exists: {file_path}")
+            return JSONResponse(
+                status_code=400,
+                content=create_error_response("FILE_EXISTS", "File with this name already exists")
+            )
+        file_path = os.path.join(defines.user_dir, candidate.username, "files", file.filename)
+        if os.path.exists(file_path):
+            logger.warning(f"⚠️ File already exists: {file_path}")
+            return JSONResponse(
+                status_code=400,
+                content=create_error_response("FILE_EXISTS", "File with this name already exists")
+            )
+
         # Validate file type
         allowed_types = ['.txt', '.md', '.docx', '.pdf', '.png', '.jpg', '.jpeg', '.gif']
         file_extension = pathlib.Path(file.filename).suffix.lower() if file.filename else ""
@@ -1542,7 +1550,7 @@ async def upload_candidate_document(
             )
 
         # Save file to disk
-        file_path = os.path.join(defines.user_dir, candidate.username, "files", file.filename)
+        file_path = os.path.join(defines.user_dir, candidate.username, "rag-content" if include_in_rag else "files", file.filename)
 
         try:
             with open(file_path, "wb") as f:
@@ -1589,6 +1597,140 @@ async def upload_candidate_document(
             content=create_error_response("UPLOAD_ERROR", "Failed to upload document")
         )
 
+@api_router.post("/candidates/profile/upload")
+async def upload_candidate_profile(
+    file: UploadFile = File(...),
+    current_user = Depends(get_current_user),
+    database: RedisDatabase = Depends(get_database)
+):
+    """Upload a document for the current candidate"""
+    try:
+        # Verify user is a candidate
+        if current_user.user_type != "candidate":
+            logger.warning(f"⚠️ Unauthorized upload attempt by user type: {current_user.user_type}")
+            return JSONResponse(
+                status_code=403,
+                content=create_error_response("FORBIDDEN", "Only candidates can upload their profile")
+            )
+
+        candidate: Candidate = current_user
+        # Validate file type
+        allowed_types = ['.png', '.jpg', '.jpeg', '.gif']
+        file_extension = pathlib.Path(file.filename).suffix.lower() if file.filename else ""
+
+        if file_extension not in allowed_types:
+            logger.warning(f"⚠️ Invalid file type: {file_extension} for file {file.filename}")
+            return JSONResponse(
+                status_code=400,
+                content=create_error_response(
+                    "INVALID_FILE_TYPE",
+                    f"File type {file_extension} not supported. Allowed types: {', '.join(allowed_types)}"
+                )
+            )
+
+        # Check file size (limit to 2MB)
+        max_size = 2 * 1024 * 1024  # 2MB
+        file_content = await file.read()
+        if len(file_content) > max_size:
+            logger.info(f"⚠️ File too large: {file.filename} ({len(file_content)} bytes)")
+            return JSONResponse(
+                status_code=400,
+                content=create_error_response("FILE_TOO_LARGE", "File size exceeds 10MB limit")
+            )
+
+        # Save file to disk as "profile."
+        _, extension = os.path.splitext(file.filename)
+        file_path = os.path.join(defines.user_dir, candidate.username, f"profile{extension}")
+
+        try:
+            with open(file_path, "wb") as f:
+                f.write(file_content)
+
+            logger.info(f"📁 File saved to disk: {file_path}")
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save file to disk: {e}")
+            return JSONResponse(
+                status_code=500,
+                content=create_error_response("FILE_SAVE_ERROR", "Failed to save file to disk")
+            )
+
+        updates = {
+            "updated_at": datetime.now(UTC).isoformat(),
+            "profile_image": f"profile{extension}"
+        }
+        candidate_dict = candidate.model_dump()
+        candidate_dict.update(updates)
+        updated_candidate = Candidate.model_validate(candidate_dict)
+        await database.set_candidate(candidate.id, updated_candidate.model_dump())
+        logger.info(f"📄 Profile image uploaded: {updated_candidate.profile_image} for candidate {candidate.id}")
+
+        return create_success_response(True)
+
+    except Exception as e:
+        logger.error(traceback.format_exc())
+        logger.error(f"❌ Document upload error: {e}")
+        return JSONResponse(
+            status_code=500,
+            content=create_error_response("UPLOAD_ERROR", "Failed to upload document")
+        )
+
+@api_router.get("/candidates/profile/{username}")
+async def get_candidate_profile_image(
+    username: str = Path(..., description="Username of the candidate"),
+    # current_user = Depends(get_current_user),
+    database: RedisDatabase = Depends(get_database)
+):
+    """Get profile image of a candidate by username"""
+    logger.info(f"🔍 Fetching profile image for candidate: {username}")
+    try:
+        all_candidates_data = await database.get_all_candidates()
+        candidates_list = [Candidate.model_validate(data) for data in all_candidates_data.values()]
+
+        # Normalize username to lowercase for case-insensitive search
+        query_lower = username.lower()
+
+        # Filter by search query
+        candidates_list = [
+            c for c in candidates_list
+            if (query_lower == c.email.lower() or
+                query_lower == c.username.lower())
+        ]
+
+        if not len(candidates_list):
+            return JSONResponse(
+                status_code=404,
+                content=create_error_response("NOT_FOUND", "Candidate not found")
+            )
+
+        candidate = Candidate.model_validate(candidates_list[0])
+        if not candidate.profile_image:
+            logger.warning(f"⚠️ Candidate {candidate.username} has no profile image set")
+            return JSONResponse(
+                status_code=404,
+                content=create_error_response("NOT_FOUND", "Profile image not found")
+            )
+        file_path = os.path.join(defines.user_dir, candidate.username, candidate.profile_image)
+        file_path = pathlib.Path(file_path)
+        if not file_path.exists():
+            logger.error(f"❌ Profile image file not found on disk: {file_path}")
+            return JSONResponse(
+                status_code=404,
+                content=create_error_response("FILE_NOT_FOUND", "Profile image file not found on disk")
+            )
+        return FileResponse(
+            file_path,
+            media_type=f"image/{file_path.suffix[1:]}",  # Get extension without dot
+            filename=candidate.profile_image
+        )
+    except Exception as e:
+        logger.error(traceback.format_exc())
+        logger.error(f"❌ Get candidate profile image failed: {str(e)}")
+        return JSONResponse(
+            status_code=500,
+            content=create_error_response("FETCH_ERROR", "Failed to retrieve profile image")
+        )
+
 @api_router.get("/candidates/documents")
 async def get_candidate_documents(
     current_user = Depends(get_current_user),
@@ -1662,7 +1804,7 @@ async def get_document_content(
                 content=create_error_response("FORBIDDEN", "Cannot access another candidate's document")
             )
 
-        file_path = os.path.join(defines.user_dir, candidate.username, "files", document.originalName)
+        file_path = os.path.join(defines.user_dir, candidate.username, "rag-content" if document.include_in_RAG else "files", document.originalName)
         file_path = pathlib.Path(file_path)
         if not document.type in [DocumentType.TXT, DocumentType.MARKDOWN]:
             file_path = file_path.with_suffix('.md')
@@ -1737,6 +1879,39 @@ async def update_document(
                 content=create_error_response("FORBIDDEN", "Cannot update another candidate's document")
             )
 
+        if document.include_in_RAG != updates.include_in_RAG:
+            # If RAG status is changing, we need to handle file movement
+            rag_dir = os.path.join(defines.user_dir, candidate.username, "rag-content")
+            file_dir = os.path.join(defines.user_dir, candidate.username, "files")
+            os.makedirs(rag_dir, exist_ok=True)
+            os.makedirs(file_dir, exist_ok=True)
+            rag_path = os.path.join(rag_dir, document.originalName)
+            file_path = os.path.join(file_dir, document.originalName)
+
+            if updates.include_in_RAG:
+                src = pathlib.Path(file_path)
+                dst = pathlib.Path(rag_path)
+                # Move to RAG directory
+                src.rename(dst)
+                logger.info(f"📁 Moved file to RAG directory")
+                if document.type != DocumentType.MARKDOWN and document.type != DocumentType.TXT:
+                    src = pathlib.Path(file_path)
+                    src_as_md = src.with_suffix(".md")
+                    if src_as_md.exists():
+                        dst = pathlib.Path(rag_path).with_suffix(".md")
+                        src_as_md.rename(dst)
+            else:
+                src = pathlib.Path(rag_path)
+                dst = pathlib.Path(file_path)
+                # Move to regular files directory
+                src.rename(dst)
+                logger.info(f"📁 Moved file to regular files directory")
+                if document.type != DocumentType.MARKDOWN and document.type != DocumentType.TXT:
+                    src_as_md = src.with_suffix(".md")
+                    if src_as_md.exists():
+                        dst = pathlib.Path(file_path).with_suffix(".md")
+                        src_as_md.rename(dst)
+
         # Apply updates
         update_dict = {}
         if updates.filename is not None:
@@ -1812,7 +1987,8 @@ async def delete_document(
             )
 
         # Delete file from disk
-        file_path = get_document_file_path(candidate.username, document_id, document.originalName)
+        file_path = os.path.join(defines.user_dir, candidate.username, "rag-content" if document.include_in_RAG else "files", document.originalName)
+        file_path = pathlib.Path(file_path)
 
         try:
             if file_path.exists():
@@ -1820,6 +1996,14 @@
                 logger.info(f"🗑️ File deleted from disk: {file_path}")
             else:
                 logger.warning(f"⚠️ File not found on disk during deletion: {file_path}")
+
+            # Delete side-car file if it exists
+            if document.type != DocumentType.MARKDOWN and document.type != DocumentType.TXT:
+                p = pathlib.Path(file_path)
+                p_as_md = p.with_suffix(".md")
+                if p_as_md.exists():
+                    p_as_md.unlink()
+
         except Exception as e:
             logger.error(f"❌ Failed to delete file from disk: {e}")
             # Continue with metadata deletion even if file deletion fails
@@ -1940,21 +2124,29 @@ async def post_candidate_vectors(
     async with entities.get_candidate_entity(candidate=candidate) as candidate_entity:
         collection = candidate_entity.umap_collection
         if not collection:
-            logger.error(f"❌ Candidate collection not found")
-            return JSONResponse(
-                status_code=404,
-                content=create_error_response("NOT_FOUND", "Candidate collection not found")
-            )
+            results = {
+                "ids": [],
+                "metadatas": [],
+                "documents": [],
+                "embeddings": [],
+                "size": 0
+            }
+            return create_success_response(results)
 
        if dimensions == 2:
             umap_embedding = candidate_entity.file_watcher.umap_embedding_2d
         else:
             umap_embedding = candidate_entity.file_watcher.umap_embedding_3d
 
         if len(umap_embedding) == 0:
-            return JSONResponse(
-                status_code=404,
-                content=create_error_response("NOT_FOUND", "Candidate collection embedding not found")
-            )
+            results = {
+                "ids": [],
+                "metadatas": [],
+                "documents": [],
+                "embeddings": [],
+                "size": 0
+            }
+            return create_success_response(results)
+
         result = {
             "ids": collection.get("ids", []),
             "metadatas": collection.get("metadatas", []),
@@ -2646,7 +2838,7 @@ async def post_chat_session_message_stream(
         chat_session_data["lastActivity"] = datetime.now(UTC).isoformat()
         await database.set_chat_session(user_message.session_id, chat_session_data)
 
-        return stream_agent_response(
+        return await stream_agent_response(
             chat_agent=chat_agent,
             user_message=user_message,
             candidate=candidate,
@@ -2656,10 +2848,10 @@
 
     except Exception as e:
         logger.error(traceback.format_exc())
-        logger.error(f"❌ Chat message streaming error: {e}")
+        logger.error(f"❌ Chat message streaming error")
         return JSONResponse(
             status_code=500,
-            content=create_error_response("STREAMING_ERROR", str(e))
+            content=create_error_response("STREAMING_ERROR", "")
         )
 
 @api_router.get("/chat/sessions/{session_id}/messages")
@@ -3167,6 +3359,7 @@ async def log_requests(request: Request, call_next):
             logger.warning(f"⚠️ Response {request.method} {response.status_code}: Path: {request.url.path}")
         return response
     except Exception as e:
+        logger.error(traceback.format_exc())
         logger.error(f"❌ Error processing request: {str(e)}, Path: {request.url.path}, Method: {request.method}")
         return JSONResponse(status_code=400, content={"detail": "Invalid HTTP request"})
 
diff --git a/src/backend/models.py b/src/backend/models.py
index 5a28588..2bb4517 100644
--- a/src/backend/models.py
+++ b/src/backend/models.py
@@ -537,7 +537,6 @@ class Candidate(BaseUser):
     languages: Optional[List[Language]] = None
     certifications: Optional[List[Certification]] = None
     job_applications: Optional[List["JobApplication"]] = Field(None, alias="jobApplications")
-    has_profile: bool = Field(default=False, alias="hasProfile")
     rags: List[RagEntry] = Field(default_factory=list)
     rag_content_size : int = 0