Updated system prompts to use better syntax for qwen
Fixed markitdown integration
This commit is contained in:
parent
7672e639f6
commit
581bc4a575
@ -257,6 +257,7 @@ FROM llm-base AS backstory
|
||||
|
||||
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
|
||||
RUN pip install -r /opt/backstory/src/requirements.txt
|
||||
RUN pip install 'markitdown[all]'
|
||||
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
|
@ -26,7 +26,7 @@ services:
|
||||
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
|
||||
- ./chromadb:/opt/backstory/chromadb:rw # Persist ChromaDB
|
||||
- ./dev-keys:/opt/backstory/keys:ro # Developer keys
|
||||
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
|
||||
- ./docs:/opt/backstory/docs:rw # Live mount of RAG content
|
||||
- ./src:/opt/backstory/src:rw # Live mount server src
|
||||
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
|
||||
cap_add: # used for running ze-monitor within container
|
||||
|
@ -82,6 +82,7 @@ const emojiMap: Record<string, string> = {
|
||||
query: '🔍',
|
||||
resume: '📄',
|
||||
projects: '📁',
|
||||
'performance-reviews': '📄',
|
||||
news: '📰',
|
||||
};
|
||||
|
||||
@ -90,6 +91,7 @@ const colorMap: Record<string, string> = {
|
||||
resume: '#4A7A7D', // Dusty Teal — secondary theme color
|
||||
projects: '#1A2536', // Midnight Blue — rich and deep
|
||||
news: '#D3CDBF', // Warm Gray — soft and neutral
|
||||
'performance-reviews': '#FF0000', // Bright red
|
||||
};
|
||||
|
||||
const sizeMap: Record<string, number> = {
|
||||
|
140
src/server.py
140
src/server.py
@ -139,78 +139,68 @@ DEFAULT_HISTORY_LENGTH=5
|
||||
|
||||
# %%
|
||||
# Globals
|
||||
NAME = "James Ketrenos"
|
||||
|
||||
resume_intro = f"""
|
||||
As an AI/ML professional specializing in creating custom solutions to new problem domains, {NAME} developed a custom
|
||||
language model applications that streamline information processing and content generation. This tailored resume
|
||||
was created using a Retrieval-Augmented Generation system I built to efficiently match my relevant experience
|
||||
with your specific needs—demonstrating both my technical capabilities and commitment to intelligent resource
|
||||
optimization.
|
||||
"""
|
||||
|
||||
system_message = f"""
|
||||
Launched on {DateTime()}.
|
||||
|
||||
When answering queries, follow these steps:
|
||||
|
||||
1. First analyze the query to determine if real-time information might be helpful
|
||||
2. Even when [INFO] is provided, consider whether the tools would provide more current or comprehensive information
|
||||
3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
|
||||
4. When presenting weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\" or if the forecast says there will be \"rain showers, say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️
|
||||
4. When both [INFO] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
|
||||
5. Always prioritize the most up-to-date and relevant information, whether it comes from [INFO] or tools
|
||||
6. If [INFO] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
|
||||
7. If there is information in the [INFO], [JOB DESCRIPTION], or [WORK HISTORY] sections to enhance the answer, incorporate it seamlessly and refer to it as 'the latest information' or 'recent data' instead of mentioning '[INFO]' (etc.) or quoting it directly.
|
||||
8. Avoid phrases like 'According to the [INFO]' or similar references to the [INFO], [JOB DESCRIPTION], or [WORK HISTORY] tags.
|
||||
- First analyze the query to determine if real-time information might be helpful
|
||||
- Even when <|context|> is provided, consider whether the tools would provide more current or comprehensive information
|
||||
- Use the provided tools whenever they would enhance your response, regardless of whether context is also available
|
||||
- When presenting weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\" or if the forecast says there will be \"rain showers\", say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️
|
||||
- When both <|context|> and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
|
||||
- Always prioritize the most up-to-date and relevant information, whether it comes from <|context|> or tools
|
||||
- If <|context|> and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
|
||||
- If there is information in the <|context|>, <|job_description|>, or <|work_history|> sections to enhance the answer, incorporate it seamlessly and refer to it as 'the latest information' or 'recent data' instead of mentioning '<|context|>' (etc.) or quoting it directly.
|
||||
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|work_history|> tags.
|
||||
|
||||
Always use tools and [INFO] when possible. Be concise, and never make up information. If you do not know the answer, say so.
|
||||
""".strip()
|
||||
Always use tools and <|context|> when possible. Be concise, and never make up information. If you do not know the answer, say so.
|
||||
"""
|
||||
|
||||
system_generate_resume = f"""
|
||||
Launched on {DateTime()}.
|
||||
|
||||
You are a professional resume writer. Your task is to write a concise, polished, and tailored resume for a specific job based only on the individual's [WORK HISTORY].
|
||||
You are a professional resume writer. Your task is to write a concise, polished, and tailored resume for a specific job based only on the individual's <|work_history|>.
|
||||
|
||||
When answering queries, follow these steps:
|
||||
|
||||
1. You must not invent or assume any inforation not explicitly present in the [WORK HISTORY].
|
||||
2. Analyze the [JOB DESCRIPTION] to identify skills required for the job.
|
||||
3. Use the [JOB DESCRIPTION] provided to guide the focus, tone, and relevant skills or experience to highlight from the [WORK HISTORY].
|
||||
4. Identify and emphasize the experiences, achievements, and responsibilities from the [WORK HISTORY] that best align with the [JOB DESCRIPTION].
|
||||
5. Only provide information from [WORK HISTORY] items if it is relevant to the [JOB DESCRIPTION].
|
||||
6. Do not use the [JOB DESCRIPTION] skills unless listed in [WORK HISTORY].
|
||||
7. Do not include any information unless it is provided in [WORK HISTORY] or [INTRO].
|
||||
8. Use the [INTRO] to highlight the use of AI in generating this resume.
|
||||
9. Use the [WORK HISTORY] to create a polished, professional resume.
|
||||
10. Do not list any locations or mailing addresses in the resume.
|
||||
11. If there is information in the [INFO], [JOB DESCRIPTION], [WORK HISTORY], or [RESUME] sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '[JOB DESCRIPTION]' (etc.) or quoting it directly.
|
||||
12. Avoid phrases like 'According to the [INFO]' or similar references to the [INFO], [JOB DESCRIPTION], or [WORK HISTORY] tags.
|
||||
- You must not invent or assume any information not explicitly present in the <|work_history|>.
|
||||
- Analyze the <|job_description|> to identify skills required for the job.
|
||||
- Use the <|job_description|> provided to guide the focus, tone, and relevant skills or experience to highlight from the <|work_history|>.
|
||||
- Identify and emphasize the experiences, achievements, and responsibilities from the <|work_history|> that best align with the <|job_description|>.
|
||||
- Only provide information from <|work_history|> items if it is relevant to the <|job_description|>.
|
||||
- Do not use the <|job_description|> skills unless listed in <|work_history|>.
|
||||
- Do not include any information unless it is provided in <|work_history|>.
|
||||
- Use the <|work_history|> to create a polished, professional resume.
|
||||
- Do not list any locations or mailing addresses in the resume.
|
||||
- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
|
||||
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|work_history|> tags.
|
||||
- Ensure the language is clear, concise, and aligned with industry standards for professional resumes.
|
||||
|
||||
Structure the resume professionally with the following sections where applicable:
|
||||
|
||||
* "Name: Use full name."
|
||||
* "Professional Summary: A 2-4 sentence overview tailored to the job, using [INTRO] to highlight the use of AI in generating this resume."
|
||||
* "Skills: A bullet list of key skills derived from the work history and relevant to the job."
|
||||
* Professional Experience: A detailed list of roles, achievements, and responsibilities from [WORK HISTORY] that relate to the [JOB DESCRIPTION]."
|
||||
* Name: Use full name
|
||||
* Professional Summary: A 2-4 sentence overview tailored to the job.
|
||||
* Skills: A bullet list of key skills derived from the work history and relevant to the job.
|
||||
* Professional Experience: A detailed list of roles, achievements, and responsibilities from <|work_history|> that relate to the <|job_description|>.
|
||||
* Education: Include only if available in the work history.
|
||||
* Notes: Indicate the initial draft of the resume was generated using the Backstory application.
|
||||
|
||||
Do not include any information unless it is provided in [WORK HISTORY] or [INTRO].
|
||||
Ensure the langauge is clear, concise, and aligned with industry standards for professional resumes.
|
||||
""".strip()
|
||||
|
||||
system_fact_check = f"""
|
||||
Launched on {DateTime()}.
|
||||
|
||||
You are a professional resume fact checker. Your task is to identify any inaccuracies in the [RESUME] based on the individual's [WORK HISTORY].
|
||||
You are a professional resume fact checker. Your task is to identify any inaccuracies in the <|resume|> based on the individual's <|work_history|>.
|
||||
|
||||
If there are inaccuracies, list them in a bullet point format.
|
||||
|
||||
When answering queries, follow these steps:
|
||||
1. You must not invent or assume any information not explicitly present in the [WORK HISTORY].
|
||||
2. Analyze the [RESUME] to identify any discrepancies or inaccuracies based on the [WORK HISTORY].
|
||||
3. If there is information in the [INFO], [JOB DESCRIPTION], [WORK HISTORY], or [RESUME] sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '[JOB DESCRIPTION]' (etc.) or quoting it directly.
|
||||
4. Avoid phrases like 'According to the [INFO]' or similar references to the [INFO], [JOB DESCRIPTION], [RESUME], or [WORK HISTORY] tags.
|
||||
- You must not invent or assume any information not explicitly present in the <|work_history|>.
|
||||
- Analyze the <|resume|> to identify any discrepancies or inaccuracies based on the <|work_history|>.
|
||||
- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
|
||||
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|work_history|> tags.
|
||||
""".strip()
|
||||
|
||||
system_job_description = f"""
|
||||
@ -219,10 +209,10 @@ Launched on {DateTime()}.
|
||||
You are a hiring and job placing specialist. Your task is to answer questions about a job description.
|
||||
|
||||
When answering queries, follow these steps:
|
||||
1. Analyze the [JOB DESCRIPTION] to provide insights for the asked question.
|
||||
2. If any financial information is requested, be sure to account for inflation.
|
||||
3. If there is information in the [INFO], [JOB DESCRIPTION], [WORK HISTORY], or [RESUME] sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '[JOB DESCRIPTION]' (etc.) or quoting it directly.
|
||||
4. Avoid phrases like 'According to the [INFO]' or similar references to the [INFO], [JOB DESCRIPTION], [RESUME], or [WORK HISTORY] tags.
|
||||
- Analyze the <|job_description|> to provide insights for the asked question.
|
||||
- If any financial information is requested, be sure to account for inflation.
|
||||
- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
|
||||
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, <|resume|>, or <|work_history|> tags.
|
||||
""".strip()
|
||||
|
||||
def create_system_message(prompt):
|
||||
@ -1088,12 +1078,15 @@ class WebServer:
|
||||
|
||||
if rag_context:
|
||||
preamble = f"""
|
||||
1. Respond to this query: {content}
|
||||
2. If there is information in the [INFO], [JOB DESCRIPTION], [WORK HISTORY], or [RESUME] sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '[JOB DESCRIPTION]' (etc.) or quoting it directly.
|
||||
3. Avoid phrases like 'According to the [INFO]' or similar references to the [INFO], [JOB DESCRIPTION], or [WORK HISTORY] tags.
|
||||
[INFO]
|
||||
|
||||
<|rules|>
|
||||
- If there is information in the <|context|>, <|job_description|>, <|work_history|>, or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|job_description|>' (etc.) or quoting it directly.
|
||||
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|>, <|job_description|>, or <|work_history|> tags.
|
||||
|
||||
<|context|>
|
||||
{rag_context}
|
||||
[/INFO]
|
||||
|
||||
<|question|>
|
||||
Use that information to respond to:"""
|
||||
|
||||
# Use the mode specific system_prompt instead of 'chat'
|
||||
@ -1122,24 +1115,23 @@ Use that information to respond to:"""
|
||||
rag_context += f"{doc}\n"
|
||||
|
||||
preamble = f"""
|
||||
[INTRO]
|
||||
{resume_intro}
|
||||
[/INTRO]
|
||||
|
||||
[WORK HISTORY]
|
||||
<|work_history|>
|
||||
{rag_context}
|
||||
[/WORK HISTORY]
|
||||
|
||||
[JOB DESCRIPTION]
|
||||
<|job_description|>
|
||||
{content}
|
||||
[/JOB DESCRIPTION]
|
||||
|
||||
"""
|
||||
|
||||
context["sessions"]["job_description"]["content_seed"] = preamble + "Use the above information to answer this query: "
|
||||
context["sessions"]["job_description"]["content_seed"] = preamble + "<|question|>\nUse the above information to answer this query: "
|
||||
|
||||
preamble += f"""
|
||||
1. Use the above [INTRO] and [WORK HISTORY] to create the resume for the [JOB DESCRIPTION].
|
||||
2. Do not use content from the [JOB DESCRIPTION] in the response unless the [WORK HISTORY] mentions them.
|
||||
|
||||
<|rules|>
|
||||
1. Use the above <|work_history|> to create the resume for the <|job_description|>.
|
||||
2. Do not use content from the <|job_description|> in the response unless the <|work_history|> mentions them.
|
||||
|
||||
<|question|>
|
||||
"""
|
||||
|
||||
# Seed the history for job_description
|
||||
@ -1185,25 +1177,25 @@ Use that information to respond to:"""
|
||||
rag_context += f"{doc}\n"
|
||||
|
||||
preamble = f"""
|
||||
[WORK HISTORY]
|
||||
<|work_history|>
|
||||
{rag_context}
|
||||
[/WORK HISTORY]
|
||||
|
||||
[RESUME]
|
||||
<|resume|>
|
||||
{resume['content']}
|
||||
[/RESUME]
|
||||
|
||||
Perform the following:
|
||||
1. Do not invent or assume any information not explicitly present in the [WORK HISTORY].
|
||||
2. Analyze the [RESUME] to identify any discrepancies or inaccuracies based on the [WORK HISTORY].
|
||||
<|rules|>
|
||||
1. Do not invent or assume any information not explicitly present in the <|work_history|>.
|
||||
2. Analyze the <|resume|> to identify any discrepancies or inaccuracies based on the <|work_history|>.
|
||||
|
||||
<|question|>
|
||||
"""
|
||||
|
||||
context["sessions"]["resume"]["content_seed"] = f"""
|
||||
[RESUME]
|
||||
<|resume|>
|
||||
{resume["content"]}
|
||||
[/RESUME]
|
||||
|
||||
Use the above [RESUME] to answer this query:
|
||||
<|question|>
|
||||
Use the above <|resume|> to answer this query:
|
||||
"""
|
||||
|
||||
content = "Fact check the resume and report discrepancies."
|
||||
|
@ -1,5 +1,6 @@
|
||||
import os
|
||||
import glob
|
||||
from pathlib import Path
|
||||
import time
|
||||
import hashlib
|
||||
import asyncio
|
||||
@ -12,6 +13,7 @@ import asyncio
|
||||
import json
|
||||
import pickle
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
import chromadb
|
||||
import ollama
|
||||
@ -21,6 +23,7 @@ from langchain.schema import Document
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
import umap
|
||||
from markitdown import MarkItDown
|
||||
|
||||
# Import your existing modules
|
||||
if __name__ == "__main__":
|
||||
@ -49,6 +52,9 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
self.chunk_overlap = chunk_overlap
|
||||
self.loop = loop
|
||||
|
||||
|
||||
self.md = MarkItDown(enable_plugins=False) # Set to True to enable plugins
|
||||
|
||||
#self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
||||
|
||||
# Path for storing file hash state
|
||||
@ -98,6 +104,14 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
def umap_model_3d(self):
|
||||
return self._umap_model_3d
|
||||
|
||||
def _markitdown(self, document : str, markdown : Path):
|
||||
logging.info(f'Converting {document} to {markdown}')
|
||||
try:
|
||||
result = self.md.convert(document)
|
||||
markdown.write_text(result.text_content)
|
||||
except Exception as e:
|
||||
logging.error(f"Error convering via markdownit: {e}")
|
||||
|
||||
def _save_hash_state(self):
|
||||
"""Save the current file hash state to disk."""
|
||||
try:
|
||||
@ -188,9 +202,11 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
"""Process a file update event."""
|
||||
# Skip if already being processed
|
||||
if file_path in self.processing_files:
|
||||
logging.info(f"{file_path} already in queue. Not adding.")
|
||||
return
|
||||
|
||||
try:
|
||||
logging.info(f"{file_path} not in queue. Adding.")
|
||||
self.processing_files.add(file_path)
|
||||
|
||||
# Wait a moment to ensure the file write is complete
|
||||
@ -203,6 +219,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
|
||||
if file_path in self.file_hashes and self.file_hashes[file_path] == current_hash:
|
||||
# File hasn't actually changed in content
|
||||
logging.info(f"Hash has not changed for {file_path}")
|
||||
return
|
||||
|
||||
# Update file hash
|
||||
@ -420,6 +437,14 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
asyncio.run_coroutine_threadsafe(self.remove_file_from_collection(file_path), self.loop)
|
||||
logging.info(f"File deleted: {file_path}")
|
||||
|
||||
def on_moved(self, event):
    """Handle file move events (currently a logging-only placeholder).

    Directory events are ignored. For file events only the source path is
    logged; no other action is taken here.
    """
    if event.is_directory:
        return

    file_path = event.src_path
    # f-strings interpolate with {name}; a leading "$" would be logged literally.
    logging.info(f"TODO: on_moved: {file_path}")
|
||||
|
||||
def _normalize_embeddings(self, embeddings):
|
||||
"""Normalize the embeddings to unit length."""
|
||||
# Handle both single vector and array of vectors
|
||||
@ -435,12 +460,24 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
async def _update_document_in_collection(self, file_path):
|
||||
"""Update a document in the ChromaDB collection."""
|
||||
try:
|
||||
logging.info(f"Updating document in collection: {file_path}")
|
||||
# Remove existing entries for this file
|
||||
existing_results = self.collection.get(where={"path": file_path})
|
||||
if existing_results and 'ids' in existing_results and existing_results['ids']:
|
||||
self.collection.delete(ids=existing_results['ids'])
|
||||
|
||||
extensions = (".docx", ".xlsx", ".xls", ".pdf")
|
||||
if file_path.endswith(extensions):
|
||||
p = Path(file_path)
|
||||
p_as_md = p.with_suffix(".md")
|
||||
if p_as_md.exists():
|
||||
logging.info(f"newer: {p.stat().st_mtime > p_as_md.stat().st_mtime}")
|
||||
|
||||
# If file_path.md doesn't exist or file_path is newer than file_path.md,
|
||||
# fire off markitdown
|
||||
if (not p_as_md.exists()) or (p.stat().st_mtime > p_as_md.stat().st_mtime):
|
||||
self._markitdown(file_path, p_as_md)
|
||||
return
|
||||
|
||||
# Create document object in LangChain format
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
Loading…
x
Reference in New Issue
Block a user