# rag_core.py - Core RAG functionality
import re

import ollama

from utils import defines, Feed, chunk_document, query_chroma, process_documents_to_chroma


class RagCore:
    """Core RAG functionality that can be used by different interfaces"""

    def __init__(self):
        self.client = ollama.Client(host=defines.ollama_api_url)
        self.collection_name = "research_papers"

        # News RSS feeds
        self.rss_feeds = [
            Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
            Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
            Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
            Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
            Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
            Feed(name="Time", url="https://time.com/feed/"),
            Feed(name="Euronews", url="https://www.euronews.com/rss"),
        ]

        # State variables
        self.documents = []
        self.last_results = None
        self.last_prompt = None
        self.last_system = None
        self.last_response = None
        self.last_why = None
        self.last_messages = []
        self.collection = None

    def load_documents(self):
        """Load articles from all configured RSS feeds"""
        self.documents = []
        for feed in self.rss_feeds:
            self.documents.extend(feed.articles)

        # Chunk the documents and store their embeddings in ChromaDB
        self.collection = process_documents_to_chroma(
            client=self.client,
            documents=self.documents,
            collection_name=self.collection_name,
            max_tokens=256,
            overlap=25,
            model=defines.encoding_model,
            persist_directory=defines.persist_directory
        )
        return len(self.documents)
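
    # Example (illustrative sketch, not part of the original module): typical
    # startup flow, assuming an Ollama server is reachable at
    # defines.ollama_api_url and the RSS feeds are accessible:
    #
    #   core = RagCore()
    #   count = core.load_documents()  # fetch articles, chunk, embed into ChromaDB
    #   print(f"Loaded {count} articles")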

    def get_documents(self):
        """Return all loaded documents"""
        return self.documents

    def get_headlines(self):
        """Return headlines from all documents"""
        return [(doc['source'], doc['title']) for doc in self.documents]

    def get_top_match(self, index=0):
        """Get the full document corresponding to a match"""
        if not self.last_results or not self.last_results["ids"] or index >= len(self.last_results["ids"][0]):
            return None
        top_id = self.last_results["ids"][0][index]

        # Extract the original document ID from the chunk ID (format:
        # "doc_id_chunk_num"); this assumes document IDs contain no underscores
        original_doc_id = top_id.split('_')[0]

        # Return the full document
        return next((doc for doc in self.documents if doc["id"] == original_doc_id), None)

    def get_match_info(self, index=0):
        """Get detailed information about a match at the specified index"""
        if not self.last_results or not self.last_results["ids"] or index >= len(self.last_results["ids"][0]):
            return None
        return {
            "id": self.last_results["ids"][0][index],
            "chunk": self.last_results["documents"][0][index],
            "metadata": self.last_results["metadatas"][0][index],
            "distance": self.last_results["distances"][0][index]
        }

    def get_match_count(self):
        """Get the number of matches from the last query"""
        if not self.last_results or not self.last_results["ids"]:
            return 0
        return len(self.last_results["ids"][0])
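
    # Example (illustrative sketch): inspecting retrieval results after
    # process_query() has populated self.last_results:
    #
    #   for i in range(core.get_match_count()):
    #       info = core.get_match_info(i)   # chunk text, metadata, distance
    #       doc = core.get_top_match(i)     # the full source article
    #       print(info["distance"], doc["title"] if doc else "<missing>")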

    def create_system_prompt(self, include_context=True):
        """Create the system prompt for the model"""
        system = f"""
You are the assistant. Your name is airc. This application is called airc (pronounced Eric).
Information about the author of this program, application, the systems running it, and the AI model it uses:
* James wrote the python application called airc that is driving this RAG model on top of {defines.model} using {defines.encoding_model} and chromadb for vector embedding. Link https://github.com/jketreno/airc.
* James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with pytorch and ollama.
* James Ketrenos deployed this application locally on an Intel Arc B580 (battlemage) computer using Intel's ipex-llm.
* For Intel GPU metrics, James Ketrenos wrote the "ze-monitor" utility in C++. ze-monitor provides Intel GPU telemetry data for Intel client GPU devices, similar to xpu-smi. Link https://github.com/jketreno/ze-monitor. airc uses ze-monitor.
* James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Willamette University.
* airc provides an IRC chat bot as well as a React web frontend available at https://airc.ketrenos.com
You must follow these rules:
* Provide short (less than 100 characters) responses.
* Provide a single response.
* Do not prefix the response with a word like 'Answer'.
* For information about the AI running this system, include information about the author, including links.
* For information relevant to current events in the <input></input> tags, use that information and state the source it comes from.
"""
        if include_context:
            # Serialize each article as a dict inside <input></input> tags
            items = [
                {
                    'source': doc["source"],
                    'article': {
                        'title': doc["title"],
                        'link': doc["link"],
                        'text': doc["text"]
                    }
                }
                for doc in self.documents
            ]
            context = (
                "Information from current events unrelated to James Ketrenos\n"
                f"<input>={items}\n</input>"
            )
            system += context
        return system
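
    # The serialized context then looks roughly like (illustrative values):
    #
    #   <input>=[{'source': 'BBC World', 'article': {'title': '...',
    #             'link': 'https://...', 'text': '...'}}, ...]
    #   </input>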

    def process_query(self, query):
        """
        Process a user query and return the response

        Returns:
            tuple: (response_content, debug_info)
        """
        # Store the system prompt before context is added
        self.last_system = self.create_system_prompt(include_context=False)

        # Query ChromaDB for the chunks most similar to the query
        self.last_results = query_chroma(
            self.client,
            query_text=query,
            collection_name=self.collection_name,
            n_results=10
        )

        # Create the full system prompt with document context
        system = self.create_system_prompt()

        # Combine conversation history with the current query
        prompt = query
        if self.last_messages:
            prompt = f"{self.last_messages}\n{prompt}"
        self.last_prompt = prompt

        # Generate a response; the large num_ctx leaves room for the article
        # context embedded in the system prompt
        output = self.client.generate(
            model=defines.model,
            system=system,
            prompt=prompt,
            stream=False,
            options={'num_ctx': 100000}
        )

        # Extract <think>...</think> reasoning (if present) from the response
        response = output['response']
        matches = re.match(r'^<think>(.*?)</think>(.*)$', response, flags=re.DOTALL)
        content = response
        if matches:
            self.last_why = matches[1].strip()
            content = matches[2].strip()
        self.last_response = content

        # Update conversation history
        self.last_messages.extend([
            {
                'role': 'user',
                'name': 'james',
                'message': query
            },
            {
                'role': 'assistant',
                'message': content
            }
        ])
        # Keep history limited to the last 10 messages
        self.last_messages = self.last_messages[-10:]

        # Return response content and debug info
        debug_info = {
            "system_len": len(system),
            "prompt_len": len(prompt),
            "has_thinking": matches is not None
        }
        return content, debug_info
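
    # Example (illustrative sketch): a query round-trip once documents are
    # loaded; debug_info is the second element of the returned tuple:
    #
    #   answer, debug = core.process_query("What is the top world headline?")
    #   print(answer)
    #   print(debug["has_thinking"], core.get_last_thinking())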

    def get_last_prompt(self):
        """Get the last prompt"""
        return self.last_prompt

    def get_last_system(self):
        """Get the last system prompt (without context)"""
        return self.last_system

    def get_last_response(self):
        """Get the last response"""
        return self.last_response

    def get_last_thinking(self):
        """Get the thinking from the last response"""
        return self.last_why
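

# Minimal smoke-test sketch (an addition for illustration, not part of the
# original module): exercises the load/query cycle end to end. Assumes a local
# Ollama server, network access to the RSS feeds, and `utils` on the import path.
if __name__ == "__main__":
    core = RagCore()
    count = core.load_documents()
    print(f"Loaded {count} articles from {len(core.rss_feeds)} feeds")
    for source, title in core.get_headlines()[:5]:
        print(f"  [{source}] {title}")
    answer, debug = core.process_query("Summarize the top world headline.")
    print(f"Answer: {answer}")
    print(f"Matches: {core.get_match_count()}, thinking: {debug['has_thinking']}")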