# core.py - Core RAG functionality

import re
from typing import List, Dict, Any, Union, Optional, Tuple

import ollama

from utils import defines, Feed, chunk_document, query_chroma, process_documents_to_chroma
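# Note: `utils` is expected (as used below) to provide `defines.ollama_api_url`,
# `defines.model`, `defines.encoding_model`, `defines.persist_directory`, a `Feed`
# class exposing an `.articles` list, and the `query_chroma` /
# `process_documents_to_chroma` helpers that wrap ChromaDB.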


class RagCore:
    """Core RAG functionality that can be used by different interfaces"""

    def __init__(self):
        self.client = ollama.Client(host=defines.ollama_api_url)
        self.collection_name = "research_papers"

        # News RSS feeds
        self.rss_feeds = [
            Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
            Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
            Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
            Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
            Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
            Feed(name="Time", url="https://time.com/feed/"),
            Feed(name="Euronews", url="https://www.euronews.com/rss"),
        ]

        # State variables
        self.documents = []
        self.last_results = None
        self.last_prompt = None
        self.last_system = None
        self.last_response = None
        self.last_why = None
        self.last_messages = []
        self.collection = None

    def load_documents(self):
        """Load articles from all configured RSS feeds, index them in ChromaDB, and return the article count"""
        self.documents = []
        for feed in self.rss_feeds:
            self.documents.extend(feed.articles)

        # Chunk the documents (256 tokens per chunk, 25-token overlap) and store
        # their embeddings in ChromaDB
        self.collection = process_documents_to_chroma(
            client=self.client,
            documents=self.documents,
            collection_name=self.collection_name,
            max_tokens=256,
            overlap=25,
            model=defines.encoding_model,
            persist_directory=defines.persist_directory
        )

        return len(self.documents)

    def get_documents(self):
        """Return all loaded documents"""
        return self.documents

    def get_headlines(self):
        """Return (source, title) pairs for all loaded documents"""
        return [(doc['source'], doc['title']) for doc in self.documents]

    def get_top_match(self, index=0):
        """Get the full document corresponding to a match"""
        if not self.last_results or not self.last_results["ids"] or index >= len(self.last_results["ids"][0]):
            return None

        top_id = self.last_results["ids"][0][index]
        # Extract the original document ID from the chunk ID (format: "doc_id_chunk_num");
        # this assumes document IDs themselves contain no underscores
        original_doc_id = top_id.split('_')[0]

        # Return the full document
        return next((doc for doc in self.documents if doc["id"] == original_doc_id), None)

    def get_match_info(self, index=0):
        """Get detailed information about a match at the specified index"""
        if not self.last_results or not self.last_results["ids"] or len(self.last_results["ids"][0]) <= index:
            return None

        match_info = {
            "id": self.last_results["ids"][0][index],
            "chunk": self.last_results["documents"][0][index],
            "metadata": self.last_results["metadatas"][0][index],
            "distance": self.last_results["distances"][0][index]
        }

        return match_info

    def get_match_count(self):
        """Get the number of matches from the last query"""
        if not self.last_results or not self.last_results["ids"]:
            return 0
        return len(self.last_results["ids"][0])

    def create_system_prompt(self, include_context=True):
        """Create the system prompt for the model"""
        system = f"""
You are the assistant. Your name is airc. This application is called airc (pronounced Eric).

Information about the author of this program, the application, the systems running it, and the AI model it uses:

* James wrote the Python application called airc that drives this RAG model on top of {defines.model}, using {defines.encoding_model} and ChromaDB for vector embedding. Link: https://github.com/jketreno/airc.
* James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with PyTorch and ollama.
* James Ketrenos deployed this application locally on an Intel Arc B580 (Battlemage) computer using Intel's ipex-llm.
* For Intel GPU metrics, James Ketrenos wrote the "ze-monitor" utility in C++. ze-monitor provides telemetry data for Intel client GPU devices, similar to xpu-smi. Link: https://github.com/jketreno/ze-monitor. airc uses ze-monitor.
* James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Willamette University.
* airc provides an IRC chat bot as well as a React web frontend available at https://airc.ketrenos.com

You must follow these rules:

* Provide short (less than 100 character) responses.
* Provide a single response.
* Do not prefix it with a word like 'Answer'.
* For questions about the AI running this system, include information about the author, including links.
* For information relevant to current events in the <input></input> tags, use that information and state the source it comes from.
"""

        if include_context:
            context = "Information from current events unrelated to James Ketrenos\n<input>=["
            for doc in self.documents:
                item = {
                    'source': doc["source"],
                    'article': {
                        'title': doc["title"],
                        'link': doc["link"],
                        'text': doc["text"]
                    }
                }
                context += f"{item}"
            context += "\n</input>"
            system += context

        return system

    def process_query(self, query):
        """
        Process a user query and return the response.

        Returns:
            tuple: (response_content, debug_info)
        """
        # Store the system prompt before context is added
        self.last_system = self.create_system_prompt(include_context=False)

        # Query ChromaDB for the chunks most relevant to the query
        self.last_results = query_chroma(
            self.client,
            query_text=query,
            collection_name=self.collection_name,
            n_results=10
        )

        # Create the full system prompt with document context
        system = self.create_system_prompt()

        # Combine conversation history with the current query
        prompt = query
        if self.last_messages:
            message_context = f"{self.last_messages}"
            prompt = f"{message_context}{prompt}"

        self.last_prompt = prompt

        # Generate a response; num_ctx requests a large context window so the
        # article context fits
        output = self.client.generate(
            model=defines.model,
            system=system,
            prompt=prompt,
            stream=False,
            options={'num_ctx': 100000}
        )

        # Extract thinking and response. Some models emit their chain of thought
        # wrapped in <think>...</think> tags ahead of the answer; keep the
        # reasoning separately in last_why.
        response = output['response']
        matches = re.match(r'^<think>(.*?)</think>(.*)$', response, flags=re.DOTALL)

        content = response
        if matches:
            self.last_why = matches[1].strip()
            content = matches[2].strip()

        self.last_response = content

        # Update conversation history
        self.last_messages.extend([
            {
                'role': 'user',
                'name': 'james',
                'message': query
            },
            {
                'role': 'assistant',
                'message': content
            }
        ])

        # Keep history limited to last 10 messages
        self.last_messages = self.last_messages[-10:]

        # Return response content and debug info
        debug_info = {
            "system_len": len(system),
            "prompt_len": len(prompt),
            "has_thinking": matches is not None
        }

        return content, debug_info

    def get_last_prompt(self):
        """Get the last prompt"""
        return self.last_prompt

    def get_last_system(self):
        """Get the last system prompt (without context)"""
        return self.last_system

    def get_last_response(self):
        """Get the last response"""
        return self.last_response

    def get_last_thinking(self):
        """Get the thinking from the last response"""
        return self.last_why
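

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the class). This assumes `utils.defines`
# points at a reachable Ollama server and that the configured RSS feeds are
# accessible; the printed fields mirror what process_query() and
# get_match_info() return above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    core = RagCore()

    # Fetch and index articles before querying
    count = core.load_documents()
    print(f"Loaded {count} articles")

    # Ask a question; process_query returns (response_content, debug_info)
    content, debug = core.process_query("What is in the news today?")
    print(content)
    print(debug)

    # Inspect the best-matching chunk from the last query, if any
    if core.get_match_count() > 0:
        info = core.get_match_info(0)
        print(info["id"], info["distance"])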