import argparse
import asyncio
import datetime
import json
import logging
import os
import re
import time

import chromadb
import feedparser
import ollama
import pydle
from bs4 import BeautifulSoup

OLLAMA_API_URL = "http://ollama:11434"  # Default Ollama endpoint
MODEL_NAME = "deepseek-r1:7b"


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool")
    parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
    parser.add_argument("--port", type=int, default=6667, help="IRC server port")
    parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
    parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
    parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
    parser.add_argument('--level', type=str,
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")
    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


client = ollama.Client(host=OLLAMA_API_URL)


def extract_text_from_html_or_xml(content, is_xml=False):
    # Parse the content
    if is_xml:
        soup = BeautifulSoup(content, 'xml')  # Use the 'xml' parser for XML content
    else:
        soup = BeautifulSoup(content, 'html.parser')  # Default to 'html.parser' for HTML content
    # Extract and return just the text
    return soup.get_text()


class Feed():
    def __init__(self, name, url, poll_limit_min=30, max_articles=5):
        self.name = name
        self.url = url
        self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
        self.last_poll = None
        self.articles = []
        self.max_articles = max_articles
        self.update()

    def update(self):
        now = datetime.datetime.now()
        if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
            logging.info(f"Updating {self.name}")
            feed = feedparser.parse(self.url)
            self.articles = []
            self.last_poll = now
            content = ""
            if len(feed.entries) > 0:
                content += f"Source: {self.name}\n"
                for entry in feed.entries[:self.max_articles]:
                    title = entry.get("title")
                    if title:
                        content += f"Title: {title}\n"
                    link = entry.get("link")
                    if link:
                        content += f"Link: {link}\n"
                    summary = entry.get("summary")
                    if summary:
                        summary = extract_text_from_html_or_xml(summary, False)
                        content += f"Summary: {summary}\n"
                    published = entry.get("published")
                    if published:
                        content += f"Published: {published}\n"
                    content += "\n"
                self.articles.append(content)
        else:
            logging.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)} remains until refresh.")
        return self.articles


# News RSS Feeds
rss_feeds = [
    Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
    Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
    Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
    Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
    Feed(name="Time", url="https://time.com/feed/"),
    Feed(name="Euronews", url="https://www.euronews.com/rss"),
    Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]

# Sample documents from the original embedding experiment; kept for reference
# but not embedded below (the RSS articles are embedded instead).
documents = [
    "Llamas like to eat penguins",
    "Llamas are not vegetarians and have very efficient digestive systems",
    "Llamas live to be about 120 years old, though some only live for 15 years and others live to be 90 years old",
]
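
# Hypothetical helper (not used by the bot) for sanity-checking a new feed URL
# before adding it to rss_feeds. feedparser does not raise on a bad URL; it
# sets the `bozo` flag on the result instead, so that is what we inspect.
def check_feed(url):
    parsed = feedparser.parse(url)
    if parsed.bozo or not parsed.entries:
        logging.warning(f"Feed at {url} could not be parsed or returned no entries")
        return False
    logging.info(f"Feed OK, first entry: {parsed.entries[0].get('title')}")
    return True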

# Initialize the ChromaDB client. We want to save the collection to disk so it
# can be analyzed offline, but we don't want to re-use it between runs.
db = chromadb.PersistentClient(path="/root/.cache/chroma.db")
collection = db.get_or_create_collection("docs")

# Store each feed's content in the vector embedding database
for i, feed in enumerate(rss_feeds):
    # Use the client instance instead of the global ollama module
    for j, article in enumerate(feed.articles):
        response = client.embeddings(model="mxbai-embed-large", prompt=article)
        embedding = response["embedding"]  # Note: the key is "embedding", not "embeddings"
        collection.add(
            ids=[f"{i}-{j}"],  # Hyphenate so feed/article indices cannot collide
            embeddings=[embedding],
            documents=[article]
        )
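
# Optional retrieval sanity check (hypothetical, never called by the bot):
# embed a test prompt with the same model and confirm that the collection
# returns a nearby article. Assumes "mxbai-embed-large" has already been
# pulled into the Ollama instance.
def sanity_check_retrieval(prompt="world news"):
    response = client.embeddings(model="mxbai-embed-large", prompt=prompt)
    results = collection.query(query_embeddings=[response["embedding"]], n_results=1)
    logging.info(f"Nearest article for {prompt!r}: {results['documents'][0][0][:200]}")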

class AIRC(pydle.Client):
    def __init__(self, nick, channel, client, burst_limit=5, rate_limit=1.0, burst_reset_timeout=10.0):
        super().__init__(nick)
        self.nick = nick
        self.channel = channel
        self.burst_limit = burst_limit
        self.rate_limit = rate_limit
        self.burst_reset_timeout = burst_reset_timeout
        self.sent_burst = 0  # Track messages sent in the current burst
        self.last_message_time = None  # Track last message time
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self._message_queue = asyncio.Queue()
        self._task = asyncio.create_task(self._send_from_queue())
        self.client = client
        self.queries = 0
        self.processing = datetime.timedelta(minutes=0)

    async def _send_from_queue(self):
        """Background task that sends queued messages with burst + rate limiting."""
        while True:
            target, message = await self._message_queue.get()
            # If burst is still available, send immediately
            if self.sent_burst < self.burst_limit:
                self.sent_burst += 1
            else:
                await asyncio.sleep(self.rate_limit)  # Apply rate limit
            await super().message(target, message)  # Send message
            self.last_message_time = asyncio.get_event_loop().time()  # Update last message timestamp
            # Start burst reset countdown after each message
            asyncio.create_task(self._reset_burst_after_inactivity())

    async def _reset_burst_after_inactivity(self):
        """Resets the burst counter only if no new messages are sent within the timeout."""
        last_time = self.last_message_time
        await asyncio.sleep(self.burst_reset_timeout)  # Wait for the inactivity period
        # Only reset if no new messages were sent during the wait
        if self.last_message_time == last_time:
            self.sent_burst = 0
            logging.info("Burst limit reset due to inactivity.")

    async def message(self, target, message):
        """Splits a multi-line message and sends each line separately.
        If there are more than 10 lines, truncate and add a notice."""
        lines = message.splitlines()  # Splits on both '\n' and '\r\n'
        # Process the first 10 lines
        for line in lines[:10]:
            if line.strip():  # Ignore empty lines
                await self._message_queue.put((target, line))
        # If there are more than 10 lines, add the truncation message
        if len(lines) > 10:
            await self._message_queue.put((target, "[additional content truncated]"))

    async def on_connect(self):
        logging.debug('on_connect')
        await self.join(self.channel)

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def extract_nick_message(self, input_string):
        # Pattern with capturing groups for nick and message
        pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
        match = re.match(pattern, input_string)
        if match:
            nick = match.group(1)     # First capturing group
            message = match.group(2)  # Second capturing group
            return nick, message
        return None, None  # Return None for both if no match

    async def on_message(self, target, source, message):
        if source == self.nick:
            return
        nick, body = self.extract_nick_message(message)
        if nick == self.nick:
            content = None
            if body == "stats":
                content = f"{self.queries} queries handled in {self.processing}"
            else:
                self.queries += 1
                start = datetime.datetime.now()
                query_text = body
                query_response = self.client.embeddings(model="mxbai-embed-large", prompt=query_text)
                query_embedding = query_response["embedding"]  # Note: singular "embedding", not plural
                # Then run the query with the correct structure
                results = collection.query(
                    query_embeddings=[query_embedding],  # Must be a list containing the embedding
                    n_results=3
                )
                data = results['documents'][0][0]
                logging.info(f"Data for {query_text}: {data}")
                logging.info(f"From {results}")
                output = self.client.generate(
                    model=MODEL_NAME,
                    system=f"You are {self.nick}. In your response, make reference to this data if appropriate: {data}",
                    prompt=f"Respond to this prompt: {query_text}",
                    stream=False
                )
                end = datetime.datetime.now()
                self.processing = self.processing + end - start
                # Prune off the <think>...</think> block that deepseek-r1 emits
                content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
            if content:
                logging.info(f'Sending: {content}')
                await self.message(target, f"{content}")


async def main():
    # Parse command-line arguments
    args = parse_args()

    # Setup logging based on the provided level
    setup_logging(args.level)

    bot = AIRC(args.nickname, args.channel, client)
    await bot.connect(args.server, args.port, tls=False)
    await bot.handle_forever()


if __name__ == "__main__":
    asyncio.run(main())
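
# Example invocation (assumes the script is saved as airc.py and an Ollama
# server is reachable at OLLAMA_API_URL with the "deepseek-r1:7b" and
# "mxbai-embed-large" models already pulled):
#
#   python airc.py --server irc.libera.chat --port 6667 \
#       --nickname airc --channel "#airc-test" --level DEBUG
#
# In-channel usage: address the bot by its nick, e.g.
#   airc: what is happening in Europe today?
#   airc: stats        <- reports queries handled and total processing time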