diff --git a/Dockerfile b/Dockerfile
index 91c5c09..8f7881b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -340,7 +340,7 @@ WORKDIR /opt/airc
 SHELL [ "/opt/airc/shell" ]
 
 # Needed by src/model-server.py
-RUN pip install faiss-cpu sentence_transformers feedparser
+RUN pip install faiss-cpu sentence_transformers feedparser beautifulsoup4 lxml
 
 SHELL [ "/bin/bash", "-c" ]
 
diff --git a/src/airc.py b/src/airc.py
index 5ab4b78..e8c9a05 100644
--- a/src/airc.py
+++ b/src/airc.py
@@ -118,11 +118,18 @@ class AIRC(pydle.Client):
         logging.info("Burst limit reset due to inactivity.")
 
     async def message(self, target, message):
-        """Splits a multi-line message and sends each line separately."""
-        for line in message.splitlines(): # Splits on both '\n' and '\r\n'
+        """Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message."""
+        lines = message.splitlines() # Splits on both '\n' and '\r\n'
+
+        # Process the first 10 lines
+        for line in lines[:10]:
             if line.strip(): # Ignore empty lines
                 await self._message_queue.put((target, line))
 
+        # If there are more than 10 lines, add the truncation message
+        if len(lines) > 10:
+            await self._message_queue.put((target, "[additional content truncated]"))
+
     async def on_connect(self):
         logging.debug('on_connect')
         await self.join(self.channel)
diff --git a/src/model-server.py b/src/model-server.py
index 80114a2..40e62c0 100644
--- a/src/model-server.py
+++ b/src/model-server.py
@@ -20,6 +20,7 @@ import faiss
 import numpy as np
 import torch
 from sentence_transformers import SentenceTransformer
+from bs4 import BeautifulSoup
 
 def parse_args():
     parser = argparse.ArgumentParser(description="AI is Really Cool Server")
@@ -37,6 +38,16 @@ def setup_logging(level):
     logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
     logging.info(f"Logging is set to {level} level.")
+def extract_text_from_html_or_xml(content, is_xml=False):
+    # Parse the content
+    if is_xml:
+        soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content; requires the lxml package
+    else:
+        soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
+
+    # Extract and return just the text
+    return soup.get_text()
+
 class Feed():
     def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
         self.name = name
         self.url = url
@@ -67,6 +78,7 @@ class Feed():
             content += f"Link: {link}\n"
         summary = entry.get("summary")
         if summary:
+            summary = extract_text_from_html_or_xml(summary, False)
             content += f"Summary: {summary}\n"
         published = entry.get("published")
         if published:
@@ -166,7 +178,7 @@ class Chat():
             prompt,
             add_special_tokens=False,
             return_tensors="pt",
-            max_length=8000, # Prevent 'Asking to truncate to max_length...'
+            max_length=7999, # Prevent 'Asking to truncate to max_length...'
             padding=True, # Handles padding automatically
             truncation=True
         )