site fetching is working

James Ketr 2025-03-19 18:22:30 -07:00
parent 786444ce3c
commit 9c0f2ba2bb
2 changed files with 201 additions and 81 deletions

View File

@@ -31,6 +31,7 @@ try_import('yfinance', 'yfinance')
 try_import('dotenv', 'python-dotenv')
 try_import('geopy', 'geopy')
 try_import('hyphen', 'PyHyphen')
+try_import('bs4', 'beautifulsoup4')
 
 from dotenv import load_dotenv
 from geopy.geocoders import Nominatim
@@ -42,6 +43,7 @@ import pytz
 import requests
 import yfinance as yf
 from hyphen import hyphenator
+from bs4 import BeautifulSoup
 
 # Local defined imports
 from tools import (
@@ -71,23 +73,20 @@ BOT_ADMIN="james"
 # Globals
 system_message = f"""
 You are a helpful information agent connected to the IRC network {IRC_SERVER}. Your name is {NICK}.
+You have real time access to any website or URL the user asks about.
+Messages from users are in the form "NICK: MESSAGE". The name before the colon (:) tells you which user asked about something.
 You are running { { 'model': MODEL_NAME, 'gpu': 'Intel Arc B580', 'cpu': 'Intel Core i9-14900KS', 'ram': '64G' } }.
 You were launched on {get_current_datetime()}.
-You have real time access to current stock trading values, the current date and time, and current weather information for locations in the United States.
+You have real time access to stock prices, the current date and time, and current weather information for locations in the United States.
 If you use any real time access, do not mention your knowledge cutoff.
-Give short, courteous answers, no more than 2-3 sentences, keeping the answer less than about 100 characters.
-If you have to cut the answer short, ask the user if they want more information and provide it if they say Yes.
+Give short, courteous answers, no more than 2-3 sentences.
 Always be accurate. If you don't know the answer, say so. Do not make up details.
-You have tools to:
-* get_current_datetime: Get current time and date.
-* get_weather_by_location: Get-real time weather forecast.
-* get_ticker_price: Get real-time value of a stock symbol.
-Those are the only tools available.
+When you receive a response from summarize_site, you must:
+1. Review the entire content returned by the second LLM
+2. Provide the URL used to obtain the information.
+3. Incorporate the information into your response as appropriate
 """
 system_log = [{"role": "system", "content": system_message}]
-history = []
 tool_log = []
 command_log = []
 model = None
@@ -103,12 +102,12 @@ def parse_args():
     parser.add_argument("--irc-nickname", type=str, default=NICK, help=f"Bot nickname. default={NICK}")
     parser.add_argument("--irc-channel", type=str, default=CHANNEL, help=f"Channel to join. default={CHANNEL}")
     parser.add_argument("--irc-use-tls", type=bool, default=USE_TLS, help=f"Use TLS with --irc-server. default={USE_TLS}")
+    parser.add_argument("--irc-bot-admin", type=str, default=BOT_ADMIN, help=f"Nick that can send admin commands via IRC. default={BOT_ADMIN}")
     parser.add_argument("--ollama-server", type=str, default=OLLAMA_API_URL, help=f"Ollama API endpoint. default={OLLAMA_API_URL}")
     parser.add_argument("--ollama-model", type=str, default=MODEL_NAME, help=f"LLM model to use. default={MODEL_NAME}")
     parser.add_argument("--gradio-host", type=str, default=GRADIO_HOST, help=f"Host to launch gradio on. default={GRADIO_HOST} only if --gradio-enable is specified.")
     parser.add_argument("--gradio-port", type=str, default=GRADIO_PORT, help=f"Port to launch gradio on. default={GRADIO_PORT} only if --gradio-enable is specified.")
     parser.add_argument("--gradio-enable", action="store_true", default=GRADIO_ENABLE, help=f"If set to True, enable Gradio. default={GRADIO_ENABLE}")
-    parser.add_argument("--bot-admin", type=str, default=BOT_ADMIN, help=f"Nick that can send admin commands via IRC. default={BOT_ADMIN}")
     parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                         default=LOG_LEVEL, help=f'Set the logging level. default={LOG_LEVEL}')
     return parser.parse_args()
@@ -205,27 +204,31 @@ def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
     return result_lines
 
 # %%
-def handle_tool_calls(message):
+async def handle_tool_calls(message):
     response = []
     tools_used = []
     for tool_call in message['tool_calls']:
         arguments = tool_call['function']['arguments']
         tool = tool_call['function']['name']
-        if tool == 'get_ticker_price':
-            ticker = arguments.get('ticker')
-            if not ticker:
-                ret = None
-            else:
-                ret = get_ticker_price(ticker)
-            tools_used.append(tool)
-        elif tool == 'get_current_datetime':
-            ret = get_current_datetime(arguments.get('timezone'))
-            tools_used.append(tool)
-        elif tool == 'get_weather_by_location':
-            ret = get_weather_by_location(arguments.get('city'), arguments.get('state'))
-            tools_used.append(tool)
-        else:
-            ret = None
+        match tool:
+            case 'get_ticker_price':
+                ticker = arguments.get('ticker')
+                if not ticker:
+                    ret = None
+                else:
+                    ret = get_ticker_price(ticker)
+                tools_used.append(tool)
+            case 'summarize_site':
+                ret = await summarize_site(arguments.get('url'), arguments.get('question', 'what is the summary of this content?'))
+                tools_used.append(tool)
+            case 'get_current_datetime':
+                ret = get_current_datetime(arguments.get('timezone'))
+                tools_used.append(tool)
+            case 'get_weather_by_location':
+                ret = get_weather_by_location(arguments.get('city'), arguments.get('state'))
+                tools_used.append(tool)
+            case _:
+                ret = None
         response.append({
             "role": "tool",
            "content": str(ret),
@@ -237,6 +240,76 @@ def handle_tool_calls(message):
     return response, tools_used
 
 # %%
+def total_json_length(dict_array):
+    total = 0
+    for item in dict_array:
+        # Convert dictionary to minimized JSON string
+        json_string = json.dumps(item, separators=(',', ':'))
+        total += len(json_string)
+    return total
+
+async def summarize_site(url, question):
+    """
+    Fetches content from a URL, extracts the text, and uses Ollama to summarize it.
+
+    Args:
+        url (str): The URL of the website to summarize
+        question (str): The question to answer about the site content
+
+    Returns:
+        dict: The summarizer response ('source', 'content', 'metadata'), or an error string on failure
+    """
+    global model, client
+
+    try:
+        # Fetch the webpage
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        logging.info(f"Fetching {url}")
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        logging.info(f"{url} returned. Processing...")
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Remove script and style elements
+        for script in soup(["script", "style"]):
+            script.extract()
+
+        # Get text content
+        text = soup.get_text(separator=' ', strip=True)
+
+        # Clean up text (remove extra whitespace)
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+        text = ' '.join(chunk for chunk in chunks if chunk)
+
+        # Limit text length if needed (Ollama may have token limits)
+        max_chars = 100000
+        if len(text) > max_chars:
+            text = text[:max_chars] + "..."
+
+        logging.info(f"Requesting summary of: {text}")
+
+        # Generate summary using Ollama
+        prompt = f"CONTENTS:\n\n{text}\n\n{question}"
+        response = client.generate(model=model,
+                                   system=f"You are given the contents of {url}. Answer the question about the contents",
+                                   prompt=prompt)
+        logging.info(response['response'])
+        return {
+            'source': 'summarizer-llm',
+            'content': response['response'],
+            'metadata': get_current_datetime()
+        }
+    except requests.exceptions.RequestException as e:
+        return f"Error fetching the URL: {str(e)}"
+    except Exception as e:
+        return f"Error processing the website content: {str(e)}"
+
 async def chat(history, is_irc=False):
     global client, model, irc_bot, system_log, tool_log
     if not client:
@@ -250,6 +323,8 @@ async def chat(history, is_irc=False):
     tools_used = []
     if 'tool_calls' in response['message']:
         message = response['message']
+        tool_result, tools_used = await handle_tool_calls(message)
+
         # Convert Message object to a proper dictionary format
         message_dict = {
             'role': message.get('role', 'assistant'),
@@ -262,9 +337,11 @@
                 for tc in message['tool_calls']
             ]
 
-        tool_result, tools_used = handle_tool_calls(message)
         messages.append(message_dict)  # Add properly formatted dict instead of Message object
-        messages.append(tool_result)
+        if isinstance(tool_result, list):
+            messages.extend(tool_result)
+        else:
+            messages.append(tool_result)
 
         try:
             response = client.chat(model=model, messages=messages)
         except Exception:
@@ -334,7 +411,7 @@ class DynamicIRCBot(pydle.Client):
         max_lines = 10
         irc_lines = []
         for line in message.splitlines():
-            lines = split_paragraph_with_hyphenation(line, line_length=450)
+            lines = split_paragraph_with_hyphenation(line, line_length=300)
             irc_lines.extend(lines)
 
         # Send the first 'max_lines' non-empty lines
@@ -384,9 +461,8 @@ class DynamicIRCBot(pydle.Client):
             user = None
            content = message
 
         # If this message is not directed to the bot
-        if not user or user != self.nickname:
+        if target != self.nickname and (not user or user != self.nickname):
             logging.info(f"Message not directed to {self.nickname}")
             # Add this message to the history either to the current 'user' context or create
             # add a new message
@@ -403,7 +479,7 @@
             return
 
         matches = re.match(r"^!([^\s]+)\s*(.*)?$", content)
-        if not matches or (self.bot_admin and source != self.bot_admin and source != self.nickname):
+        if not matches:
            logging.info(f"Non-command directed message to {self.nickname}: Invoking chat...")
            # Add this message to the history either to the current 'user' context or create
            # add a new message
@@ -425,7 +501,7 @@
         command = matches.group(1)
         arguments = matches.group(2).strip()
         logging.info(f"Command directed to {self.nickname}: command={command}, arguments={arguments}")
+        is_admin = source == self.nickname or source == self.bot_admin
 
         match command:
             case "help":
                 response = f"info, context, reset, system [prompt], server [address], join channel"
@@ -434,14 +510,19 @@
                 response = str(self.system_info)
             case "context":
-                if len(self.history) > 1:
-                    response = '"' + '","'.join(self.history[-1]['content'].split('\n')) + '"'
-                else:
-                    response = "<no context>"
+                system_log_size = total_json_length(system_log)
+                history_size = total_json_length(self.history)
+                tools_size = total_json_length(tools)
+                total_size = system_log_size + history_size + tools_size
+                response = f"\nsystem prompt: {system_log_size}"
+                response += f"\nhistory: {history_size} in {len(self.history)} entries."
+                response += f"\ntools: {tools_size} in {len(tools)} tools."
+                response += f"\ntotal context: {total_size}"
+                response += f"\ntotal tool calls: {len(tool_log)}"
             case "reset":
                 system_log = [{"role": "system", "content": system_message}]
-                history = []
+                self.history = []
                 tool_log = []
                 command_log = []
                 response = 'All contexts reset'
@@ -455,31 +536,37 @@
                 response = " ".join(lines)
             case "server":
-                server = arguments.split(" ", 1)
-                if server[0] == "":
-                    server = IRC_SERVER
-                else:
-                    server = server[0]
-                try:
-                    await self.connect(server, 6667, tls=False)
-                    response="Connected to {server}"
-                except Exception:
-                    response = f"Unable to connect to {server}"
-                    logging.exception({ "error": f"Unable to process message {content}"})
+                if not is_admin:
+                    response = "You need to be admin to use this command."
+                else:
+                    server = arguments.split(" ", 1)
+                    if server[0] == "":
+                        server = IRC_SERVER
+                    else:
+                        server = server[0]
+                    try:
+                        await self.connect(server, 6667, tls=False)
+                        response="Connected to {server}"
+                    except Exception:
+                        response = f"Unable to connect to {server}"
+                        logging.exception({ "error": f"Unable to process message {content}"})
             case "join":
-                channel = arguments.strip()
-                if channel == "" or re.match(r"\s", channel):
-                    response = "Usage: !join CHANNEL"
-                else:
-                    if not re.match(r"^#", channel):
-                        channel = f"#{channel}"
-                    if self.channel and self.channel != channel:
-                        await self.part(channel)
-                    if channel:
-                        await self.bot.join(channel)
-                    self.channel = channel
-                    response = f"Joined {channel}."
+                if not is_admin:
+                    response = "You need to be admin to use this command."
+                else:
+                    channel = arguments.strip()
+                    if channel == "" or re.match(r"\s", channel):
+                        response = "Usage: !join CHANNEL"
+                    else:
+                        if not re.match(r"^#", channel):
+                            channel = f"#{channel}"
+                        if self.channel and self.channel != channel:
+                            await self.part(channel)
+                        if channel:
+                            await self.bot.join(channel)
+                        self.channel = channel
+                        response = f"Joined {channel}."
             case _:
                 response = f"Unrecognized command: {command}"
@@ -539,7 +626,7 @@ async def create_ui():
         )
         with gr.Row(scale=0):
             clear = gr.Button("Clear")
-            timer = gr.Timer(1)
+            refresh = gr.Button("Sync with IRC")
 
     async def do_entry(message):
         if not irc_bot:
@@ -557,23 +644,15 @@
         return irc_bot.history, system_log, tool_log, command_log
 
     def update_log(history):
+        if not irc_bot:
+            return gr.skip()
         # This function updates the log after the chatbot responds
-        return system_log + history, tool_log, command_log
+        return system_log + irc_bot.history, tool_log, command_log
 
-    def check_history():
-        global last_history_len, last_command_len
-        if not irc_bot or last_history_len == len(irc_bot.history):
-            history = gr.skip()
-        else:
-            history = irc_bot.history
-            last_history_len = len(irc_bot.history)
-        if last_command_len == len(command_log):
-            commands = gr.skip()
-        else:
-            commands = command_log
-            last_command_len = len(command_log)
-        return history, commands
+    def get_history():
+        if not irc_bot:
+            return gr.skip()
+        return irc_bot.history, system_log + irc_bot.history, tool_log, command_log
 
     entry.submit(
         do_entry,
@@ -585,7 +664,7 @@ async def create_ui():
         outputs=[chat_history, tool_history, command_history]
     )
 
-    timer.tick(check_history, inputs=None, outputs=[chatbot, command_history])
+    refresh.click(get_history, inputs=None, outputs=[chatbot, chat_history, tool_history, command_history])
 
     clear.click(do_clear, inputs=None, outputs=[chatbot, chat_history, tool_history, command_history], queue=False)
@@ -606,7 +685,7 @@ async def main():
     client = ollama.Client(host=args.ollama_server)
     model = args.ollama_model
 
-    irc_bot = DynamicIRCBot(args.irc_nickname, args.irc_channel, args.bot_admin, args)
+    irc_bot = DynamicIRCBot(args.irc_nickname, args.irc_channel, args.irc_bot_admin, args)
     await irc_bot.connect(args.irc_server, args.irc_port, tls=args.irc_use_tls)
 
     if args.gradio_enable:
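
The new summarize_site coroutine only depends on the module-level client and model globals that main() sets up, so it can be exercised on its own once those are assigned. A minimal sketch, not part of the commit; the module name airc, the endpoint, and the model name are placeholder assumptions:

# Hypothetical standalone check of summarize_site(); "airc" stands in for whatever
# this bot module is actually named.
import asyncio
import ollama
import airc

airc.client = ollama.Client(host="http://localhost:11434")  # assumed local Ollama endpoint
airc.model = "qwen2.5"                                       # placeholder model name

result = asyncio.run(airc.summarize_site(
    "https://example.com",
    "What is this page about?",
))
# summarize_site() returns a dict on success and an error string on failure
print(result["content"] if isinstance(result, dict) else result)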

View File

@@ -69,6 +69,9 @@ def get_weather_by_location(city, state, country="USA"):
     # Step 3: Get the forecast data from the grid endpoint
     forecast = get_forecast(grid_endpoint)
 
+    if not forecast['location']:
+        forecast['location'] = location
+
     return forecast
 
 def get_coordinates(location):
@@ -210,7 +213,7 @@ def get_ticker_price(ticker_symbols):
     # Create a Ticker object
     try:
         ticker = yf.Ticker(ticker_symbol)
-        print(ticker)
 
         # Get the latest market data
         ticker_data = ticker.history(period="1d")
@@ -268,7 +271,7 @@ def get_current_datetime(timezone="America/Los_Angeles"):
 
 # %%
-tools = [{
+tools = [ {
     "type": "function",
     "function": {
         "name": "get_ticker_price",
@@ -285,6 +288,44 @@ tools = [{
             "additionalProperties": False
         }
     }
+}, {
+    "type": "function",
+    "function": {
+        "name": "summarize_site",
+        "description": "Requests a second LLM agent to download the requested site and answer a question about the site. For example if the user says 'What are the top headlines on cnn.com?' you would use summarize_site to get the answer.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "The website URL to download and process",
+                },
+                "question": {
+                    "type": "string",
+                    "description": "The question to ask the second LLM about the content",
+                },
+            },
+            "required": ["url", "question"],
+            "additionalProperties": False
+        },
+        "returns": {
+            "type": "object",
+            "properties": {
+                "source": {
+                    "type": "string",
+                    "description": "Identifier for the source LLM"
+                },
+                "content": {
+                    "type": "string",
+                    "description": "The complete response from the second LLM"
+                },
+                "metadata": {
+                    "type": "object",
+                    "description": "Additional information about the response"
+                }
+            }
+        }
+    }
 }, {
     "type": "function",
     "function": {
@@ -324,4 +365,4 @@
         }
     }
 }]
 
 __all__ = [ 'tools', 'get_current_datetime', 'get_weather_by_location', 'get_ticker_price' ]
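
For reference, a minimal sketch of how a tools list like the one above gets handed to the Ollama Python client so the model can request summarize_site. This is not part of the commit; the endpoint and model name are placeholder assumptions, and the dict-style response access mirrors the bot code in the first file:

# Hypothetical smoke test for the tool schema; not part of the commit.
import ollama
from tools import tools  # the list defined above

client = ollama.Client(host="http://localhost:11434")  # assumed local Ollama endpoint
messages = [{"role": "user", "content": "What are the top headlines on https://example.com?"}]

# Passing the schema via tools= lets the model reply with tool_calls instead of prose.
response = client.chat(model="qwen2.5", messages=messages, tools=tools)
for tool_call in response["message"].get("tool_calls") or []:
    print(tool_call["function"]["name"], tool_call["function"]["arguments"])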