site fetching is working
This commit is contained in:
parent
786444ce3c
commit
9c0f2ba2bb
171
jupyter/stock.py
171
jupyter/stock.py
@ -31,6 +31,7 @@ try_import('yfinance', 'yfinance')
|
||||
try_import('dotenv', 'python-dotenv')
|
||||
try_import('geopy', 'geopy')
|
||||
try_import('hyphen', 'PyHyphen')
|
||||
try_import('bs4', 'beautifulsoup4')
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from geopy.geocoders import Nominatim
|
||||
@ -42,6 +43,7 @@ import pytz
|
||||
import requests
|
||||
import yfinance as yf
|
||||
from hyphen import hyphenator
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Local defined imports
|
||||
from tools import (
|
||||
@ -71,23 +73,20 @@ BOT_ADMIN="james"
|
||||
# Globals
|
||||
system_message = f"""
|
||||
You are a helpful information agent connected to the IRC network {IRC_SERVER}. Your name is {NICK}.
|
||||
You have real time access to any website or URL the user asks about.
|
||||
Messages from users are in the form "NICK: MESSAGE". The name before the colon (:) tells you which user asked about something.
|
||||
You are running { { 'model': MODEL_NAME, 'gpu': 'Intel Arc B580', 'cpu': 'Intel Core i9-14900KS', 'ram': '64G' } }.
|
||||
You were launched on {get_current_datetime()}.
|
||||
You have real time access to current stock trading values, the current date and time, and current weather information for locations in the United States.
|
||||
You have real time access to stock prices, the current date and time, and current weather information for locations in the United States.
|
||||
If you use any real time access, do not mention your knowledge cutoff.
|
||||
Give short, courteous answers, no more than 2-3 sentences, keeping the answer less than about 100 characters.
|
||||
If you have to cut the answer short, ask the user if they want more information and provide it if they say Yes.
|
||||
Give short, courteous answers, no more than 2-3 sentences.
|
||||
Always be accurate. If you don't know the answer, say so. Do not make up details.
|
||||
|
||||
You have tools to:
|
||||
* get_current_datetime: Get current time and date.
|
||||
* get_weather_by_location: Get-real time weather forecast.
|
||||
* get_ticker_price: Get real-time value of a stock symbol.
|
||||
|
||||
Those are the only tools available.
|
||||
When you receive a response from summarize_site, you must:
|
||||
1. Review the entire content returned by the second LLM
|
||||
2. Provide the URL used to obtain the information.
|
||||
3. Incorporate the information into your response as appropriate
|
||||
"""
|
||||
system_log = [{"role": "system", "content": system_message}]
|
||||
history = []
|
||||
tool_log = []
|
||||
command_log = []
|
||||
model = None
|
||||
@ -103,12 +102,12 @@ def parse_args():
|
||||
parser.add_argument("--irc-nickname", type=str, default=NICK, help=f"Bot nickname. default={NICK}")
|
||||
parser.add_argument("--irc-channel", type=str, default=CHANNEL, help=f"Channel to join. default={CHANNEL}")
|
||||
parser.add_argument("--irc-use-tls", type=bool, default=USE_TLS, help=f"Use TLS with --irc-server. default={USE_TLS}")
|
||||
parser.add_argument("--irc-bot-admin", type=str, default=BOT_ADMIN, help=f"Nick that can send admin commands via IRC. default={BOT_ADMIN}")
|
||||
parser.add_argument("--ollama-server", type=str, default=OLLAMA_API_URL, help=f"Ollama API endpoint. default={OLLAMA_API_URL}")
|
||||
parser.add_argument("--ollama-model", type=str, default=MODEL_NAME, help=f"LLM model to use. default={MODEL_NAME}")
|
||||
parser.add_argument("--gradio-host", type=str, default=GRADIO_HOST, help=f"Host to launch gradio on. default={GRADIO_HOST} only if --gradio-enable is specified.")
|
||||
parser.add_argument("--gradio-port", type=str, default=GRADIO_PORT, help=f"Port to launch gradio on. default={GRADIO_PORT} only if --gradio-enable is specified.")
|
||||
parser.add_argument("--gradio-enable", action="store_true", default=GRADIO_ENABLE, help=f"If set to True, enable Gradio. default={GRADIO_ENABLE}")
|
||||
parser.add_argument("--bot-admin", type=str, default=BOT_ADMIN, help=f"Nick that can send admin commands via IRC. default={BOT_ADMIN}")
|
||||
parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
default=LOG_LEVEL, help=f'Set the logging level. default={LOG_LEVEL}')
|
||||
return parser.parse_args()
|
||||
@ -205,26 +204,30 @@ def split_paragraph_with_hyphenation(text, line_length=80, language='en_US'):
|
||||
return result_lines
|
||||
|
||||
# %%
|
||||
def handle_tool_calls(message):
|
||||
async def handle_tool_calls(message):
|
||||
response = []
|
||||
tools_used = []
|
||||
for tool_call in message['tool_calls']:
|
||||
arguments = tool_call['function']['arguments']
|
||||
tool = tool_call['function']['name']
|
||||
if tool == 'get_ticker_price':
|
||||
match tool:
|
||||
case 'get_ticker_price':
|
||||
ticker = arguments.get('ticker')
|
||||
if not ticker:
|
||||
ret = None
|
||||
else:
|
||||
ret = get_ticker_price(ticker)
|
||||
tools_used.append(tool)
|
||||
elif tool == 'get_current_datetime':
|
||||
case 'summarize_site':
|
||||
ret = await summarize_site(arguments.get('url'), arguments.get('question', 'what is the summary of this content?'))
|
||||
tools_used.append(tool)
|
||||
case 'get_current_datetime':
|
||||
ret = get_current_datetime(arguments.get('timezone'))
|
||||
tools_used.append(tool)
|
||||
elif tool == 'get_weather_by_location':
|
||||
case 'get_weather_by_location':
|
||||
ret = get_weather_by_location(arguments.get('city'), arguments.get('state'))
|
||||
tools_used.append(tool)
|
||||
else:
|
||||
case _:
|
||||
ret = None
|
||||
response.append({
|
||||
"role": "tool",
|
||||
@ -237,6 +240,76 @@ def handle_tool_calls(message):
|
||||
return response, tools_used
|
||||
|
||||
# %%
|
||||
def total_json_length(dict_array):
    """
    Return the combined length of the minified JSON encoding of each item.

    Args:
        dict_array (iterable): JSON-like items (typically dicts) to measure.
            None is treated as an empty collection.

    Returns:
        int: Sum of the character lengths of each item's compact JSON string.
    """
    # Compact separators (no space after ',' or ':') so the count reflects the
    # payload itself rather than pretty-printing.
    # default=str keeps this size report from raising TypeError when an entry
    # holds a value json cannot encode; such a value is measured by its str().
    return sum(
        len(json.dumps(item, separators=(',', ':'), default=str))
        for item in (dict_array or ())
    )
|
||||
|
||||
async def summarize_site(url, question, max_chars=100000):
    """
    Fetch a web page, reduce it to plain text, and ask the Ollama model a
    question about that text.

    Uses the module-level `client` and `model` for the generation request.

    Args:
        url (str): The URL of the website to download.
        question (str): The question to ask the model about the page content.
        max_chars (int): Maximum number of characters of page text placed in
            the prompt; longer text is cut and suffixed with "...".

    Returns:
        dict: On success, a dict with keys 'source' ('summarizer-llm'),
            'content' (the model's response text) and 'metadata' (the value
            returned by get_current_datetime()).
        str: On failure, a message describing the fetch or processing error.
    """
    try:
        # Fetch the webpage, presenting a browser-like User-Agent header.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        logging.info(f"Fetching {url}")
        # NOTE(review): requests.get and client.generate below are synchronous
        # calls inside a coroutine -- confirm that blocking here is acceptable.
        page = requests.get(url, headers=headers, timeout=10)
        page.raise_for_status()
        logging.info(f"{url} returned. Processing...")

        # Parse the HTML and drop script/style elements so only readable
        # text remains.
        soup = BeautifulSoup(page.text, 'html.parser')
        for tag in soup(["script", "style"]):
            tag.extract()

        # Extract the text and collapse every run of whitespace to one space.
        text = ' '.join(soup.get_text(separator=' ', strip=True).split())

        # Limit text length (the model may have token limits).
        if len(text) > max_chars:
            text = text[:max_chars] + "..."

        # Log only the size: the page text itself can be up to max_chars long.
        logging.info(f"Requesting summary of {len(text)} characters from {url}")

        # Ask the model the question about the extracted contents. The system
        # prompt is an f-string so the actual URL is substituted into it.
        prompt = f"CONTENTS:\n\n{text}\n\n{question}"
        result = client.generate(
            model=model,
            system=f"You are given the contents of {url}. Answer the question about the contents",
            prompt=prompt,
        )
        answer = result['response']
        logging.info(answer)

        return {
            'source': 'summarizer-llm',
            'content': answer,
            'metadata': get_current_datetime()
        }

    except requests.exceptions.RequestException as e:
        # Record the traceback; the caller only receives the short message.
        logging.exception(f"Error fetching {url}")
        return f"Error fetching the URL: {str(e)}"
    except Exception as e:
        # Best-effort tool: report any other failure as text instead of raising.
        logging.exception(f"Error processing content from {url}")
        return f"Error processing the website content: {str(e)}"
|
||||
|
||||
async def chat(history, is_irc=False):
|
||||
global client, model, irc_bot, system_log, tool_log
|
||||
if not client:
|
||||
@ -250,6 +323,8 @@ async def chat(history, is_irc=False):
|
||||
tools_used = []
|
||||
if 'tool_calls' in response['message']:
|
||||
message = response['message']
|
||||
tool_result, tools_used = await handle_tool_calls(message)
|
||||
|
||||
# Convert Message object to a proper dictionary format
|
||||
message_dict = {
|
||||
'role': message.get('role', 'assistant'),
|
||||
@ -262,8 +337,10 @@ async def chat(history, is_irc=False):
|
||||
for tc in message['tool_calls']
|
||||
]
|
||||
|
||||
tool_result, tools_used = handle_tool_calls(message)
|
||||
messages.append(message_dict) # Add properly formatted dict instead of Message object
|
||||
if isinstance(tool_result, list):
|
||||
messages.extend(tool_result)
|
||||
else:
|
||||
messages.append(tool_result)
|
||||
try:
|
||||
response = client.chat(model=model, messages=messages)
|
||||
@ -334,7 +411,7 @@ class DynamicIRCBot(pydle.Client):
|
||||
max_lines = 10
|
||||
irc_lines = []
|
||||
for line in message.splitlines():
|
||||
lines = split_paragraph_with_hyphenation(line, line_length=450)
|
||||
lines = split_paragraph_with_hyphenation(line, line_length=300)
|
||||
irc_lines.extend(lines)
|
||||
|
||||
# Send the first 'max_lines' non-empty lines
|
||||
@ -384,9 +461,8 @@ class DynamicIRCBot(pydle.Client):
|
||||
user = None
|
||||
content = message
|
||||
|
||||
|
||||
# If this message is not directed to the bot
|
||||
if not user or user != self.nickname:
|
||||
if target != self.nickname and (not user or user != self.nickname):
|
||||
logging.info(f"Message not directed to {self.nickname}")
|
||||
# Add this message to the history either to the current 'user' context or create
|
||||
# add a new message
|
||||
@ -403,7 +479,7 @@ class DynamicIRCBot(pydle.Client):
|
||||
return
|
||||
|
||||
matches = re.match(r"^!([^\s]+)\s*(.*)?$", content)
|
||||
if not matches or (self.bot_admin and source != self.bot_admin and source != self.nickname):
|
||||
if not matches:
|
||||
logging.info(f"Non-command directed message to {self.nickname}: Invoking chat...")
|
||||
# Add this message to the history either to the current 'user' context or create
|
||||
# add a new message
|
||||
@ -425,7 +501,7 @@ class DynamicIRCBot(pydle.Client):
|
||||
command = matches.group(1)
|
||||
arguments = matches.group(2).strip()
|
||||
logging.info(f"Command directed to {self.nickname}: command={command}, arguments={arguments}")
|
||||
|
||||
is_admin = source == self.nickname or source == self.bot_admin
|
||||
match command:
|
||||
case "help":
|
||||
response = f"info, context, reset, system [prompt], server [address], join channel"
|
||||
@ -434,14 +510,19 @@ class DynamicIRCBot(pydle.Client):
|
||||
response = str(self.system_info)
|
||||
|
||||
case "context":
|
||||
if len(self.history) > 1:
|
||||
response = '"' + '","'.join(self.history[-1]['content'].split('\n')) + '"'
|
||||
else:
|
||||
response = "<no context>"
|
||||
system_log_size = total_json_length(system_log)
|
||||
history_size = total_json_length(self.history)
|
||||
tools_size = total_json_length(tools)
|
||||
total_size = system_log_size + history_size + tools_size
|
||||
response = f"\nsystem prompt: {system_log_size}"
|
||||
response += f"\nhistory: {history_size} in {len(self.history)} entries."
|
||||
response += f"\ntools: {tools_size} in {len(tools)} tools."
|
||||
response += f"\ntotal context: {total_size}"
|
||||
response += f"\ntotal tool calls: {len(tool_log)}"
|
||||
|
||||
case "reset":
|
||||
system_log = [{"role": "system", "content": system_message}]
|
||||
history = []
|
||||
self.history = []
|
||||
tool_log = []
|
||||
command_log = []
|
||||
response = 'All contexts reset'
|
||||
@ -455,6 +536,9 @@ class DynamicIRCBot(pydle.Client):
|
||||
response = " ".join(lines)
|
||||
|
||||
case "server":
|
||||
if not is_admin:
|
||||
response = "You need to be admin to use this command."
|
||||
else:
|
||||
server = arguments.split(" ", 1)
|
||||
if server[0] == "":
|
||||
server = IRC_SERVER
|
||||
@ -468,6 +552,9 @@ class DynamicIRCBot(pydle.Client):
|
||||
logging.exception({ "error": f"Unable to process message {content}"})
|
||||
|
||||
case "join":
|
||||
if not is_admin:
|
||||
response = "You need to be admin to use this command."
|
||||
else:
|
||||
channel = arguments.strip()
|
||||
if channel == "" or re.match(r"\s", channel):
|
||||
response = "Usage: !join CHANNEL"
|
||||
@ -539,7 +626,7 @@ async def create_ui():
|
||||
)
|
||||
with gr.Row(scale=0):
|
||||
clear = gr.Button("Clear")
|
||||
timer = gr.Timer(1)
|
||||
refresh = gr.Button("Sync with IRC")
|
||||
|
||||
async def do_entry(message):
|
||||
if not irc_bot:
|
||||
@ -557,23 +644,15 @@ async def create_ui():
|
||||
return irc_bot.history, system_log, tool_log, command_log
|
||||
|
||||
def update_log(history):
|
||||
if not irc_bot:
|
||||
return gr.skip()
|
||||
# This function updates the log after the chatbot responds
|
||||
return system_log + history, tool_log, command_log
|
||||
return system_log + irc_bot.history, tool_log, command_log
|
||||
|
||||
def check_history():
|
||||
global last_history_len, last_command_len
|
||||
if not irc_bot or last_history_len == len(irc_bot.history):
|
||||
history = gr.skip()
|
||||
else:
|
||||
history = irc_bot.history
|
||||
last_history_len = len(irc_bot.history)
|
||||
if last_command_len == len(command_log):
|
||||
commands = gr.skip()
|
||||
else:
|
||||
commands = command_log
|
||||
last_command_len = len(command_log)
|
||||
|
||||
return history, commands
|
||||
def get_history():
|
||||
if not irc_bot:
|
||||
return gr.skip()
|
||||
return irc_bot.history, system_log + irc_bot.history, tool_log, command_log
|
||||
|
||||
entry.submit(
|
||||
do_entry,
|
||||
@ -585,7 +664,7 @@ async def create_ui():
|
||||
outputs=[chat_history, tool_history, command_history]
|
||||
)
|
||||
|
||||
timer.tick(check_history, inputs=None, outputs=[chatbot, command_history])
|
||||
refresh.click(get_history, inputs=None, outputs=[chatbot, chat_history, tool_history, command_history])
|
||||
|
||||
clear.click(do_clear, inputs=None, outputs=[chatbot, chat_history, tool_history, command_history], queue=False)
|
||||
|
||||
@ -606,7 +685,7 @@ async def main():
|
||||
client = ollama.Client(host=args.ollama_server)
|
||||
model = args.ollama_model
|
||||
|
||||
irc_bot = DynamicIRCBot(args.irc_nickname, args.irc_channel, args.bot_admin, args)
|
||||
irc_bot = DynamicIRCBot(args.irc_nickname, args.irc_channel, args.irc_bot_admin, args)
|
||||
await irc_bot.connect(args.irc_server, args.irc_port, tls=args.irc_use_tls)
|
||||
|
||||
if args.gradio_enable:
|
||||
|
@ -69,6 +69,9 @@ def get_weather_by_location(city, state, country="USA"):
|
||||
# Step 3: Get the forecast data from the grid endpoint
|
||||
forecast = get_forecast(grid_endpoint)
|
||||
|
||||
if not forecast['location']:
|
||||
forecast['location'] = location
|
||||
|
||||
return forecast
|
||||
|
||||
def get_coordinates(location):
|
||||
@ -210,7 +213,7 @@ def get_ticker_price(ticker_symbols):
|
||||
# Create a Ticker object
|
||||
try:
|
||||
ticker = yf.Ticker(ticker_symbol)
|
||||
|
||||
print(ticker)
|
||||
# Get the latest market data
|
||||
ticker_data = ticker.history(period="1d")
|
||||
|
||||
@ -285,6 +288,44 @@ tools = [{
|
||||
"additionalProperties": False
|
||||
}
|
||||
}
|
||||
}, {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "summarize_site",
|
||||
"description": "Requests a second LLM agent to download the requested site and answer a question about the site. For example if the user says 'What are the top headlines on cnn.com?' you would use summarize_site to get the answer.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "The website URL to download and process",
|
||||
},
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to ask the second LLM about the content",
|
||||
},
|
||||
},
|
||||
"required": ["url", "question"],
|
||||
"additionalProperties": False
|
||||
},
|
||||
"returns": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"source": {
|
||||
"type": "string",
|
||||
"description": "Identifier for the source LLM"
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The complete response from the second LLM"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Additional information about the response"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}, {
|
||||
"type": "function",
|
||||
"function": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user