RAG is working

2025-03-08 12:25:21 -08:00 · 2025-03-08 12:25:21 -08:00 · dd18ca858b
commit dd18ca858b
parent 8027b5f8e3
2 changed files with 126 additions and 31 deletions
--- a/1
+++ b/1
@@ -86,6 +86,7 @@ RUN pip3 install pydle \
 RUN pip install setuptools --upgrade
 RUN pip install ollama 
 RUN pip install feedparser bs4 chromadb
+RUN pip install tiktoken

 SHELL [ "/bin/bash", "-c" ]

--- a/src/chunk.py
+++ b/src/chunk.py
@@ -77,6 +77,7 @@ class Feed():

 # News RSS Feeds
 rss_feeds = [
+    Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
    Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
    Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
    Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
@@ -173,7 +174,8 @@ def chunk_document(document: Dict[str, Any],

 def init_chroma_client(persist_directory: str = PERSIST_DIRECTORY):
    """Initialize and return a ChromaDB client."""
-    return chromadb.PersistentClient(path=persist_directory)
+#    return chromadb.PersistentClient(path=persist_directory)
+    return chromadb.Client()

 def create_or_get_collection(client, collection_name: str):
    """Create or get a ChromaDB collection."""
@@ -352,6 +354,19 @@ def show_headlines(documents=None):
    for doc in documents:
        print(f"{doc['source']}: {doc['title']}")

+def show_help():
+    print("""help>
+docs       Show RAG docs
+full       Show last full top match
+headlines  Show the RAG headlines
+prompt     Show the last prompt
+response   Show the last response
+scores     Show last RAG scores
+why|think        Show last response's <think>
+context|match    Show RAG match info to last prompt
+""")
+
+
 # Example usage
 if __name__ == "__main__":
    documents = []
@@ -371,27 +386,71 @@ if __name__ == "__main__":
    )

    last_results = None
+    last_prompt = None
+    last_system = None
+    last_response = None
+    last_why = None
+    last_messages = []
    while True:
        try:
            search_query = input("> ").strip()
-        except Exception as e:
+        except KeyboardInterrupt as e:
+            print("\nExiting.")
+            break
+
+        if search_query == "exit" or search_query == "quit":
+            print("\nExiting.")
            break
    
        if search_query == "docs":
            show_documents(documents)
            continue

-        if search_query == "":
+        if search_query == "prompt":
+            if last_prompt:
+                print(f"""last prompt>
+{"="*10}system{"="*10}
+{last_system}
+{"="*10}prompt{"="*10}
+{last_prompt}""")
+            else:
+                print(f"No prompts yet")
+            continue
+
+        if search_query == "response":
+            if last_response:
+                print(f"""last response>
+{"="*10}response{"="*10}
+{last_response}""")
+            else:
+                print(f"No responses yet")
+            continue
+
+        if search_query == "" or search_query == "help":
+            show_help()
+            continue
+
+        if search_query == "headlines":
            show_headlines(documents)
            continue

-        if search_query == "why":
+        if search_query == "match" or search_query == "context":
            if last_results:
                print_top_match(last_results, documents=documents)
            else:
                print("No match to give info on")
            continue

+        if search_query == "why" or search_query == "think":
+            if last_why:
+                print(f"""
+why>
+{last_why}
+""")
+            else:
+                print("No processed prompts")
+            continue
+
        if search_query == "scores":
            if last_results:
                for i, _ in enumerate(last_results):
@@ -400,7 +459,6 @@ if __name__ == "__main__":
                print("No match to give info on")
            continue

-
        if search_query == "full":
            if last_results:
                full = get_top_match(last_results, documents=documents)
@@ -430,39 +488,75 @@ Full text:
        for doc in documents:
            headlines += f"{doc['source']}: {doc['title']}\n"

-        system=f"""
-News headlines:
-
-{headlines}
-
-"""
-        if full:
-            system += f"""
-
-Make reference to the following and provide the 'Link':
-
-Source: {full["source"]}
-Link: {full["link"]}
-Text: {full["text"]}
+        system="""
+You are the assistant. Your name is airc.

 Do not ask to help the user further.

+Provide short (less than 100 character) responses.
+
+Rules:
+* If the user asks for information about the AI model, how, or who wrote it, provide information about the author from inside the <author></author> tags.
+* If you think the user might be asking about the author, ask a follow up question to clarify.
+* If there is news in between the <input></input> tags relevant to the prompt, use that. Always mention the source when information comes from an item. If asked for the link, provide it.
+* Respond to the prompt in a single, direct response.
+* Do not prefix it with a word like "Answer"
+
+You must follow the rules.
+
 """
-            print(f"""Context:

-Source: {full["source"]}
-Title: {full["title"]}
-Distance: {last_results.get("distances", [[0]])[0][0]}
-Link: {full["link"]}""")
-            
-        continue
+# * If a user asks for weather information, include in your response "{{weather_query("country", "city", "state")}}" where the description of the weather should go.

+        context = f"""<author>
+author={[
+{'info': 'James wrote the python application that is driving this RAG model on top of deepseek-r1:7b. You can find it at https://github.com/jketreno/airc'},
+{'info': 'James Ketrenos wrote the program deploying this AI model with RAG.'},
+{'info': 'James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with pytorch and ollama.'},
+{'info': 'James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Williamette University.'}
+]}
+</author>"""
+
+
+        context += "<input>additional information unrelated to James Ketrenos = ["
+        for doc in documents:
+            item = {'source':doc["source"],'article':{'title':doc["title"],'link':doc["link"],'text':doc["text"]}}
+            context += f"{item}"
+        context += """]
+</input>
+"""
+        prompt = f"{context}{search_query}"
+        last_prompt = prompt
+        last_system = system
+        if len(last_messages) != 0:
+            message_context = f"{last_messages}"
+            prompt = f"{message_context}{prompt}"
+
+        print(f"system len: {len(system)}")            
+        print(f"prompt len: {len(prompt)}")            
        output = client.generate(
            model=MODEL_NAME,
-            system=system,
-            prompt=f"Respond to this prompt: {search_query}",
-            stream=False
+            system=f"{system}{context}",
+            prompt=prompt,
+            stream=False,
+            options={ 'num_ctx': 100000 }
        )
        # Prune off the <think>...</think>
-        content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
-        print(f"Response> {content}")
+        matches = re.match(r'^<think>(.*?)</think>(.*)$', output['response'], flags=re.DOTALL)
+        if matches:
+            last_why = matches[1].strip()
+            content = matches[2].strip()
+        else:
+            print(f"[garbled] response>\n{output['response']}")
+        print(f"Response>\n{content}")
+
+        last_response = content
+        last_messages.extend(({
+            'role': 'user',
+            'name': 'james',
+            'message': search_query
+        }, {
+            'role': 'assistant',
+            'message': content
+        }))
+        last_messages = last_messages[:10]