From 7a9253d64b7f97d7ac5965657e8b2e436df5d2bb Mon Sep 17 00:00:00 2001
From: James Ketrenos <james_git@ketrenos.com>
Date: Thu, 6 Mar 2025 17:02:41 -0800
Subject: [PATCH] ipex-2.6.10 is no longer needed

---
 Dockerfile          | 68 ++++++---------------------------------------
 README.md           |  5 ++--
 src/model-server.py |  8 +++---
 3 files changed, 15 insertions(+), 66 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 37e664c..f26c2c6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -158,64 +158,14 @@ RUN { \
 
 ENTRYPOINT [ "/entrypoint.sh" ]
 
-FROM pytorch AS ipex-2.6.10
-
-WORKDIR /opt
-RUN git clone --branch release/xpu/2.6.10 --depth 1 https://github.com/intel/intel-extension-for-pytorch.git ipex-2.6.10
-WORKDIR /opt/ipex-2.6.10
-
-RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
-    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
-    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
-    | tee /etc/apt/sources.list.d/oneAPI.list \
-    && apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    intel-deep-learning-essentials-2025.0 \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
-
-# Requirements for building ipex / oneAPI...
-RUN apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    libspdlog-dev \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
-
-RUN python3 -m venv --system-site-packages /opt/ipex-2.6.10/venv
-
-RUN { \
-    echo '#!/bin/bash' ; \
-    update-alternatives --set python3 /opt/python/bin/python3.11 ; \
-    echo 'source /opt/intel/oneapi/setvars.sh' ; \
-    echo 'source /opt/ipex-2.6.10/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
-    } > /opt/ipex-2.6.10/shell ; \
-    chmod +x /opt/ipex-2.6.10/shell
-
-SHELL [ "/opt/ipex-2.6.10/shell" ]
-
-#RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
-RUN pip3 install -r requirements.txt
-
-RUN git submodule update --init --recursive --depth 1
-
-# Building ipex-2.6.10 wheel requires level-zero loader (libze-dev)
-RUN apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    libze-dev \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
-
-# torch needs to be installed
-RUN pip3 install torch --index-url https://download.pytorch.org/whl/test/xpu
-
-RUN python setup.py bdist_wheel
-
 FROM pytorch AS ipex-llm-src
 
 # Build ipex-llm from source
 
-RUN git clone --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm
+RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
+    && cd /opt/ipex-llm \
+    && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
+    && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765
 
 WORKDIR /opt/ipex-llm
 
@@ -231,8 +181,8 @@ RUN { \
 SHELL [ "/opt/ipex-llm/shell" ]
 
 RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
-COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/
-RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done
+#COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/
+#RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done
 
 WORKDIR /opt/ipex-llm/python/llm
 RUN pip install requests wheel
@@ -318,9 +268,6 @@ RUN { \
 SHELL [ "/opt/airc/shell" ]
 
 RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
-# Install ipex built in ipex-2.6.10
-COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/*.whl /opt/wheels/
-RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done
 # Install ipex-llm built in ipex-llm-src
 COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
 RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
@@ -334,7 +281,8 @@ RUN pip3 install pydle transformers sentencepiece accelerate \
     -p1 < /opt/airc/src/pydle.patch
 
 # mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
-RUN pip install transformers==4.40
+# as well as missing MistralSdpa* classes
+RUN pip install "sentence_transformers<3.4.1" "transformers==4.40.0"
 
 RUN pip3 install pydle transformers sentencepiece accelerate
 
diff --git a/README.md b/README.md
index c4d9d6e..086590e 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,9 @@ AI is Really Cool
 
 NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized.
 
-This project provides container definitions that will provide PyTorch 2.6 with
-Intel's LLM project. In addition, it provides a small local chat server and an IRC client to provide a chat bot.
+This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of retrieval-augmented generation (RAG) over news RSS feeds.
+
+Internally, it is built using PyTorch 2.6 and Intel's IPEX-LLM.
 
 # Installation
 
diff --git a/src/model-server.py b/src/model-server.py
index 21cf353..80114a2 100644
--- a/src/model-server.py
+++ b/src/model-server.py
@@ -97,14 +97,14 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 # Collect news from all sources
 documents = []
 for feed in rss_feeds:
-    documents.extend(feed.update())
+    documents.extend(feed.articles)
 
 # Step 2: Encode and store news articles into FAISS
 doc_vectors = np.array(embedding_model.encode(documents), dtype=np.float32)
 index = faiss.IndexFlatL2(doc_vectors.shape[1])  # Initialize FAISS index
 index.add(doc_vectors)  # Store news vectors
 
-print(f"Stored {len(doc_vectors)} documents in FAISS index.")
+logging.info(f"Stored {len(doc_vectors)} documents in FAISS index.")
 
 # Step 3: Retrieval function for user queries
 def retrieve_documents(query, top_k=2):
@@ -217,13 +217,13 @@ def chat_completions():
             doc_vectors = np.array(embedding_model.encode(documents), dtype=np.float32)
             index = faiss.IndexFlatL2(doc_vectors.shape[1])  # Initialize FAISS index
             index.add(doc_vectors)  # Store news vectors
-            print(f"Stored {len(doc_vectors)} documents in FAISS index.")
+            logging.info(f"Stored {len(doc_vectors)} documents in FAISS index.")
             response_content = "News refresh requested."
         else:
             logging.info(f"Query: {query}")
             retrieved_docs = retrieve_documents(query)
             rag_prompt = format_prompt(query, retrieved_docs)
-            logging.info(f"RAG prompt: {rag_prompt}")
+            logging.debug(f"RAG prompt: {rag_prompt}")
 
             # Get AI-generated response
             response_content, _ = chat.generate_response(rag_prompt)