From 7a9253d64b7f97d7ac5965657e8b2e436df5d2bb Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Thu, 6 Mar 2025 17:02:41 -0800 Subject: [PATCH] ipex-2.6.10 is no longer needed --- Dockerfile | 68 ++++++--------------------------------------- README.md | 5 ++-- src/model-server.py | 8 +++--- 3 files changed, 15 insertions(+), 66 deletions(-) diff --git a/Dockerfile b/Dockerfile index 37e664c..f26c2c6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -158,64 +158,14 @@ RUN { \ ENTRYPOINT [ "/entrypoint.sh" ] -FROM pytorch AS ipex-2.6.10 - -WORKDIR /opt -RUN git clone --branch release/xpu/2.6.10 --depth 1 https://github.com/intel/intel-extension-for-pytorch.git ipex-2.6.10 -WORKDIR /opt/ipex-2.6.10 - -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ - | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \ - && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \ - | tee /etc/apt/sources.list.d/oneAPI.list \ - && apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - intel-deep-learning-essentials-2025.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - -# Requirements for building ipex / oneAPI... 
-RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - libspdlog-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - -RUN python3 -m venv --system-site-packages /opt/ipex-2.6.10/venv - -RUN { \ - echo '#!/bin/bash' ; \ - update-alternatives --set python3 /opt/python/bin/python3.11 ; \ - echo 'source /opt/intel/oneapi/setvars.sh' ; \ - echo 'source /opt/ipex-2.6.10/venv/bin/activate' ; \ - echo 'bash -c "${@}"' ; \ - } > /opt/ipex-2.6.10/shell ; \ - chmod +x /opt/ipex-2.6.10/shell - -SHELL [ "/opt/ipex-2.6.10/shell" ] - -#RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu -RUN pip3 install -r requirements.txt - -RUN git submodule update --init --recursive --depth 1 - -# Building ipex-2.6.10 wheel requires level-zero loader (libze-dev) -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - libze-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - -# torch needs to be installed -RUN pip3 install torch --index-url https://download.pytorch.org/whl/test/xpu - -RUN python setup.py bdist_wheel - FROM pytorch AS ipex-llm-src # Build ipex-llm from source -RUN git clone --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm +RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \ + && cd /opt/ipex-llm \ + && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \ + && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765 WORKDIR /opt/ipex-llm @@ -231,8 +181,8 @@ RUN { \ SHELL [ "/opt/ipex-llm/shell" ] RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu -COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/ -RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done +#COPY --from=ipex-2.6.10 
/opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/ +#RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done WORKDIR /opt/ipex-llm/python/llm RUN pip install requests wheel @@ -318,9 +268,6 @@ RUN { \ SHELL [ "/opt/airc/shell" ] RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu -# Install ipex built in ipex-2.6.10 -COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/*.whl /opt/wheels/ -RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done # Install ipex-llm built in ipex-llm-src COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/ RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done @@ -334,7 +281,8 @@ RUN pip3 install pydle transformers sentencepiece accelerate \ -p1 < /opt/airc/src/pydle.patch # mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest) -RUN pip install transformers==4.40 +# as well as MistralSdpa* things missing +RUN pip install "sentence_transformers<3.4.1" "transformers==4.40.0" RUN pip3 install pydle transformers sentencepiece accelerate diff --git a/README.md b/README.md index c4d9d6e..086590e 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ AI is Really Cool NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. -This project provides container definitions that will provide PyTorch 2.6 with -Intel's LLM project. In addition, it provides a small local chat server and an IRC client to provide a chat bot. +This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds. + +Internally, it is built using PyTorch 2.6 and Intel's IPEX-LLM. 
# Installation diff --git a/src/model-server.py b/src/model-server.py index 21cf353..80114a2 100644 --- a/src/model-server.py +++ b/src/model-server.py @@ -97,14 +97,14 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2") # Collect news from all sources documents = [] for feed in rss_feeds: - documents.extend(feed.update()) + documents.extend(feed.articles) # Step 2: Encode and store news articles into FAISS doc_vectors = np.array(embedding_model.encode(documents), dtype=np.float32) index = faiss.IndexFlatL2(doc_vectors.shape[1]) # Initialize FAISS index index.add(doc_vectors) # Store news vectors -print(f"Stored {len(doc_vectors)} documents in FAISS index.") +logging.info(f"Stored {len(doc_vectors)} documents in FAISS index.") # Step 3: Retrieval function for user queries def retrieve_documents(query, top_k=2): @@ -217,13 +217,13 @@ def chat_completions(): doc_vectors = np.array(embedding_model.encode(documents), dtype=np.float32) index = faiss.IndexFlatL2(doc_vectors.shape[1]) # Initialize FAISS index index.add(doc_vectors) # Store news vectors - print(f"Stored {len(doc_vectors)} documents in FAISS index.") + logging.info(f"Stored {len(doc_vectors)} documents in FAISS index.") response_content = "News refresh requested." else: logging.info(f"Query: {query}") retrieved_docs = retrieve_documents(query) rag_prompt = format_prompt(query, retrieved_docs) - logging.info(f"RAG prompt: {rag_prompt}") + logging.debug(f"RAG prompt: {rag_prompt}") # Get AI-generated response response_content, _ = chat.generate_response(rag_prompt)