No more pydantic errors
This commit is contained in:
parent
3fe2cfd9ef
commit
2bc7451dd9
@ -1,4 +1,4 @@
|
|||||||
from pydantic import BaseModel, Field, model_validator, PrivateAttr
|
from pydantic import BaseModel, Field, model_validator, PrivateAttr # type: ignore
|
||||||
from typing import Literal, TypeAlias, get_args, List, Generator, Iterator, AsyncGenerator, TYPE_CHECKING, Optional, ClassVar
|
from typing import Literal, TypeAlias, get_args, List, Generator, Iterator, AsyncGenerator, TYPE_CHECKING, Optional, ClassVar
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from typing import List
|
from pydantic import BaseModel # type: ignore
|
||||||
|
from typing import List, Optional, Dict, Any
|
||||||
import os
|
import os
|
||||||
import glob
|
import glob
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -12,9 +13,7 @@ import time
|
|||||||
import hashlib
|
import hashlib
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import pickle
|
|
||||||
import numpy as np # type: ignore
|
import numpy as np # type: ignore
|
||||||
import re
|
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
import ollama
|
import ollama
|
||||||
@ -25,6 +24,7 @@ from watchdog.observers import Observer # type: ignore
|
|||||||
from watchdog.events import FileSystemEventHandler # type: ignore
|
from watchdog.events import FileSystemEventHandler # type: ignore
|
||||||
import umap # type: ignore
|
import umap # type: ignore
|
||||||
from markitdown import MarkItDown # type: ignore
|
from markitdown import MarkItDown # type: ignore
|
||||||
|
from chromadb.api.models.Collection import Collection # type: ignore
|
||||||
|
|
||||||
# Import your existing modules
|
# Import your existing modules
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -42,6 +42,12 @@ __all__ = [
|
|||||||
DEFAULT_CHUNK_SIZE=750
|
DEFAULT_CHUNK_SIZE=750
|
||||||
DEFAULT_CHUNK_OVERLAP=100
|
DEFAULT_CHUNK_OVERLAP=100
|
||||||
|
|
||||||
|
class ChromaDBGetResponse(BaseModel):
    # Typed container for a batch of ChromaDB records. The file watcher
    # annotates its cached UMAP collection snapshot with this model.
    # NOTE(review): the field names presumably mirror the dict returned by
    # chromadb's Collection.get() -- confirm against the code that builds it.

    # Record identifiers; the only field without a default, so it is required.
    ids: list[str]

    # Embedding vectors; presumably parallel to `ids` -- TODO confirm.
    # None when embeddings were not supplied.
    embeddings: list[list[float]] | None = None

    # Document texts; None when documents were not supplied.
    documents: list[str] | None = None

    # Free-form metadata mappings; None when metadata was not supplied.
    metadatas: list[dict[str, Any]] | None = None
|
||||||
|
|
||||||
class ChromaDBFileWatcher(FileSystemEventHandler):
|
class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||||
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
|
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
|
||||||
chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP, recreate=False):
|
chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP, recreate=False):
|
||||||
@ -52,9 +58,9 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
|||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self.chunk_overlap = chunk_overlap
|
self.chunk_overlap = chunk_overlap
|
||||||
self.loop = loop
|
self.loop = loop
|
||||||
self._umap_collection = None
|
self._umap_collection : ChromaDBGetResponse | None = None
|
||||||
self._umap_embedding_2d : List[int]= []
|
self._umap_embedding_2d : np.ndarray = []
|
||||||
self._umap_embedding_3d = []
|
self._umap_embedding_3d : np.ndarray = []
|
||||||
self._umap_model_2d : umap.UMAP = None
|
self._umap_model_2d : umap.UMAP = None
|
||||||
self._umap_model_3d : umap.UMAP = None
|
self._umap_model_3d : umap.UMAP = None
|
||||||
self.md = MarkItDown(enable_plugins=False) # Set to True to enable plugins
|
self.md = MarkItDown(enable_plugins=False) # Set to True to enable plugins
|
||||||
@ -68,7 +74,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
|||||||
self.is_new_collection = False
|
self.is_new_collection = False
|
||||||
|
|
||||||
# Initialize ChromaDB collection
|
# Initialize ChromaDB collection
|
||||||
self._collection = self._get_vector_collection(recreate=recreate)
|
self._collection : Collection = self._get_vector_collection(recreate=recreate)
|
||||||
self._update_umaps()
|
self._update_umaps()
|
||||||
|
|
||||||
# Setup text splitter
|
# Setup text splitter
|
||||||
@ -89,15 +95,15 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
|||||||
return self._collection
|
return self._collection
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def umap_collection(self):
|
def umap_collection(self) -> ChromaDBGetResponse | None:
|
||||||
return self._umap_collection
|
return self._umap_collection
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def umap_embedding_2d(self) -> List[int]:
|
def umap_embedding_2d(self) -> np.ndarray:
|
||||||
return self._umap_embedding_2d
|
return self._umap_embedding_2d
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def umap_embedding_3d(self) -> List[int]:
|
def umap_embedding_3d(self) -> np.ndarray:
|
||||||
return self._umap_embedding_3d
|
return self._umap_embedding_3d
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -285,7 +291,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
|||||||
self._umap_embedding_3d = self._umap_model_3d.fit_transform(vectors)
|
self._umap_embedding_3d = self._umap_model_3d.fit_transform(vectors)
|
||||||
logging.info(f"3D UMAP model n_components: {self._umap_model_3d.n_components}") # Should be 3
|
logging.info(f"3D UMAP model n_components: {self._umap_model_3d.n_components}") # Should be 3
|
||||||
|
|
||||||
def _get_vector_collection(self, recreate=False):
|
def _get_vector_collection(self, recreate=False) -> Collection:
|
||||||
"""Get or create a ChromaDB collection."""
|
"""Get or create a ChromaDB collection."""
|
||||||
# Initialize ChromaDB client
|
# Initialize ChromaDB client
|
||||||
chroma_client = chromadb.PersistentClient( # type: ignore
|
chroma_client = chromadb.PersistentClient( # type: ignore
|
||||||
|
Loading…
x
Reference in New Issue
Block a user