From 1ead088eb8d01f61de2672b9802d1c0863caa5c1 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Tue, 17 Jan 2023 19:23:48 -0800 Subject: [PATCH] Clustering worked well enough; need to add face column indicating manual (expert) assignment Signed-off-by: James Ketrenos --- client/src/App.css | 2 +- client/src/App.tsx | 42 ++++++++---------------------------- ketrface/cluster.py | 43 ++++++++++++++++++++++++------------- ketrface/detect.py | 5 +++-- ketrface/ketrface/dbscan.py | 9 ++------ ketrface/ketrface/util.py | 7 +----- 6 files changed, 44 insertions(+), 64 deletions(-) diff --git a/client/src/App.css b/client/src/App.css index e238a16..cd08146 100644 --- a/client/src/App.css +++ b/client/src/App.css @@ -58,7 +58,7 @@ div { .Image .FaceBox { border: 1px solid red; - border-radius: 0.25rem; +/* border-radius: 0.25rem;*/ position: absolute; } diff --git a/client/src/App.tsx b/client/src/App.tsx index 390a3e0..523dc06 100644 --- a/client/src/App.tsx +++ b/client/src/App.tsx @@ -37,55 +37,31 @@ const makeFaceBoxes = (photo: any, dimensions: any): any => { )); }; -/* -function debounce(fn: any, ms: number) { - let timer: any; - return () => { - if (timer) clearTimeout(timer); - timer = setTimeout(() => { - timer = null - fn.apply(this as typeof Photo, arguments) - }, ms) - }; -}; -*/ - const Photo = ({ photoId }: any) => { const [image, setImage] = useState(undefined); const ref = useRef(null); - const [dimensions, setDimensions] = React.useState({ - height: window.innerHeight, - width: window.innerWidth - }) + const [dimensions, setDimensions] = React.useState({width: 0, height: 0}); const faces = useMemo(() => { - if (image === undefined) { + if (image === undefined || dimensions.height === 0) { return <>; } return makeFaceBoxes(image, dimensions); }, [image, dimensions]); - useEffect(() : any => { - if (!ref || !ref.current) { + useEffect(() => { + if (!ref.current) { return; } - const el: Element = ref.current as Element; - - const handleResize = () => { + if (dimensions.height !== el.clientHeight + || dimensions.width !== el.clientWidth) { setDimensions({ height: el.clientHeight, width: el.clientWidth }) - }; - - const debouncedHandleResize = handleResize;//debounce(handleResize, 250); - debouncedHandleResize(); - window.addEventListener('resize', debouncedHandleResize); - return () => { - window.removeEventListener('resize', debouncedHandleResize) - }; - }); + } + }/*, [dimensions.height, dimensions.width]*/); useEffect(() => { if (photoId === 0) { @@ -108,7 +84,7 @@ const Photo = ({ photoId }: any) => { return (
{ faces }
); }; diff --git a/ketrface/cluster.py b/ketrface/cluster.py index 989e92f..05f5bbd 100644 --- a/ketrface/cluster.py +++ b/ketrface/cluster.py @@ -24,6 +24,9 @@ html_base = config['basePath'] if html_base == "/": html_base = "." +MAX_CLUSTER_DISTANCE = 0.14 # Used to merge clusters +MAX_DISTANCE_FROM_CENTROID = 0.14 # Used to prune outliers + # TODO # Switch to using DBSCAN # @@ -86,8 +89,10 @@ def load_faces(db_path = db_path): res = cur.execute(''' SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus FROM faces + INNER JOIN photos ON (photos.duplicate == 0 OR photos.duplicate IS NULL) JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id) WHERE faces.identityId IS null AND faces.faceConfidence>0.99 + AND faces.photoId=photos.id ''') for row in res.fetchall(): id, descriptors, confidence, photoId, focus = row @@ -109,21 +114,27 @@ def load_faces(db_path = db_path): faces.append(face) return faces -def update_distances(identities, prune = False): +def update_distances(identities, + prune = False, + maxDistance = MAX_DISTANCE_FROM_CENTROID): removed = 0 for identity in identities: for face in identity['faces']: average = identity['descriptors'] - distance = findCosineDistance(average, face['descriptors']) - if prune and distance > MAX_EPOCH_DISTANCE: + distance = findCosineDistanceBaked(identity, face) + if prune and distance > maxDistance: average = np.dot(average, len(identity['faces'])) average = np.subtract(average, face['descriptors']) - + face['cluster'] = Undefined face['distance'] = 0 identity['faces'].remove(face) - identity['descriptors'] = np.divide(average, len(identity['faces'])) + average = np.divide(average, len(identity['faces'])) + identity['descriptors'] = average + identity['sqrtsummul'] = np.sqrt(np.sum(np.multiply( + average, average))) + removed += 1 else: face['distance'] = distance @@ -159,19 +170,19 @@ def build_straglers(faces): print('Loading faces from database') faces = load_faces() -minPts = len(faces) / 100 -eps = 0.2 + +minPts = max(len(faces) / 500, 5) +eps = 0.185 print(f'Scanning {len(faces)} faces for clusters (minPts: {minPts}, eps: {eps})') identities = DBSCAN(faces, minPts = minPts, eps = eps) print(f'{len(identities)} clusters grouped') -MAX_CLUSTER_DISTANCE = 0.15 # Used to merge clusters -MAX_EPOCH_DISTANCE = 0.14 # Used to prune outliers - # Compute average center for all clusters identities = update_cluster_averages(identities) +epoch_prune = True +merge_identities = True -if False: +if epoch_prune: removed = -1 epoch = 1 # Filter each cluster removing any face that is > cluster_max_distance @@ -179,14 +190,17 @@ if False: while removed != 0: print(f'Epoch {epoch}...') epoch += 1 - removed = update_distances(identities, prune = True) + removed = update_distances( + identities, + prune = True, + maxDistance = MAX_DISTANCE_FROM_CENTROID) if removed > 0: print(f'Excluded {removed} faces this epoch') print(f'{len(identities)} identities seeded.') reduced = identities -if False: +if merge_identities: # Cluster the clusters... print('Reducing clusters via DBSCAN') reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 3) @@ -229,9 +243,7 @@ for id, identity in enumerate(reduced): face['cluster'] = identity reduced = update_cluster_averages(reduced) - update_distances(reduced) - sort_identities(reduced) if False: @@ -294,6 +306,7 @@ print(f'Connecting to database: {db_path}') conn = create_connection(db_path) with conn: for identity in reduced: + print(f'Writing identity {identity["id"]} to DB') id = create_identity(conn, identity) for face in identity['faces']: update_face_identity(conn, face['id'], id) diff --git a/ketrface/detect.py b/ketrface/detect.py index 4ee8b8d..74e8681 100644 --- a/ketrface/detect.py +++ b/ketrface/detect.py @@ -197,9 +197,10 @@ conn = create_connection('../db/photos.db') with conn: cur = conn.cursor() res = cur.execute(''' - SELECT photos.id,photos.faces,albums.path,photos.filename FROM photos + SELECT photos.id,photos.faces,albums.path,photos.filename + FROM photos LEFT JOIN albums ON (albums.id=photos.albumId) - WHERE photos.faces=-1 + WHERE photos.faces=-1 AND photos.duplicate=0 ''') rows = res.fetchall() count = len(rows) diff --git a/ketrface/ketrface/dbscan.py b/ketrface/ketrface/dbscan.py index a1f495e..3390415 100644 --- a/ketrface/ketrface/dbscan.py +++ b/ketrface/ketrface/dbscan.py @@ -11,13 +11,8 @@ Noise = -2 # Union of two lists of dicts, adding unique elements of B to # end of A def Union(A, B): -# 5.012 of 100s sample return A + [x for x in B if x not in A] -# 5.039 of 100s sample -# for key in B: -# if key not in A: -# A.append(key) -# return A + # https://en.wikipedia.org/wiki/DBSCAN # @@ -91,7 +86,7 @@ def RangeQuery(points, Q, eps): for P in points: # Scan all points in the database if P == Q: continue - distance = findCoseinDistanceBaked(# Compute distance + distance = findCosineDistanceBaked(# Compute distance Q, P) if distance <= eps: # Check epsilon neighbors.append(P) # Add to result diff --git a/ketrface/ketrface/util.py b/ketrface/ketrface/util.py index c269e28..fa4c119 100644 --- a/ketrface/ketrface/util.py +++ b/ketrface/ketrface/util.py @@ -23,7 +23,6 @@ def redirect_off(): sys.stdout = original original = None - def zlib_uuencode(databytes, name=''): ''' Compress databytes with zlib & uuencode the result ''' inbuff = BytesIO(zlib.compress(databytes, 9)) @@ -47,15 +46,11 @@ class NpEncoder(json.JSONEncoder): if isinstance(obj, np.ndarray): return obj.tolist() -def findCoseinDistanceBaked(src, dst): +def findCosineDistanceBaked(src, dst): a = np.matmul(np.transpose(src['descriptors']), dst['descriptors']) return 1 - (a / (src['sqrtsummul'] * dst['sqrtsummul'])) def findCosineDistance(source_representation, test_representation): -# if type(source_representation) == list: -# source_representation = np.array(source_representation) -# if type(test_representation) == list: -# test_representation = np.array(test_representation) a = np.matmul(np.transpose(source_representation), test_representation) b = np.sum(np.multiply(source_representation, source_representation)) c = np.sum(np.multiply(test_representation, test_representation))