# Reconstructed from a whitespace-collapsed diff of server/cluster.py; only
# the post-change side of the diff is kept.  Two separate regions of the
# file appear below and the code between them is not shown.

base = '/pictures/'
conn = create_connection('../db/photos.db')

faces = []
identities = []


def find_nearest_face(faces, identities, face, threshold = 0.99):
    """Return the record nearest to `face`, or None when nothing qualifies.

    Candidates are every entry of `identities` followed by every entry of
    `faces`, except `face` itself.  Distance is findCosineDistance() applied
    to the 'descriptors' entries; candidates farther away than `threshold`
    are ignored.  On a tie the earliest candidate wins.
    """
    closest = None
    closest_distance = None
    for target in identities + faces:
        # Compare by identity: `==` on these dicts would fall through to an
        # element-wise comparison of the descriptor arrays.
        if target is face:
            continue
        target_distance = findCosineDistance(
            target['descriptors'], face['descriptors']
        )
        if target_distance > threshold:
            continue
        if closest_distance is None or target_distance < closest_distance:
            closest = target
            closest_distance = target_distance
    return closest


def merge_identities(identities, identity1, identity2):
    """Build a new identity record that combines two records.

    Each record needs a 'faces' count and a 'descriptors' vector.  The new
    descriptor is the mean of the two weighted by face count, and the new
    'faces' value is the sum of both counts.  The new id is one more than
    the id of the last entry of `identities`, or 1 when the list is empty.
    `identities` itself is not modified.
    """
    weighted1 = np.multiply(identity1['faces'], identity1['descriptors'])
    weighted2 = np.multiply(identity2['faces'], identity2['descriptors'])
    total_faces = identity1['faces'] + identity2['faces']
    next_id = identities[-1]['id'] + 1 if identities else 1

    return {
        'id': next_id,
        'descriptors': np.divide(np.add(weighted1, weighted2), total_faces),
        'faces': total_faces
    }


def delete_identity(identities, identity):
    """Remove the first entry whose 'id' equals identity['id'].

    Returns the removed entry, or None when no entry matches.
    """
    for i, item in enumerate(identities):
        if item['id'] == identity['id']:
            return identities.pop(i)
    return None


def _current_identity(record):
    """Follow 'identified_as' links from `record` to the record it now stands for."""
    while 'identified_as' in record:
        record = record['identified_as']
    return record


def update_face_identity(identities, face, closest):
    """Merge the identities of `face` and `closest` into one new identity.

    Both records are first resolved through their 'identified_as' links.  If
    they already resolve to the same identity it is returned unchanged;
    merging it with itself would double its face count.  Otherwise the
    resolved records that are present in `identities` are removed, the merged
    identity is appended, and all four records are linked to it.

    Returns the identity that now represents both records.
    """
    face_identity = _current_identity(face)
    closest_identity = _current_identity(closest)
    if face_identity is closest_identity:
        return face_identity

    # Only entries of `identities` are retired; an unassigned face resolves
    # to itself and was never in that list.
    for absorbed in (face_identity, closest_identity):
        if any(entry is absorbed for entry in identities):
            delete_identity(identities, absorbed)

    identity = merge_identities(identities, face_identity, closest_identity)
    identities.append(identity)
    # Retired identities keep a forwarding link so that faces still pointing
    # at them resolve to the merged identity.
    for record in (face_identity, closest_identity, face, closest):
        record['identified_as'] = identity
    return identity


def cluster_faces(face):
    """Cluster face records into identities and return the identity list.

    `face` is the list of face records to cluster; the parameter keeps its
    original singular name so existing callers are unaffected.  Every record
    is matched against the identities built so far and the other records
    using a cosine-distance threshold of 0.25.  Progress is printed.
    """
    all_faces = face
    identities = []
    for i, item in enumerate(all_faces):
        print(f'Clustering {i+1}/{len(all_faces)}')
        closest = find_nearest_face(all_faces, identities, item, threshold = 0.25)
        if closest is None:
            print(f'Face {i+1} does not have any matches.')
            continue
        identity = update_face_identity(identities, item, closest)
#        if identity['faces'] > 2:
#            print(f'Updated identity {identity["id"]} to hold {identity["faces"]} faces.')
    return identities


def identity_get_faces(item):
    """Sort key: the number of faces held by an identity record."""
    return item['faces']


with conn:
    cur = conn.cursor()
    # Adjacent literals concatenate to the same query text as before.
    res = cur.execute(
        ' SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId'
        ' FROM faces'
        ' JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)'
        ' WHERE faces.identityId IS null AND faces.faceConfidence>0.99 '
    )
    # Index of the faces loaded so far, so each row is matched without
    # rescanning the whole list.
    faces_by_id = {known['id']: known for known in faces}
    for face_id, descriptors, confidence, photoId in res.fetchall():
        face = faces_by_id.get(face_id)
        if face is None:
            face = {
                'id': face_id,
            }
            faces.append(face)
            faces_by_id[face_id] = face
        face['faces'] = 1
        face['confidence'] = confidence
        face['photoId'] = photoId
        # frombuffer defaults to float64; presumably the blob was stored
        # that way -- TODO confirm against the code that writes it.
        face['descriptors'] = np.frombuffer(descriptors)

    identities = cluster_faces(faces)
    identities.sort(reverse = True, key = identity_get_faces)
    for identity in identities:
        print(f'{identity["id"]} has {identity["faces"]} faces')

    print(f'{len(identities)} identities seeded.')

    exit(0)
    # Nothing below runs: the script exits above and the old pairwise scan
    # is additionally disabled with `if False`.
    if False:
        for key2 in faces:
            if key1 == key2:
                continue
            # (the rest of this disabled loop lies outside the excerpt)

    # --- Second region of the file.  The lines in between are not shown and
    # --- the original nesting of this fragment is not recoverable from the
    # --- collapsed text (TODO confirm); it is kept at with-block level.
    face['scoring'] = 0
    if model_name == 'VGG-Face':
#        thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
#        thresholds = {'cosine': 0.31, 'euclidean': 0.47, 'euclidean_l2': 0.79}
        thresholds = {'cosine': 0.25, 'euclidean': 0.47, 'euclidean_l2': 0.79}
    elif model_name == 'ArcFace':
        thresholds = {'cosine': 0.68, 'euclidean': 4.15, 'euclidean_l2': 1.13}