Cluster using identity merging

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
2023-01-06 20:01:00 -08:00 · 2023-01-06 20:01:00 -08:00 · 0e7dbd5239
commit 0e7dbd5239
parent 9ec58d2059
1 changed file with 99 additions and 13 deletions
--- a/server/cluster.py
+++ b/server/cluster.py
@ -127,36 +127,120 @@ def l2_normalize(x):
 base = '/pictures/'
 conn = create_connection('../db/photos.db')
-faces = {}
+faces = []
 identities = []
 def find_nearest_face(faces, identities, face, threshold = 0.99):
  """Return the candidate closest to `face`, or None if none is near enough.

  Every entry of `identities`, followed by every entry of `faces`, is
  compared with `face` by calling findCosineDistance on their
  'descriptors' values. Candidates whose distance is greater than
  `threshold` are ignored; of the remaining ones the smallest distance
  wins, and the first candidate seen wins a tie.
  """
  closest = None
  closest_distance = None
  for target in identities + faces:
    # Skip the probe itself by object identity. A dict `==` comparison can
    # end up comparing the 'descriptors' values, which is ambiguous (raises
    # ValueError) for multi-element numpy arrays.
    if target is face:
      continue
    target_distance = findCosineDistance(
      target['descriptors'], face['descriptors']
    )
    if target_distance > threshold:
      continue
    if closest_distance is None or target_distance < closest_distance:
      closest = target
      closest_distance = target_distance
  return closest
 def merge_identities(identities, identity1, identity2):
  """Build a new identity that combines `identity1` and `identity2`.

  The merged 'descriptors' value is the average of the two inputs'
  descriptors, each weighted by its own 'faces' count, and the merged
  'faces' value is the sum of both counts. The new 'id' is one more than
  the 'id' of the last entry in `identities`, or 1 when `identities` is
  empty. Neither the inputs nor `identities` are modified.
  """
  # Scale each descriptor by the number of faces it already represents.
  weighted = [
    np.dot(entry['faces'], entry['descriptors'])
    for entry in (identity1, identity2)
  ]
  combined = np.add(weighted[0], weighted[1])
  total_faces = identity1['faces'] + identity2['faces']
  next_id = identities[-1]['id'] + 1 if len(identities) else 1
  return {
    'id': next_id,
    'descriptors': np.divide(combined, total_faces),
    'faces': total_faces
  }
 def delete_identity(identities, identity):
  """Remove and return the first entry of `identities` whose 'id' equals
  `identity['id']`; return None when no entry matches.
  """
  position = next(
    (
      index
      for index, entry in enumerate(identities)
      if entry['id'] == identity['id']
    ),
    None
  )
  if position is None:
    return None
  return identities.pop(position)
 def update_face_identity(identities, face, closest):
  """Merge `face` and `closest` into a single identity and record it.

  `face` and `closest` may each be a raw face, a face that already has an
  'identified_as' link, or an identity. Each is first resolved to its
  current identity by following 'identified_as' links. If both resolve to
  the same entry, nothing is merged and that entry is returned. Otherwise
  both resolved entries are removed from `identities` (if present), a
  merged identity is created with merge_identities and appended to
  `identities`, and `face`, `closest` and both superseded entries get an
  'identified_as' link to it.

  Returns the identity that now represents both inputs.
  """
  # Follow forwarding links so a face tagged with an identity that has
  # since been merged into a newer one resolves to the newer identity.
  face_identity = face
  while 'identified_as' in face_identity:
    face_identity = face_identity['identified_as']
  closest_identity = closest
  while 'identified_as' in closest_identity:
    closest_identity = closest_identity['identified_as']

  # Both already belong together; merging an identity with itself would
  # double its face count.
  if face_identity is closest_identity:
    return face_identity

  # Drop the superseded entries by object identity rather than by 'id':
  # raw faces carry their own 'id' values, and a removed identity's number
  # can be handed out again by merge_identities.
  for superseded in (face_identity, closest_identity):
    for index, item in enumerate(identities):
      if item is superseded:
        del identities[index]
        break

  identity = merge_identities(identities, face_identity, closest_identity)
  identities.append(identity)
  # Point everything involved at the merged identity so later lookups
  # through any of these entries reach it.
  for node in (face, closest, face_identity, closest_identity):
    node['identified_as'] = identity
  return identity
 def cluster_faces(face):
  """Group faces into identities and return the list of identities.

  `face` is the list of face dicts to cluster; the parameter keeps its
  singular name so existing callers are unaffected. For each face, the
  nearest other face or identity within a distance of 0.25 is looked up
  with find_nearest_face; when one is found the pair is merged through
  update_face_identity, otherwise the face is reported as unmatched and
  skipped. Progress is printed for every face.
  """
  # Iterate the argument rather than the module-level `faces` list so the
  # function clusters exactly what it was given.
  candidates = face
  total = len(candidates)
  identities = []
  for i, current in enumerate(candidates):
    print(f'Clustering {i+1}/{total}')
    closest = find_nearest_face(candidates, identities, current, threshold = 0.25)
    if closest is None:
      print(f'Face {i+1} does not have any matches.')
      continue
    update_face_identity(identities, current, closest)
  return identities
 def identity_get_faces(item):
  """Return the 'faces' value stored on `item`."""
  face_count = item['faces']
  return face_count
 with conn:
  cur = conn.cursor()
  res = cur.execute('''
    SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
    FROM faces
    JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
-    WHERE faces.identityId IS null AND faces.faceConfidence>0.945
+    WHERE faces.identityId IS null AND faces.faceConfidence>0.99
    ''')
  for row in res.fetchall():
    id, descriptors, confidence, photoId = row
-    if id in faces:
+    face = None
-      face = faces[id]
+    for target in faces:
-    else:
+      if target['id'] == id:
        face = target
        break
    if face == None:
      face = { 
        'id': id,
        'scanned': False 
      }
-      faces[id] = face
+      faces.append(face)
    face['faces'] = 1
    face['confidence'] = confidence
    face['photoId'] = photoId
    face['descriptors'] = np.frombuffer(descriptors)
-  for key1 in faces:
+  identities = cluster_faces(faces)
-    face1 = faces[key1]
+  identities.sort(reverse = True, key = identity_get_faces)
-    if face1['scanned'] == True:
+  for identity in identities:
-      continue
+    print(f'{identity["id"]} has {identity["faces"]} faces')
-    face1['scanned'] = True
+
  print(f'{len(identities)} identities seeded.')
  exit(0)
  if False:
    for key2 in faces:
      if key1 == key2:
        continue
@ -184,7 +268,9 @@ with conn:
      face['scoring'] = 0
      if model_name == 'VGG-Face':
-        thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
+#        thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
 #        thresholds = {'cosine': 0.31, 'euclidean': 0.47, 'euclidean_l2': 0.79}
        thresholds = {'cosine': 0.25, 'euclidean': 0.47, 'euclidean_l2': 0.79}
      elif model_name == 'ArcFace':
        thresholds = {'cosine': 0.68, 'euclidean': 4.15, 'euclidean_l2': 1.13}