Cluster using identity merging

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
This commit is contained in:
James Ketr 2023-01-06 20:01:00 -08:00
parent 9ec58d2059
commit 0e7dbd5239

View File

@ -127,36 +127,120 @@ def l2_normalize(x):
# Presumably the path prefix under which pictures are stored; it is not used
# in the code visible here -- TODO confirm against the rest of the file.
base = '/pictures/'
# Database handle used by the `with conn:` section further down.
# create_connection is defined outside this view.
conn = create_connection('../db/photos.db')
# NOTE(review): `faces` is bound to a dict and then immediately rebound to a
# list. The first binding looks like the pre-change line of this diff;
# the clustering helpers below iterate `faces` as a list of face dicts.
faces = {}
faces = []
# Module-level list of identities; rebound later to the result of
# cluster_faces().
identities = []
def find_nearest_face(faces, identities, face, threshold = 0.99):
    """Return the candidate closest to ``face`` by cosine distance, or None.

    Candidates are every entry of ``identities`` followed by every entry of
    ``faces``; the ``face`` object itself is skipped.

    Args:
        faces: list of face dicts, each holding a 'descriptors' entry.
        identities: list of identity dicts, each holding a 'descriptors'
            entry.
        face: the dict to find a neighbour for.
        threshold: largest distance still accepted as a match; candidates
            farther away than this are ignored.

    Returns:
        The candidate dict with the smallest distance that does not exceed
        ``threshold``, or None when no candidate qualifies. On equal
        distances the earliest candidate wins.
    """
    closest = None
    closest_distance = None
    # Walk both pools in order instead of concatenating them, which would
    # copy the two lists on every call.
    for pool in (identities, faces):
        for target in pool:
            # Identity check on purpose: `==` on these dicts compares them
            # field by field and can end up comparing the NumPy descriptor
            # arrays, whose truth value is ambiguous.
            if target is face:
                continue
            distance = findCosineDistance(
                target['descriptors'], face['descriptors']
            )
            if distance > threshold:
                continue
            if closest_distance is None or distance < closest_distance:
                closest = target
                closest_distance = distance
    return closest
def merge_identities(identities, identity1, identity2):
    """Build a new identity that combines ``identity1`` and ``identity2``.

    The new descriptors are the average of both inputs' descriptors, each
    weighted by its own 'faces' count. Neither input nor ``identities`` is
    modified; the caller is responsible for appending the result.

    Args:
        identities: current list of identity dicts; only used to pick the
            next id (last entry's 'id' plus one, or 1 when the list is empty).
        identity1: dict with 'faces' and 'descriptors' entries.
        identity2: dict with 'faces' and 'descriptors' entries.

    Returns:
        A dict with the keys 'id', 'descriptors' and 'faces'.
    """
    # Scale each descriptor by the number of faces it already represents.
    weighted1 = np.dot(identity1['faces'], identity1['descriptors'])
    weighted2 = np.dot(identity2['faces'], identity2['descriptors'])
    weighted_total = np.add(weighted1, weighted2)
    face_count = identity1['faces'] + identity2['faces']

    next_id = identities[-1]['id'] + 1 if len(identities) else 1

    return {
        'id': next_id,
        'descriptors': np.divide(weighted_total, face_count),
        'faces': face_count,
    }
def delete_identity(identities, identity):
    """Remove the first entry of ``identities`` whose 'id' matches.

    Args:
        identities: list of identity dicts; modified in place.
        identity: dict whose 'id' selects the entry to remove.

    Returns:
        The removed dict, or None when no entry has a matching 'id'.
    """
    position = next(
        (
            index
            for index, candidate in enumerate(identities)
            if candidate['id'] == identity['id']
        ),
        None,
    )
    if position is None:
        return None
    return identities.pop(position)
def update_face_identity(identities, face, closest):
    """Merge ``face`` and ``closest`` into one new identity and record it.

    For each of the two inputs: when it already carries an 'identified_as'
    entry, that identity is removed from ``identities`` and used as the merge
    source; otherwise the input itself is the merge source. The merged
    identity is appended to ``identities`` and stored under 'identified_as'
    on both ``face`` and ``closest``.

    Only ``face`` and ``closest`` are re-pointed; any other dict whose
    'identified_as' referenced a removed identity keeps that reference.

    Args:
        identities: list of identity dicts; modified in place.
        face: dict being clustered.
        closest: the match found for ``face``.

    Returns:
        The newly created identity dict.
    """
    sources = []
    # Order matters: the old identities are removed before merging, because
    # merge_identities derives the new id from the list's last entry.
    for member in (face, closest):
        if 'identified_as' not in member:
            sources.append(member)
            continue
        previous = member['identified_as']
        delete_identity(identities, previous)
        sources.append(previous)

    face_identity, closest_identity = sources
    identity = merge_identities(identities, face_identity, closest_identity)
    identities.append(identity)

    closest['identified_as'] = identity
    face['identified_as'] = identity
    return identity
def cluster_faces(face, threshold = 0.25):
    """Cluster a list of faces into identities.

    Each face is compared against all faces and the identities built so far;
    when a match within ``threshold`` exists, the two are merged into a new
    identity via update_face_identity.

    Args:
        face: the list of face dicts to cluster. The parameter keeps its
            historical singular name so existing callers are unaffected.
        threshold: maximum distance passed to find_nearest_face for two
            entries to count as a match.

    Returns:
        The list of identity dicts that were created.
    """
    # Use the argument rather than the module-level `faces`, and avoid
    # shadowing it with the loop variable.
    all_faces = face
    identities = []
    total = len(all_faces)
    for i, current in enumerate(all_faces):
        print(f'Clustering {i+1}/{total}')
        closest = find_nearest_face(
            all_faces, identities, current, threshold = threshold
        )
        if closest is None:
            print(f'Face {i+1} does not have any matches.')
            continue
        update_face_identity(identities, current, closest)
    return identities
def identity_get_faces(item):
    """Return the 'faces' entry of ``item``; used as a sort key."""
    face_count = item['faces']
    return face_count
with conn:
cur = conn.cursor()
res = cur.execute('''
SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
FROM faces
JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
WHERE faces.identityId IS null AND faces.faceConfidence>0.945
WHERE faces.identityId IS null AND faces.faceConfidence>0.99
''')
for row in res.fetchall():
id, descriptors, confidence, photoId = row
if id in faces:
face = faces[id]
else:
face = None
for target in faces:
if target['id'] == id:
face = target
break
if face == None:
face = {
'id': id,
'scanned': False
}
faces[id] = face
faces.append(face)
face['faces'] = 1
face['confidence'] = confidence
face['photoId'] = photoId
face['descriptors'] = np.frombuffer(descriptors)
for key1 in faces:
face1 = faces[key1]
if face1['scanned'] == True:
continue
face1['scanned'] = True
identities = cluster_faces(faces)
identities.sort(reverse = True, key = identity_get_faces)
for identity in identities:
print(f'{identity["id"]} has {identity["faces"]} faces')
print(f'{len(identities)} identities seeded.')
exit(0)
if False:
for key2 in faces:
if key1 == key2:
continue
@ -184,7 +268,9 @@ with conn:
face['scoring'] = 0
if model_name == 'VGG-Face':
thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
# thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
# thresholds = {'cosine': 0.31, 'euclidean': 0.47, 'euclidean_l2': 0.79}
thresholds = {'cosine': 0.25, 'euclidean': 0.47, 'euclidean_l2': 0.79}
elif model_name == 'ArcFace':
thresholds = {'cosine': 0.68, 'euclidean': 4.15, 'euclidean_l2': 1.13}