It's working pretty well now

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
James Ketr 2023-01-10 16:29:11 -08:00
parent 36b46f3300
commit 1767e807ea
13 changed files with 3074 additions and 3547 deletions


@@ -20,7 +20,8 @@ RUN wget -qO- https://deb.nodesource.com/setup_18.x | bash -
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
python2 \
jhead \
nodejs
nodejs \
jq
# Install the latest npm and npx
RUN npm install --global npm@latest


@@ -9,9 +9,9 @@ services:
# - db
restart: always
ports:
- 8134:8123
- 8135:8123
volumes:
- /multimedia/Dad:/pictures
- /home/jketreno/.pic-chalkwharf-bk:/pictures
- ${PWD}/db:/db
- ${PWD}:/website
- ${PWD}/models:/root/.deepface


@@ -6,8 +6,7 @@ import sqlite3
from sqlite3 import Error
from PIL import Image
import numpy as np
from deepface import DeepFace
from deepface.detectors import FaceDetector
import functools
from ketrface.util import *
@@ -41,6 +40,7 @@ def gen_html(identities):
photoId = face['photoId']
distance = "{:0.4f}".format(face['distance'])
confidence = "{:0.3f}".format(face['confidence'])
focus = int(face['focus'])
label = face['cluster']
if type(label) != str:
label = f'Cluster ({face["cluster"]["id"]})'
@@ -49,25 +49,40 @@ def gen_html(identities):
path = f'{html_base}/faces/{"{:02d}".format(faceId % 10)}'
print(f'<img src="{path}/{faceId}.jpg"/>')
print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;top:0px;left:0px;right:0px;padding:0.25rem">{label}: {distance}</div>')
print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence}</div>')
print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence} {focus}</div>')
print('</div>')
print('</div>')
print('</div>')
def update_cluster_averages(identities):
for identity in identities:
average = []
for face in identity['faces']:
if len(average) == 0:
average = face['descriptors']
else:
average = np.add(average, face['descriptors'])
average = np.divide(average, len(identity['faces']))
identity['descriptors'] = average
return identities
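
For reference, the accumulate-and-divide loop in update_cluster_averages computes an ordinary centroid. A minimal numpy equivalent, as a sketch (assuming every face carries an equal-length 'descriptors' vector):

# Hypothetical drop-in for the body of the identity loop above
identity['descriptors'] = np.mean(
    [face['descriptors'] for face in identity['faces']], axis=0)
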
def load_faces(db_path = db_path):
print(f'Connecting to database: {db_path}')
conn = create_connection(db_path)
faces = []
with conn:
print('Querying faces')
cur = conn.cursor()
res = cur.execute('''
SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus
FROM faces
JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
WHERE faces.identityId IS null AND faces.faceConfidence>0.99
''')
for row in res.fetchall():
id, descriptors, confidence, photoId = row
id, descriptors, confidence, photoId, focus = row
if focus is None:
focus = 100 # Assume full focus if focus not set
face = {
'id': id,
'type': 'face',
@@ -75,12 +90,38 @@ def load_faces(db_path = db_path):
'distance': 0,
'photoId': photoId,
'descriptors': np.frombuffer(descriptors),
'cluster': Undefined
'cluster': Undefined,
'focus': focus
}
face['faces'] = [ face ]
faces.append(face)
return faces
def update_distances(identities, prune = False):
removed = 0
for identity in identities:
for face in identity['faces']:
average = identity['descriptors']
distance = findCosineDistance(average, face['descriptors'])
if prune and distance > MAX_EPOCH_DISTANCE:
average = np.dot(average, len(identity['faces']))
average = np.subtract(average, face['descriptors'])
face['cluster'] = Undefined
face['distance'] = 0
identity['faces'].remove(face)
identity['descriptors'] = np.divide(average, len(identity['faces']))
removed += 1
else:
face['distance'] = distance
return removed
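
findCosineDistance is pulled in from ketrface.util and its body is not shown in this diff; a sketch of the standard cosine distance (the same formula deepface uses in its verification helpers) would be:

def findCosineDistance(a, b):
    # 1 - cosine similarity: 0 for identical direction, up to 2 for opposite
    a, b = np.asarray(a), np.asarray(b)
    return 1 - np.dot(a, b) / (np.sqrt(np.dot(a, a)) * np.sqrt(np.dot(b, b)))
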
def sort_identities(identities):
identities.sort(reverse = True, key = lambda x: len(x['faces']))
for identity in identities:
identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
def cluster_sort(A, B):
diff = A['cluster'] - B['cluster']
if diff > 0:
@@ -101,23 +142,11 @@ print('Scanning for clusters')
identities = DBSCAN(faces) # process_faces(faces)
print(f'{len(identities)} clusters grouped')
MAX_CLUSTER_DISTANCE = 0.15 # Used to merge clusters
MAX_EPOCH_DISTANCE = 0.14 # Used to prune outliers
# Compute average center for all clusters
sum = 0
for identity in identities:
sum += len(identity['faces'])
print(f'{identity["id"]} has {len(identity["faces"])} faces')
average = []
for face in identity['faces']:
if len(average) == 0:
average = face['descriptors']
else:
average = np.add(average, face['descriptors'])
average = np.divide(average, len(identity['faces']))
identity['descriptors'] = average
identities = update_cluster_averages(identities)
removed = -1
epoch = 1
@@ -126,33 +155,68 @@ epoch = 1
while removed != 0:
print(f'Epoch {epoch}...')
epoch += 1
removed = 0
for identity in identities:
for face in identity['faces']:
average = identity['descriptors']
distance = findCosineDistance(average, face['descriptors'])
if distance > 0.14:
average = np.dot(average, len(identity['faces']))
average = np.subtract(average, face['descriptors'])
face['cluster'] = Undefined
face['distance'] = 0
identity['faces'].remove(face)
identity['descriptors'] = np.divide(average, len(identity['faces']))
removed += 1
else:
face['distance'] = distance
removed = update_distances(identities, prune = True)
if removed > 0:
print(f'Excluded {removed} faces this epoch')
identities.sort(reverse = True, key = lambda x: len(x['faces']))
for identity in identities:
identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
print(f'{len(identities)} identities seeded.')
# Cluster the clusters...
print('Reducing clusters via DBSCAN')
reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)
# For each cluster, merge the lists of faces referenced in the cluster's
# "faces" field, which at this point holds clusters (not actual faces)
for cluster in reduced:
merged = []
for identity in cluster['faces']:
merged = merged + identity['faces']
cluster['faces'] = merged
# Collect the faces that have not been bound to an identity so they
# can be re-clustered in isolation from the rest of the faces
noise = []
undefined = []
clustered = []
for face in faces:
if face['cluster'] == Noise:
noise.append(face)
elif face['cluster'] == Undefined:
undefined.append(face)
print(f'Stats: Noise = {len(noise)}, Undefined = {len(undefined)}')
stragglers = DBSCAN(noise + undefined)
reduced = update_cluster_averages(reduced + stragglers)
# Give all merged identity lists a unique ID
for id, identity in enumerate(reduced):
identity['id'] = id
for face in identity['faces']:
face['cluster'] = identity
update_distances(reduced)
sort_identities(reduced)
# This computes pairwise distances between cluster centers and
# recommends merging clusters (outside of DBSCAN)
#
# Worth testing on a larger data set
for i, A in enumerate(reduced):
for k, B in enumerate(reduced):
if k < i:
continue
if A == B:
continue
distance = findCosineDistance(A['descriptors'], B['descriptors'])
if distance < MAX_CLUSTER_DISTANCE:
distance = "{:0.4f}".format(distance)
print(f'{A["id"]} to {B["id"]} = {distance}: MERGE')
print('Writing to "identities.html"')
redirect_on('identities.html')
gen_html(identities)
gen_html(reduced)
redirect_off()


@@ -1,5 +1,4 @@
import sys
import zlib
import json
import os
import piexif
@@ -13,6 +12,7 @@ import cv2
from ketrface.util import *
from ketrface.db import *
face_base = '../'
model_name = 'VGG-Face' # 'ArcFace'
detector_backend = 'mtcnn' # 'retinaface'
model = DeepFace.build_model(model_name)
@@ -92,18 +92,22 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
'right_eye': list(face['keypoints']['right_eye']),
},
'score': face['confidence'],
}
to_drop = []
# Re-implementation of 'extract_faces' with the addition of keeping a
# copy of the face image for caching on disk
for k, key in enumerate(faces):
print(f'Processing face {k+1}/{len(faces)}')
identity = faces[key]
identity['focus'] = 100 # Until laplacian variance is executed
facial_area = identity["facial_area"]
landmarks = identity["landmarks"]
left_eye = landmarks["left_eye"]
right_eye = landmarks["right_eye"]
# markup = True
markup = False
if markup == True: # Draw the face rectangle and eyes
@@ -142,16 +146,12 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
facial_img = img[top: bottom, left: right]
image = Image.fromarray(facial_img)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.cvtColor(facial_img, cv2.COLOR_BGR2GRAY)
focus = variance_of_laplacian(gray)
if focus < focus_threshold:
print(f'Dropping {k+1} due to focus {focus}.')
faces.pop(key)
identity['focus'] = focus
# Eye order is passed in reverse because the alignment routine handles them backwards
image = Image.fromarray(facial_img)
image = alignment_procedure(image, right_eye, left_eye)
image = image.resize(size = input_shape, resample = Image.LANCZOS)
resized = np.asarray(image)
@@ -174,6 +174,9 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
identity['image'] = Image.fromarray(resized)
# for key in to_drop:
# faces.pop(key)
return faces
@@ -192,9 +195,14 @@ with conn:
photoId, photoFaces, albumPath, photoFilename = row
img_path = f'{base}{albumPath}{photoFilename}'
print(f'Processing {i+1}/{count}: {img_path}')
img = Image.open(img_path)
img = ImageOps.exif_transpose(img) # auto-rotate if needed
img = img.convert()
try:
img = Image.open(img_path)
img = ImageOps.exif_transpose(img) # auto-rotate if needed
img = img.convert("RGB") # Catch "RGBA" and convert to 3-channel
except:
print(f'Unable to load / process {img_path}. Skipping.')
continue
img = np.asarray(img)
faces = extract_faces(img)
if faces is None:
@@ -221,6 +229,7 @@ with conn:
'photoId': photoId,
'scanVersion': face['version'],
'faceConfidence': face['score'],
'focus': face['focus'],
'top': face['face']['top'],
'left': face['face']['left'],
'bottom': face['face']['bottom'],
@@ -228,7 +237,7 @@ with conn:
'descriptorId': faceDescriptorId,
})
path = f'faces/{"{:02d}".format(faceId % 10)}'
path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
try:
os.mkdir(path)
except FileExistsError:


@@ -8,13 +8,14 @@ from io import BytesIO
from ketrface.util import *
face_base = "../"
faceId = int(sys.argv[1])
path = f'faces/{"{:02d}".format(faceId % 10)}'
path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
img = Image.open(f'{path}/{faceId}.jpg')
exif_dict = piexif.load(img.info["exif"])
compressed_str = exif_dict["Exif"][piexif.ExifIFD.UserComment]
str = zlib_uudecode(compressed_str)
json = json.loads(str)
print(json)
parsed = json.loads(str)
print(json.dumps(parsed, indent=2))
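
zlib_uudecode comes from ketrface.util and is not part of this diff; a plausible sketch, assuming it simply reverses a deflate-then-uuencode round trip used to pack JSON into the EXIF UserComment field:

def zlib_uudecode(databytes):
    # Assumed inverse of zlib_uuencode: uudecode first, then inflate
    inbuff = BytesIO(databytes)
    outbuff = BytesIO()
    uu.decode(inbuff, outbuff)
    return zlib.decompress(outbuff.getvalue())
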

File diff suppressed because it is too large


@@ -27,14 +27,15 @@ def create_face(conn, face):
:return: face id
"""
sql = '''
INSERT INTO faces(photoId,scanVersion,faceConfidence,top,left,bottom,right,descriptorId)
VALUES(?,?,?,?,?,?,?,?)
INSERT INTO faces(photoId,scanVersion,faceConfidence,focus,top,left,bottom,right,descriptorId)
VALUES(?,?,?,?,?,?,?,?,?)
'''
cur = conn.cursor()
cur.execute(sql, (
face['photoId'],
face['scanVersion'],
face['faceConfidence'],
face['focus'],
face['top'],
face['left'],
face['bottom'],


@@ -1,6 +1,6 @@
from ketrface.util import *
MIN_PTS = 10
MIN_PTS = 5
MAX_DISTANCE = 0.25
Undefined = 0
@@ -25,7 +25,7 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
new_perc = int(100 * (i+1) / total)
if new_perc != perc:
perc = new_perc
print(f'Clustering faces {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
if P['cluster'] != Undefined: # Previously processed in inner loop
continue
@@ -36,7 +36,8 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
C = { # Define new cluster
'id': len(clusters),
'faces': [ P ]
'faces': [ P ],
'cluster': Undefined
}
clusters.append(C)


@@ -4,6 +4,7 @@ import uu
from io import BytesIO
import json
import numpy as np
import zlib
original = None


@@ -141,7 +141,11 @@ function init() {
*/
defaultValue: 0
},
focus: { /* focus measure via Laplacian variance https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/ */
type: Sequelize.DOUBLE,
defaultValue: 0
},
top: Sequelize.FLOAT, /* 0..1 * photoId.height */
left: Sequelize.FLOAT, /* 0..1 * photoId.width */
bottom: Sequelize.FLOAT, /* 0..1 * photoId.height */
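
variance_of_laplacian, referenced by the focus column's comment above, is not defined in this diff; the conventional implementation from the linked article, sketched here under the assumption that the helper lives in ketrface.util:

import cv2

def variance_of_laplacian(gray):
    # Convolve the grayscale crop with the 3x3 Laplacian kernel and return
    # the variance of the response; low variance means few edges, i.e. blur
    return cv2.Laplacian(gray, cv2.CV_64F).var()
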