It's working pretty well now

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
James Ketr 2023-01-10 16:29:11 -08:00
parent 36b46f3300
commit 1767e807ea
13 changed files with 3074 additions and 3547 deletions

View File

@@ -20,7 +20,8 @@ RUN wget -qO- https://deb.nodesource.com/setup_18.x | bash -
 RUN DEBIAN_FRONTEND=NONINTERACTIVE apt-get install -y \
     python2 \
     jhead \
-    nodejs
+    nodejs \
+    jq

 # Install the latest npm and npx
 RUN npm install --global npm@latest

View File

@@ -9,9 +9,9 @@ services:
 #      - db
     restart: always
     ports:
-      - 8134:8123
+      - 8135:8123
     volumes:
-      - /multimedia/Dad:/pictures
+      - /home/jketreno/.pic-chalkwharf-bk:/pictures
       - ${PWD}/db:/db
       - ${PWD}:/website
       - ${PWD}/models:/root/.deepface

View File

@@ -6,8 +6,7 @@ import sqlite3
 from sqlite3 import Error
 from PIL import Image
 import numpy as np
-from deepface import DeepFace
-from deepface.detectors import FaceDetector
 import functools
 from ketrface.util import *
@@ -41,6 +40,7 @@ def gen_html(identities):
             photoId = face['photoId']
             distance = "{:0.4f}".format(face['distance'])
             confidence = "{:0.3f}".format(face['confidence'])
+            focus = int(face['focus'])
             label = face['cluster']
             if type(label) != str:
                 label = f'Cluster ({face["cluster"]["id"]})'
@@ -49,25 +49,40 @@
             path = f'{html_base}/faces/{"{:02d}".format(faceId % 10)}'
             print(f'<img src="{path}/{faceId}.jpg"/>')
             print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;top:0px;left:0px;right:0px;padding:0.25rem">{label}: {distance}</div>')
-            print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence}</div>')
+            print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence} {focus}</div>')
             print('</div>')
         print('</div>')
     print('</div>')

+def update_cluster_averages(identities):
+    for identity in identities:
+        average = []
+        for face in identity['faces']:
+            if len(average) == 0:
+                average = face['descriptors']
+            else:
+                average = np.add(average, face['descriptors'])
+        average = np.divide(average, len(identity['faces']))
+        identity['descriptors'] = average
+    return identities
+
 def load_faces(db_path = db_path):
+    print(f'Connecting to database: {db_path}')
     conn = create_connection(db_path)
     faces = []
     with conn:
+        print('Querying faces')
         cur = conn.cursor()
         res = cur.execute('''
-            SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
+            SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus
             FROM faces
             JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
             WHERE faces.identityId IS null AND faces.faceConfidence>0.99
         ''')
         for row in res.fetchall():
-            id, descriptors, confidence, photoId = row
+            id, descriptors, confidence, photoId, focus = row
+            if focus is None:
+                focus = 100 # Assume full focus if focus not set
             face = {
                 'id': id,
                 'type': 'face',
@@ -75,12 +90,38 @@ def load_faces(db_path = db_path):
                 'distance': 0,
                 'photoId': photoId,
                 'descriptors': np.frombuffer(descriptors),
-                'cluster': Undefined
+                'cluster': Undefined,
+                'focus': focus
             }
             face['faces'] = [ face ]
             faces.append(face)
     return faces

+def update_distances(identities, prune = False):
+    removed = 0
+    for identity in identities:
+        for face in identity['faces']:
+            average = identity['descriptors']
+            distance = findCosineDistance(average, face['descriptors'])
+            if prune and distance > MAX_EPOCH_DISTANCE:
+                average = np.dot(average, len(identity['faces']))
+                average = np.subtract(average, face['descriptors'])
+                face['cluster'] = Undefined
+                face['distance'] = 0
+                identity['faces'].remove(face)
+                identity['descriptors'] = np.divide(average, len(identity['faces']))
+                removed += 1
+            else:
+                face['distance'] = distance
+    return removed
+
+def sort_identities(identities):
+    identities.sort(reverse = True, key = lambda x: len(x['faces']))
+    for identity in identities:
+        identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
+
 def cluster_sort(A, B):
     diff = A['cluster'] - B['cluster']
     if diff > 0:
@@ -101,23 +142,11 @@ print('Scanning for clusters')
 identities = DBSCAN(faces) # process_faces(faces)
 print(f'{len(identities)} clusters grouped')

+MAX_CLUSTER_DISTANCE = 0.15 # Used to merge clusters
+MAX_EPOCH_DISTANCE = 0.14 # Used to prune outliers
+
 # Compute average center for all clusters
-sum = 0
-for identity in identities:
-    sum += len(identity['faces'])
-    print(f'{identity["id"]} has {len(identity["faces"])} faces')
-    average = []
-    for face in identity['faces']:
-        if len(average) == 0:
-            average = face['descriptors']
-        else:
-            average = np.add(average, face['descriptors'])
-    average = np.divide(average, len(identity['faces']))
-    identity['descriptors'] = average
+identities = update_cluster_averages(identities)

 removed = -1
 epoch = 1
@@ -126,33 +155,68 @@ epoch = 1
 while removed != 0:
     print(f'Epoch {epoch}...')
     epoch += 1
-    removed = 0
-    for identity in identities:
-        for face in identity['faces']:
-            average = identity['descriptors']
-            distance = findCosineDistance(average, face['descriptors'])
-            if distance > 0.14:
-                average = np.dot(average, len(identity['faces']))
-                average = np.subtract(average, face['descriptors'])
-                face['cluster'] = Undefined
-                face['distance'] = 0
-                identity['faces'].remove(face)
-                identity['descriptors'] = np.divide(average, len(identity['faces']))
-                removed += 1
-            else:
-                face['distance'] = distance
+    removed = update_distances(identities, prune = True)
     if removed > 0:
         print(f'Excluded {removed} faces this epoch')

-identities.sort(reverse = True, key = lambda x: len(x['faces']))
-for identity in identities:
-    identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
-
 print(f'{len(identities)} identities seeded.')

+# Cluster the clusters...
+print('Reducing clusters via DBSCAN')
+reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)
+
+# For each cluster, merge the lists of faces referenced in the cluster's
+# "faces" field, which points to clusters (and not actual faces)
+for cluster in reduced:
+    merged = []
+    for identity in cluster['faces']:
+        merged = merged + identity['faces']
+    cluster['faces'] = merged
+
+# Create a set containing those faces which have not been bound
+# to an identity, to re-cluster them in isolation from the rest
+# of the faces
+noise = []
+undefined = []
+clustered = []
+for face in faces:
+    if face['cluster'] == Noise:
+        noise.append(face)
+    elif face['cluster'] == Undefined:
+        undefined.append(face)
+
+print(f'Stats: Noise = {len(noise)}, Undefined = {len(undefined)}')
+stragglers = DBSCAN(noise + undefined)
+reduced = update_cluster_averages(reduced + stragglers)
+
+# Give all merged identity lists a unique ID
+for id, identity in enumerate(reduced):
+    identity['id'] = id
+    for face in identity['faces']:
+        face['cluster'] = identity
+
+update_distances(reduced)
+sort_identities(reduced)
+
+# This generates a set of differences between clusters and makes
+# a recommendation to merge clusters (outside of DBSCAN)
+#
+# Worth testing on a larger data set
+for i, A in enumerate(reduced):
+    for k, B in enumerate(reduced):
+        if k < i:
+            continue
+        if A == B:
+            continue
+        distance = findCosineDistance(A['descriptors'], B['descriptors'])
+        if distance < MAX_CLUSTER_DISTANCE:
+            distance = "{:0.4f}".format(distance)
+            print(f'{A["id"]} to {B["id"]} = {distance}: MERGE')
+
 print('Writing to "identities.html"')
 redirect_on('identities.html')
-gen_html(identities)
+gen_html(reduced)
 redirect_off()
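
Note: the clustering script leans on findCosineDistance from ketrface.util, which this commit never shows. A minimal sketch of what it presumably computes — the standard cosine distance between two descriptor vectors, matching deepface's convention — is:

    import numpy as np

    def findCosineDistance(a, b):
        # Cosine distance: 0 for identical direction, approaching 2 for opposite
        a = np.asarray(a)
        b = np.asarray(b)
        return 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

Under that assumption, each epoch prunes any face whose descriptor sits more than MAX_EPOCH_DISTANCE (0.14) from its cluster's running mean, then recomputes the mean without it.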

View File

@ -1,5 +1,4 @@
import sys import sys
import zlib
import json import json
import os import os
import piexif import piexif
@@ -13,6 +12,7 @@ import cv2
 from ketrface.util import *
 from ketrface.db import *

+face_base = '../'
 model_name = 'VGG-Face' # 'ArcFace'
 detector_backend = 'mtcnn' # 'retinaface'
 model = DeepFace.build_model(model_name)
@@ -92,18 +92,22 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
                 'right_eye': list(face['keypoints']['right_eye']),
             },
             'score': face['confidence'],
         }

+    to_drop = []
+
     # Re-implementation of 'extract_faces' with the addition of keeping a
     # copy of the face image for caching on disk
     for k, key in enumerate(faces):
         print(f'Processing face {k+1}/{len(faces)}')
         identity = faces[key]
+        identity['focus'] = 100 # Until laplacian variance is executed
         facial_area = identity["facial_area"]
         landmarks = identity["landmarks"]
         left_eye = landmarks["left_eye"]
         right_eye = landmarks["right_eye"]

         # markup = True
         markup = False
         if markup == True: # Draw the face rectangle and eyes
@@ -142,16 +146,12 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
         facial_img = img[top: bottom, left: right]

-        image = Image.fromarray(facial_img)
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        gray = cv2.cvtColor(facial_img, cv2.COLOR_BGR2GRAY)
         focus = variance_of_laplacian(gray)
-        if focus < focus_threshold:
-            print(f'Dropping {ke+1} due to focus {focus}.')
-            faces.pop(key)
         identity['focus'] = focus

         # Eye order is reversed as the routine does them backwards
+        image = Image.fromarray(facial_img)
         image = alignment_procedure(image, right_eye, left_eye)
         image = image.resize(size = input_shape, resample = Image.LANCZOS)
         resized = np.asarray(image)
@@ -174,6 +174,9 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
         identity['image'] = Image.fromarray(resized)

+    # for key in to_drop:
+    #     faces.pop(key)
+
     return faces
@@ -192,9 +195,14 @@ with conn:
         photoId, photoFaces, albumPath, photoFilename = row
         img_path = f'{base}{albumPath}{photoFilename}'
         print(f'Processing {i+1}/{count}: {img_path}')
+        try:
             img = Image.open(img_path)
             img = ImageOps.exif_transpose(img) # auto-rotate if needed
-            img = img.convert()
+            img = img.convert("RGB") # Catch "RGBA" and convert to 3-channel
+        except:
+            print(f'Unable to load / process {img_path}. Skipping.')
+            continue
         img = np.asarray(img)
         faces = extract_faces(img)
         if faces is None:
@@ -221,6 +229,7 @@ with conn:
                 'photoId': photoId,
                 'scanVersion': face['version'],
                 'faceConfidence': face['score'],
+                'focus': face['focus'],
                 'top': face['face']['top'],
                 'left': face['face']['left'],
                 'bottom': face['face']['bottom'],
@@ -228,7 +237,7 @@ with conn:
                 'descriptorId': faceDescriptorId,
             })

-            path = f'faces/{"{:02d}".format(faceId % 10)}'
+            path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
             try:
                 os.mkdir(path)
             except FileExistsError:
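
Note: variance_of_laplacian also comes from ketrface.util and is not shown in this commit. Judging by the pyimagesearch blur-detection reference in the schema comment further down, it is presumably the usual OpenCV one-liner:

    import cv2

    def variance_of_laplacian(gray):
        # Convolve with the Laplacian and take the variance of the response;
        # low variance means few edges, i.e. a blurry crop
        return cv2.Laplacian(gray, cv2.CV_64F).var()

The commit now stores the focus score on each face instead of dropping blurry faces outright: the old in-loop faces.pop() (which referenced an undefined `ke` and mutated the dict during iteration) is removed, and to_drop is left as a commented-out placeholder.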

View File

@@ -8,13 +8,14 @@ from io import BytesIO
 from ketrface.util import *

+face_base = "../"
+
 faceId = int(sys.argv[1])
-path = f'faces/{"{:02d}".format(faceId % 10)}'
+path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
 img = Image.open(f'{path}/{faceId}.jpg')

 exif_dict = piexif.load(img.info["exif"])
 compressed_str = exif_dict["Exif"][piexif.ExifIFD.UserComment]
 str = zlib_uudecode(compressed_str)
-json = json.loads(str)
-print(json)
+parsed = json.loads(str)
+print(json.dumps(parsed, indent=2))
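
Note: zlib_uudecode is another ketrface.util helper not shown here. Given that the util module imports uu, zlib, and BytesIO, a plausible sketch of the decode side — the exact round-trip is an assumption — is:

    import uu
    import zlib
    from io import BytesIO

    def zlib_uudecode(datain):
        # Hypothetical inverse of a zlib-compress-then-uuencode step:
        # uudecode the EXIF UserComment bytes, then decompress
        inbuf = BytesIO(datain)
        outbuf = BytesIO()
        uu.decode(inbuf, outbuf)
        return zlib.decompress(outbuf.getvalue())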

File diff suppressed because it is too large

View File

@@ -27,14 +27,15 @@ def create_face(conn, face):
     :return: face id
     """
     sql = '''
-        INSERT INTO faces(photoId,scanVersion,faceConfidence,top,left,bottom,right,descriptorId)
-        VALUES(?,?,?,?,?,?,?,?)
+        INSERT INTO faces(photoId,scanVersion,faceConfidence,focus,top,left,bottom,right,descriptorId)
+        VALUES(?,?,?,?,?,?,?,?,?)
     '''
     cur = conn.cursor()
     cur.execute(sql, (
         face['photoId'],
         face['scanVersion'],
         face['faceConfidence'],
+        face['focus'],
         face['top'],
         face['left'],
         face['bottom'],
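
Note: because create_face now writes a focus column, an existing SQLite database needs that column added before this INSERT will succeed. A one-off migration along these lines should work (the database path is hypothetical):

    import sqlite3

    conn = sqlite3.connect('db/faces.db')  # path is an assumption
    with conn:
        # Matches the Sequelize model below: DOUBLE with default 0
        conn.execute('ALTER TABLE faces ADD COLUMN focus DOUBLE DEFAULT 0')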

View File

@ -1,6 +1,6 @@
from ketrface.util import * from ketrface.util import *
MIN_PTS = 10 MIN_PTS = 5
MAX_DISTANCE = 0.25 MAX_DISTANCE = 0.25
Undefined = 0 Undefined = 0
@ -25,7 +25,7 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
new_perc = int(100 * (i+1) / total) new_perc = int(100 * (i+1) / total)
if new_perc != perc: if new_perc != perc:
perc = new_perc perc = new_perc
print(f'Clustering faces {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.') print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
if P['cluster'] != Undefined: # Previously processed in inner loop if P['cluster'] != Undefined: # Previously processed in inner loop
continue continue
@ -36,7 +36,8 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
C = { # Define new cluster C = { # Define new cluster
'id': len(clusters), 'id': len(clusters),
'faces': [ P ] 'faces': [ P ],
'cluster': Undefined
} }
clusters.append(C) clusters.append(C)
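
Note: lowering MIN_PTS to 5 lets a cluster form from as few as five neighboring points, and seeding each new cluster with 'cluster': Undefined is what allows clusters themselves to be fed back into DBSCAN as points. The two call sites in the clustering script above illustrate the pattern:

    # First pass: cluster raw faces with the defaults (eps = 0.25, minPts = 5)
    identities = DBSCAN(faces)

    # Second pass: treat each cluster's mean descriptor as a point and merge
    # clusters whose centers are within 0.15, allowing pairs (minPts = 2)
    reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)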

View File

@@ -4,6 +4,7 @@ import uu
 from io import BytesIO
 import json
 import numpy as np
+import zlib

 original = None

View File

@@ -141,6 +141,10 @@ function init() {
              */
             defaultValue: 0
         },
+        focus: { /* focal via laplacian variance https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/ */
+            type: Sequelize.DOUBLE,
+            defaultValue: 0
+        },
         top: Sequelize.FLOAT,  /* 0..1 * photoId.height */
         left: Sequelize.FLOAT, /* 0..1 * photoId.width */