It's working pretty well now

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
James Ketr 2023-01-10 16:29:11 -08:00
parent 36b46f3300
commit 1767e807ea
13 changed files with 3074 additions and 3547 deletions

View File

@@ -20,7 +20,8 @@ RUN wget -qO- https://deb.nodesource.com/setup_18.x | bash -
 RUN DEBIAN_FRONTEND=NONINTERACTIVE apt-get install -y \
     python2 \
     jhead \
-    nodejs
+    nodejs \
+    jq

 # Install the latest npm and npx
 RUN npm install --global npm@latest

View File

@@ -9,9 +9,9 @@ services:
 #      - db
     restart: always
     ports:
-      - 8134:8123
+      - 8135:8123
     volumes:
-      - /multimedia/Dad:/pictures
+      - /home/jketreno/.pic-chalkwharf-bk:/pictures
       - ${PWD}/db:/db
       - ${PWD}:/website
       - ${PWD}/models:/root/.deepface

View File

@@ -6,8 +6,7 @@ import sqlite3
 from sqlite3 import Error
 from PIL import Image
 import numpy as np
-from deepface import DeepFace
-from deepface.detectors import FaceDetector
 import functools
 from ketrface.util import *
@@ -41,6 +40,7 @@ def gen_html(identities):
             photoId = face['photoId']
             distance = "{:0.4f}".format(face['distance'])
             confidence = "{:0.3f}".format(face['confidence'])
+            focus = int(face['focus'])
             label = face['cluster']
             if type(label) != str:
                 label = f'Cluster ({face["cluster"]["id"]})'
@@ -49,25 +49,40 @@
             path = f'{html_base}/faces/{"{:02d}".format(faceId % 10)}'
             print(f'<img src="{path}/{faceId}.jpg"/>')
             print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;top:0px;left:0px;right:0px;padding:0.25rem">{label}: {distance}</div>')
-            print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence}</div>')
+            print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{faceId} {photoId} {confidence} {focus}</div>')
             print('</div>')
         print('</div>')
     print('</div>')

+def update_cluster_averages(identities):
+    for identity in identities:
+        average = []
+        for face in identity['faces']:
+            if len(average) == 0:
+                average = face['descriptors']
+            else:
+                average = np.add(average, face['descriptors'])
+        average = np.divide(average, len(identity['faces']))
+        identity['descriptors'] = average
+    return identities
+
 def load_faces(db_path = db_path):
+    print(f'Connecting to database: {db_path}')
     conn = create_connection(db_path)
     faces = []
     with conn:
+        print('Querying faces')
         cur = conn.cursor()
         res = cur.execute('''
-            SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
+            SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus
             FROM faces
             JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
             WHERE faces.identityId IS null AND faces.faceConfidence>0.99
         ''')
         for row in res.fetchall():
-            id, descriptors, confidence, photoId = row
+            id, descriptors, confidence, photoId, focus = row
+            if focus is None:
+                focus = 100 # Assume full focus if focus not set
             face = {
                 'id': id,
                 'type': 'face',
@@ -75,12 +90,38 @@ def load_faces(db_path = db_path):
                 'distance': 0,
                 'photoId': photoId,
                 'descriptors': np.frombuffer(descriptors),
-                'cluster': Undefined
+                'cluster': Undefined,
+                'focus': focus
             }
             face['faces'] = [ face ]
             faces.append(face)
     return faces

+def update_distances(identities, prune = False):
+    removed = 0
+    for identity in identities:
+        for face in identity['faces']:
+            average = identity['descriptors']
+            distance = findCosineDistance(average, face['descriptors'])
+            if prune and distance > MAX_EPOCH_DISTANCE:
+                average = np.dot(average, len(identity['faces']))
+                average = np.subtract(average, face['descriptors'])
+                face['cluster'] = Undefined
+                face['distance'] = 0
+                identity['faces'].remove(face)
+                identity['descriptors'] = np.divide(average, len(identity['faces']))
+                removed += 1
+            else:
+                face['distance'] = distance
+    return removed
+
+def sort_identities(identities):
+    identities.sort(reverse = True, key = lambda x: len(x['faces']))
+    for identity in identities:
+        identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
+
 def cluster_sort(A, B):
     diff = A['cluster'] - B['cluster']
     if diff > 0:
@@ -101,23 +142,11 @@ print('Scanning for clusters')
 identities = DBSCAN(faces) # process_faces(faces)
 print(f'{len(identities)} clusters grouped')

+MAX_CLUSTER_DISTANCE = 0.15 # Used to merge clusters
+MAX_EPOCH_DISTANCE = 0.14 # Used to prune outliers
+
 # Compute average center for all clusters
-sum = 0
-for identity in identities:
-    sum += len(identity['faces'])
-    print(f'{identity["id"]} has {len(identity["faces"])} faces')
-    average = []
-    for face in identity['faces']:
-        if len(average) == 0:
-            average = face['descriptors']
-        else:
-            average = np.add(average, face['descriptors'])
-    average = np.divide(average, len(identity['faces']))
-    identity['descriptors'] = average
+identities = update_cluster_averages(identities)

 removed = -1
 epoch = 1
@@ -126,33 +155,68 @@ epoch = 1
 while removed != 0:
     print(f'Epoch {epoch}...')
     epoch += 1
-    removed = 0
-    for identity in identities:
-        for face in identity['faces']:
-            average = identity['descriptors']
-            distance = findCosineDistance(average, face['descriptors'])
-            if distance > 0.14:
-                average = np.dot(average, len(identity['faces']))
-                average = np.subtract(average, face['descriptors'])
-                face['cluster'] = Undefined
-                face['distance'] = 0
-                identity['faces'].remove(face)
-                identity['descriptors'] = np.divide(average, len(identity['faces']))
-                removed += 1
-            else:
-                face['distance'] = distance
+    removed = update_distances(identities, prune = True)
     if removed > 0:
         print(f'Excluded {removed} faces this epoch')

-identities.sort(reverse = True, key = lambda x: len(x['faces']))
-for identity in identities:
-    identity['faces'].sort(reverse = False, key = lambda x: x['distance'])
-
 print(f'{len(identities)} identities seeded.')

+# Cluster the clusters...
+print('Reducing clusters via DBSCAN')
+reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)
+
+# For each cluster, merge the lists of faces referenced in the cluster's
+# "faces" field, which points to clusters (and not actual faces)
+for cluster in reduced:
+    merged = []
+    for identity in cluster['faces']:
+        merged = merged + identity['faces']
+    cluster['faces'] = merged
+
+# Create a set containing those faces which have not been bound
+# to an identity, to re-cluster them in isolation from the rest
+# of the faces
+noise = []
+undefined = []
+clustered = []
+for face in faces:
+    if face['cluster'] == Noise:
+        noise.append(face)
+    elif face['cluster'] == Undefined:
+        undefined.append(face)
+
+print(f'Stats: Noise = {len(noise)}, Undefined = {len(undefined)}')
+stragglers = DBSCAN(noise + undefined)
+reduced = update_cluster_averages(reduced + stragglers)
+
+# Give all merged identity lists a unique ID
+for id, identity in enumerate(reduced):
+    identity['id'] = id
+    for face in identity['faces']:
+        face['cluster'] = identity
+
+update_distances(reduced)
+sort_identities(reduced)
+
+# This generates a set of differences between clusters and makes
+# a recommendation to merge clusters (outside of DBSCAN)
+#
+# Worth testing on a larger data set
+for i, A in enumerate(reduced):
+    for k, B in enumerate(reduced):
+        if k < i:
+            continue
+        if A == B:
+            continue
+        distance = findCosineDistance(A['descriptors'], B['descriptors'])
+        if distance < MAX_CLUSTER_DISTANCE:
+            distance = "{:0.4f}".format(distance)
+            print(f'{A["id"]} to {B["id"]} = {distance}: MERGE')
+
 print('Writing to "identities.html"')
 redirect_on('identities.html')
-gen_html(identities)
+gen_html(reduced)
 redirect_off()
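
Note: the clustering script leans on findCosineDistance from ketrface.util, which this commit never shows. A minimal sketch of what it presumably computes — the standard cosine distance between two descriptor vectors, matching deepface's convention — is:

    import numpy as np

    def findCosineDistance(a, b):
        # Cosine distance: 0 for identical direction, approaching 2 for opposite
        a = np.asarray(a)
        b = np.asarray(b)
        return 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

Under that assumption, each epoch prunes any face whose descriptor sits more than MAX_EPOCH_DISTANCE (0.14) from its cluster's running mean, then recomputes the mean without it.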

View File

@ -1,5 +1,4 @@
import sys import sys
import zlib
import json import json
import os import os
import piexif import piexif
@@ -13,6 +12,7 @@ import cv2
 from ketrface.util import *
 from ketrface.db import *

+face_base = '../'
 model_name = 'VGG-Face' # 'ArcFace'
 detector_backend = 'mtcnn' # 'retinaface'
 model = DeepFace.build_model(model_name)
@@ -92,18 +92,22 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
                 'right_eye': list(face['keypoints']['right_eye']),
             },
             'score': face['confidence'],
         }

+    to_drop = []
+
     # Re-implementation of 'extract_faces' with the addition of keeping a
     # copy of the face image for caching on disk
     for k, key in enumerate(faces):
         print(f'Processing face {k+1}/{len(faces)}')
         identity = faces[key]
+        identity['focus'] = 100 # Until laplacian variance is executed
         facial_area = identity["facial_area"]
         landmarks = identity["landmarks"]
         left_eye = landmarks["left_eye"]
         right_eye = landmarks["right_eye"]

         # markup = True
         markup = False
         if markup == True: # Draw the face rectangle and eyes
@@ -142,16 +146,12 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
         facial_img = img[top: bottom, left: right]

-        image = Image.fromarray(facial_img)
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        gray = cv2.cvtColor(facial_img, cv2.COLOR_BGR2GRAY)
         focus = variance_of_laplacian(gray)
-        if focus < focus_threshold:
-            print(f'Dropping {ke+1} due to focus {focus}.')
-            faces.pop(key)
         identity['focus'] = focus

         # Eye order is reversed as the routine does them backwards
+        image = Image.fromarray(facial_img)
         image = alignment_procedure(image, right_eye, left_eye)
         image = image.resize(size = input_shape, resample = Image.LANCZOS)
         resized = np.asarray(image)
@@ -174,6 +174,9 @@ def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold =
         identity['image'] = Image.fromarray(resized)

+    # for key in to_drop:
+    #     faces.pop(key)
+
     return faces
@@ -192,9 +195,14 @@ with conn:
         photoId, photoFaces, albumPath, photoFilename = row
         img_path = f'{base}{albumPath}{photoFilename}'
         print(f'Processing {i+1}/{count}: {img_path}')
+        try:
             img = Image.open(img_path)
             img = ImageOps.exif_transpose(img) # auto-rotate if needed
-            img = img.convert()
+            img = img.convert("RGB") # Catch "RGBA" and convert to 3-channel
+        except:
+            print(f'Unable to load / process {img_path}. Skipping.')
+            continue
         img = np.asarray(img)
         faces = extract_faces(img)
         if faces is None:
@@ -221,6 +229,7 @@ with conn:
                 'photoId': photoId,
                 'scanVersion': face['version'],
                 'faceConfidence': face['score'],
+                'focus': face['focus'],
                 'top': face['face']['top'],
                 'left': face['face']['left'],
                 'bottom': face['face']['bottom'],
@@ -228,7 +237,7 @@ with conn:
                 'descriptorId': faceDescriptorId,
             })

-            path = f'faces/{"{:02d}".format(faceId % 10)}'
+            path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
             try:
                 os.mkdir(path)
             except FileExistsError:
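
Note: variance_of_laplacian also comes from ketrface.util and is not shown in this commit. Judging by the pyimagesearch blur-detection reference in the schema comment further down, it is presumably the usual OpenCV one-liner:

    import cv2

    def variance_of_laplacian(gray):
        # Convolve with the Laplacian and take the variance of the response;
        # low variance means few edges, i.e. a blurry crop
        return cv2.Laplacian(gray, cv2.CV_64F).var()

The commit now stores the focus score on each face instead of dropping blurry faces outright: the old in-loop faces.pop() (which referenced an undefined `ke` and mutated the dict during iteration) is removed, and to_drop is left as a commented-out placeholder.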

View File

@@ -8,13 +8,14 @@ from io import BytesIO
 from ketrface.util import *

+face_base = "../"
+
 faceId = int(sys.argv[1])
-path = f'faces/{"{:02d}".format(faceId % 10)}'
+path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
 img = Image.open(f'{path}/{faceId}.jpg')

 exif_dict = piexif.load(img.info["exif"])
 compressed_str = exif_dict["Exif"][piexif.ExifIFD.UserComment]
 str = zlib_uudecode(compressed_str)
-json = json.loads(str)
-print(json)
+parsed = json.loads(str)
+print(json.dumps(parsed, indent=2))
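
Note: zlib_uudecode is another ketrface.util helper not shown here. Given that the util module imports uu, zlib, and BytesIO, a plausible sketch of the decode side — the exact round-trip is an assumption — is:

    import uu
    import zlib
    from io import BytesIO

    def zlib_uudecode(datain):
        # Hypothetical inverse of a zlib-compress-then-uuencode step:
        # uudecode the EXIF UserComment bytes, then decompress
        inbuf = BytesIO(datain)
        outbuf = BytesIO()
        uu.decode(inbuf, outbuf)
        return zlib.decompress(outbuf.getvalue())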

File diff suppressed because it is too large

View File

@@ -27,14 +27,15 @@ def create_face(conn, face):
     :return: face id
     """
     sql = '''
-        INSERT INTO faces(photoId,scanVersion,faceConfidence,top,left,bottom,right,descriptorId)
-        VALUES(?,?,?,?,?,?,?,?)
+        INSERT INTO faces(photoId,scanVersion,faceConfidence,focus,top,left,bottom,right,descriptorId)
+        VALUES(?,?,?,?,?,?,?,?,?)
     '''
     cur = conn.cursor()
     cur.execute(sql, (
         face['photoId'],
         face['scanVersion'],
         face['faceConfidence'],
+        face['focus'],
         face['top'],
         face['left'],
         face['bottom'],
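
Note: because create_face now writes a focus column, an existing SQLite database needs that column added before this INSERT will succeed. A one-off migration along these lines should work (the database path is hypothetical):

    import sqlite3

    conn = sqlite3.connect('db/faces.db')  # path is an assumption
    with conn:
        # Matches the Sequelize model below: DOUBLE with default 0
        conn.execute('ALTER TABLE faces ADD COLUMN focus DOUBLE DEFAULT 0')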

View File

@ -1,6 +1,6 @@
from ketrface.util import * from ketrface.util import *
MIN_PTS = 10 MIN_PTS = 5
MAX_DISTANCE = 0.25 MAX_DISTANCE = 0.25
Undefined = 0 Undefined = 0
@ -25,7 +25,7 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
new_perc = int(100 * (i+1) / total) new_perc = int(100 * (i+1) / total)
if new_perc != perc: if new_perc != perc:
perc = new_perc perc = new_perc
print(f'Clustering faces {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.') print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
if P['cluster'] != Undefined: # Previously processed in inner loop if P['cluster'] != Undefined: # Previously processed in inner loop
continue continue
@ -36,7 +36,8 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
C = { # Define new cluster C = { # Define new cluster
'id': len(clusters), 'id': len(clusters),
'faces': [ P ] 'faces': [ P ],
'cluster': Undefined
} }
clusters.append(C) clusters.append(C)
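
Note: lowering MIN_PTS to 5 lets a cluster form from as few as five neighboring points, and seeding each new cluster with 'cluster': Undefined is what allows clusters themselves to be fed back into DBSCAN as points. The two call sites in the clustering script above illustrate the pattern:

    # First pass: cluster raw faces with the defaults (eps = 0.25, minPts = 5)
    identities = DBSCAN(faces)

    # Second pass: treat each cluster's mean descriptor as a point and merge
    # clusters whose centers are within 0.15, allowing pairs (minPts = 2)
    reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)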

View File

@@ -4,6 +4,7 @@ import uu
 from io import BytesIO
 import json
 import numpy as np
+import zlib

 original = None

View File

@@ -141,6 +141,10 @@ function init() {
              */
             defaultValue: 0
         },
+        focus: { /* focal via laplacian variance https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/ */
+            type: Sequelize.DOUBLE,
+            defaultValue: 0
+        },
         top: Sequelize.FLOAT,  /* 0..1 * photoId.height */
         left: Sequelize.FLOAT, /* 0..1 * photoId.width */