19 changed files with 700 additions and 4351 deletions
--- a/3
+++ b/3
@ -20,8 +20,7 @@ RUN wget -qO- https://deb.nodesource.com/setup_18.x | bash -
 RUN DEBIAN_FRONTEND=NONINTERACTIVE apt-get install -y \
  python2 \
  jhead \
-  nodejs \
+  nodejs
  jq
 # Install the latest npm and npx
 RUN npm install --global npm@latest
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -9,9 +9,9 @@ services:
 #     - db
    restart: always
    ports:
-      - 8135:8123
+      - 8134:8123
    volumes:
-      - /home/jketreno/.pic-chalkwharf-bk:/pictures
+      - /multimedia/Dad:/pictures
      - ${PWD}/db:/db
      - ${PWD}:/website
      - ${PWD}/models:/root/.deepface
--- a/ketrface/cluster.py
+++ b/ketrface/cluster.py
@ -1,222 +0,0 @@
 import sys
 import json
 import os
 import piexif
 import sqlite3
 from sqlite3 import Error
 from PIL import Image
 import numpy as np
 import functools
 from ketrface.util import *
 from ketrface.dbscan import *
 from ketrface.db import *
 # Prefix placed in front of '/faces/...' when gen_html() builds <img> URLs
 html_base = '../'
 # SQLite database file used as the default argument of load_faces()
 db_path = '../db/photos.db'
 # TODO
 # Switch to using DBSCAN
 #
 # Thoughts for determining number of clusters to try and target...
 #
 # Augment DBSCAN to rule out identity matching for the same face
 # appearing more than once in a photo
 #
 # NOTE: This means twins or reflections won't both identify in the
 # same photo -- those faces would then identify as a second face pairing
 # which could merge with a cluster, but can not be used to match
 def gen_html(identities):
  """
  Print an HTML fragment describing every identity and its faces to stdout
  :param identities: iterable of identity dicts with an 'id' and a 'faces'
      list; each face provides 'id', 'photoId', 'distance', 'confidence',
      'focus' and 'cluster' (either a string label or a cluster dict)
  :return: None
  """
  for identity in identities:
    members = identity['faces']
    print('<div>')
    print(f'<div><b>Identity {identity["id"]} has {len(members)}</b><br></div>')
    print('<div>')
    for member in members:
      face_id = member['id']
      photo_id = member['photoId']
      distance = format(member['distance'], '0.4f')
      confidence = format(member['confidence'], '0.3f')
      focus = int(member['focus'])
      label = member['cluster']
      if type(label) is not str:
        # A cluster dict is shown by its numeric id
        label = f'Cluster ({label["id"]})'
      print('<div style="position:relative;display:inline-flex;flex-direction:column">')
      # Thumbnails are bucketed into directories by the last digit of the id
      path = f'{html_base}/faces/{face_id % 10:02d}'
      print(f'<img src="{path}/{face_id}.jpg"/>')
      print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;top:0px;left:0px;right:0px;padding:0.25rem">{label}: {distance}</div>')
      print(f'<div style="background-color:rgba(255, 255, 255, 0.4);position:absolute;bottom:0px;left:0px;right:0px;padding:0.25rem">{face_id} {photo_id} {confidence} {focus}</div>')
      print('</div>')
    print('</div>')
    print('</div>')
 def update_cluster_averages(identities):
  """
  Store the mean of the member descriptors on every identity
  :param identities: list of dicts with a 'faces' list; every face has a
      'descriptors' vector
  :return: the same list, with identity['descriptors'] set on each entry
  """
  for identity in identities:
    members = identity['faces']
    total = []
    for member in members:
      if len(total) == 0:
        # First member seeds the running sum
        total = member['descriptors']
      else:
        total = np.add(total, member['descriptors'])
    identity['descriptors'] = np.divide(total, len(members))
  return identities
 def load_faces(db_path = db_path):
  """
  Load every high-confidence face that has no identity assigned yet
  :param db_path: SQLite database file to open
  :return: list of face dicts ('id', 'type', 'confidence', 'distance',
      'photoId', 'descriptors', 'cluster', 'focus', 'faces'); 'faces' holds
      the face itself so a face can be handled like a one-member cluster
  """
  print(f'Connecting to database: {db_path}')
  conn = create_connection(db_path)
  loaded = []
  with conn:
    print('Querying faces')
    cursor = conn.cursor()
    rows = cursor.execute('''
      SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus
      FROM faces
      JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
      WHERE faces.identityId IS null AND faces.faceConfidence>0.99
      ''').fetchall()
    for record in rows:
      face_id, blob, confidence, photo_id, focus = record
      entry = {
        'id': face_id,
        'type': 'face',
        'confidence': confidence,
        'distance': 0,
        'photoId': photo_id,
        'descriptors': np.frombuffer(blob),
        'cluster': Undefined,
        # Assume full focus if focus not set
        'focus': 100 if focus is None else focus
      }
      entry['faces'] = [ entry ]
      loaded.append(entry)
  return loaded
 def update_distances(identities, prune = False):
  """
  Refresh each face's cosine distance to the centre of its identity
  :param identities: list of dicts with 'faces' and 'descriptors' (centre)
  :param prune: when True, faces further than MAX_EPOCH_DISTANCE from the
      centre are removed from the identity, reset to Undefined, and the
      centre is recomputed without them
  :return: number of faces removed (always 0 when prune is False)
  """
  removed = 0
  for identity in identities:
    # Walk a snapshot: pruning removes entries from identity['faces'], and
    # removing from a list while iterating it skips the following element
    for face in list(identity['faces']):
      average = identity['descriptors']
      distance = findCosineDistance(average, face['descriptors'])
      if prune and distance > MAX_EPOCH_DISTANCE:
        # Turn the mean back into a sum, then take this face out of it
        total = np.dot(average, len(identity['faces']))
        total = np.subtract(total, face['descriptors'])
        face['cluster'] = Undefined
        face['distance'] = 0
        identity['faces'].remove(face)
        remaining = len(identity['faces'])
        if remaining > 0:
          identity['descriptors'] = np.divide(total, remaining)
        # With no faces left the previous centre is kept; dividing by zero
        # would fill it with NaN/inf
        removed += 1
      else:
        face['distance'] = distance
  return removed
 def sort_identities(identities):
  """
  Sort in place: identities by descending face count, and the faces of
  each identity by ascending distance
  :param identities: list of dicts with a 'faces' list of dicts carrying
      'distance'
  :return: None
  """
  identities.sort(key = lambda entry: len(entry['faces']), reverse = True)
  for entry in identities:
    entry['faces'].sort(key = lambda member: member['distance'])
 def cluster_sort(A, B):
  """
  Comparison function ordering by 'cluster', then by 'confidence'
  :param A: dict with numeric 'cluster' and 'confidence'
  :param B: dict with numeric 'cluster' and 'confidence'
  :return: 1 if A sorts after B, -1 if before, 0 if equal on both fields
  """
  for field in ('cluster', 'confidence'):
    delta = A[field] - B[field]
    if delta > 0:
      return 1
    if delta < 0:
      return -1
  return 0
 # ---------------------------------------------------------------------
 # Script body: load faces, cluster them, prune outliers, merge clusters,
 # recluster the leftovers and write the result to identities.html
 # ---------------------------------------------------------------------
 print('Loading faces from database')
 faces = load_faces()
 print(f'{len(faces)} faces loaded')
 print('Scanning for clusters')
 identities = DBSCAN(faces) # process_faces(faces)
 print(f'{len(identities)} clusters grouped')
 # Both thresholds are cosine distances. They are defined here, after the
 # functions that read them, but before those functions are first called
 MAX_CLUSTER_DISTANCE = 0.15  # Used to merge clusters
 MAX_EPOCH_DISTANCE = 0.14    # Used to prune outliers
 # Compute average center for all clusters
 identities = update_cluster_averages(identities)
 removed = -1
 epoch = 1
 # Filter each cluster removing any face that is > MAX_EPOCH_DISTANCE
 # from the average center point of the cluster; repeat until an epoch
 # removes nothing
 while removed != 0:
  print(f'Epoch {epoch}...')
  epoch += 1
  removed = update_distances(identities, prune = True)
  if removed > 0:
    print(f'Excluded {removed} faces this epoch')
 print(f'{len(identities)} identities seeded.')
 # Cluster the clusters...
 print('Reducing clusters via DBSCAN')
 # NOTE(review): identities that DBSCAN labels Noise here are not part of
 # `reduced`, and their faces are neither Noise nor Undefined, so they look
 # to be left out of the final output -- confirm this is intended
 reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 2)
 # For each cluster, merge the lists of faces referenced in the cluster's
 # "faces" field, which is pointing to clusters (and not actual faces)
 for cluster in reduced:
  merged = []
  for identity in cluster['faces']:
    merged = merged + identity['faces']
  cluster['faces'] = merged
 # Creating a set containing those faces which have not been bound
 # to an identity to recluster them in isolation from the rest of
 # the faces
 noise = []
 undefined = []
 clustered = []  # never filled or read below
 for face in faces:
  if face['cluster'] == Noise:
    noise.append(face)
  elif face['cluster'] == Undefined:
    undefined.append(face)
 print(f'Stats: Noise = {len(noise)}, Undefined = {len(undefined)}')
 # NOTE(review): DBSCAN skips any point whose 'cluster' is not Undefined, so
 # faces still labelled Noise can only be absorbed as border points here,
 # never start a cluster -- confirm
 straglers = DBSCAN(noise + undefined)
 reduced = update_cluster_averages(reduced + straglers)
 # Give all merged identity lists a unique ID
 for id, identity in enumerate(reduced):
  identity['id'] = id
  for face in identity['faces']:
    face['cluster'] = identity
 # Recompute distances without pruning, then order for display
 update_distances(reduced)
 sort_identities(reduced)
 # This generates a set of differences between clusters and makes
 # a recommendation to merge clusters (outside of DBSCAN)
 #
 # Worth testing on larger data set
 for i, A in enumerate(reduced):
  for k, B in enumerate(reduced):
    # Only look at each unordered pair once
    if k < i:
      continue
    if A == B:
      continue
    distance = findCosineDistance(A['descriptors'], B['descriptors'])
    if distance < MAX_CLUSTER_DISTANCE:
      distance = "{:0.4f}".format(distance)
      print(f'{A["id"]} to {B["id"]} = {distance}: MERGE')
 print('Writing to "identities.html"')
 # gen_html() prints to stdout, so stdout is pointed at the file meanwhile
 redirect_on('identities.html')
 gen_html(reduced)
 redirect_off()
--- a/ketrface/detect.py
+++ b/ketrface/detect.py
@ -1,257 +0,0 @@
 import sys
 import json
 import os
 import piexif
 from PIL import Image, ImageOps
 from deepface import DeepFace
 from deepface.detectors import FaceDetector
 from retinaface import RetinaFace
 import numpy as np
 import cv2
 from ketrface.util import *
 from ketrface.db import *
 # Prefix of the 'faces/NN' directories the thumbnails and JSON are written to
 face_base = '../'
 # Recognition model and detector in use; the trailing comment on each line
 # names the alternative
 model_name = 'VGG-Face'    # 'ArcFace'
 detector_backend = 'mtcnn' # 'retinaface'
 # Built once at import time and reused for every DeepFace.represent() call
 model = DeepFace.build_model(model_name)
 # Derived from https://github.com/serengil/deepface/blob/master/deepface/detectors/MtcnnWrapper.py
 # Add parameters to MTCNN
 from mtcnn import MTCNN
 face_detector = MTCNN(min_face_size = 30)
 # Used as the resize target for each cropped face before it is embedded
 input_shape = DeepFace.functions.find_input_shape(model)
 # Adapted from DeepFace
 # https://github.com/serengil/deepface/blob/master/deepface/commons/functions.py
 #
 # Modified to use bicubic resampling and clip expansion, as well as to
 # take a PIL Image instead of numpy array
 def alignment_procedure(img, left_eye, right_eye):
  """
  Rotate an image by the angle of the line through the two eye positions
  so that the eyes end up horizontal
  :param img: Image (not np.array)
  :param left_eye: (x, y) of the eye appearing on the left (right eye of person)
  :param right_eye: (x, y) of the eye appearing on the right (left eye of person)
  :return: rotated image; the canvas is expanded to fit the rotated content
  """
  delta_y = right_eye[1] - left_eye[1]
  delta_x = right_eye[0] - left_eye[0]
  angle_radians = np.arctan2(delta_y, delta_x)
  # Convert to degrees and add a half turn
  angle_degrees = 180 + 180 * angle_radians / np.pi
  return img.rotate(
    angle = angle_degrees,
    resample = Image.BICUBIC,
    expand = True)
 def variance_of_laplacian(image):
  """
  Focus measure of an image: the variance of its Laplacian
  :param image: image array accepted by cv2.Laplacian
  :return: variance of the 64-bit float Laplacian response
  """
  laplacian = cv2.Laplacian(image, cv2.CV_64F)
  return laplacian.var()
 def extract_faces(img, threshold=0.95, allow_upscaling = True, focus_threshold = 100):
  """
  Detect faces in an image and compute a descriptor vector for each one
  :param img: image as a numpy array indexed [row, column, channel], i.e.
      shape[0] is the height and shape[1] is the width
  :param threshold: minimum detector confidence for a face to be kept
  :param allow_upscaling: forwarded to RetinaFace.detect_faces (retinaface
      backend only)
  :param focus_threshold: accepted but not used
  :return: dict keyed 'face_N'; each entry holds 'facial_area', 'landmarks',
      'score', 'focus' (Laplacian variance of the crop), 'vector' (result of
      DeepFace.represent), 'face' (bounding box relative to the image size)
      and 'image' (PIL thumbnail)
  :raises ValueError: if detector_backend is neither 'retinaface' nor 'mtcnn'
  """
  if detector_backend == 'retinaface':
    # NOTE(review): `model` is the module-level DeepFace recognition model;
    # confirm that this is what RetinaFace.detect_faces expects here
    faces = RetinaFace.detect_faces(
      img_path = img,
      threshold = threshold,
      model = model,
      allow_upscaling = allow_upscaling)
  elif detector_backend == 'mtcnn':
    # NOTE(review): the caller in this file loads images through PIL as RGB;
    # confirm that the channel swap below is actually wanted
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB
    redirect_on()
    try:
      res = face_detector.detect_faces(img_rgb)
    finally:
      redirect_off() # restore stdout even if detection raises
    faces = {}
    if isinstance(res, list):
      for i, face in enumerate(res):
        if threshold > face['confidence']:
          continue
        x = face['box'][0]
        y = face['box'][1]
        w = face['box'][2]
        h = face['box'][3]
        # If face is less than 2.5% of the image width and height,
        # skip it (too small) -- filters out likely blurry faces in
        # large group photos where the actual face may exceed
        # min_face_size passed to MTCNN
        rel_width = w / img.shape[1]  # columns == image width
        rel_height = h / img.shape[0] # rows == image height
        if 0.025 > rel_width and 0.025 > rel_height:
          print(f'Dropping due to small face size: {rel_width} x {rel_height}')
          continue
        faces[f'face_{i+1}'] = { # standardize properties
          'facial_area': [ x, y, x + w, y + h ],
          'landmarks': {
            'left_eye': list(face['keypoints']['left_eye']),
            'right_eye': list(face['keypoints']['right_eye']),
          },
          'score': face['confidence'],
        }
  else:
    # Previously fell through and failed later on the unbound `faces`
    raise ValueError(f'Unsupported detector_backend: {detector_backend}')
  # Re-implementation of 'extract_faces' with the addition of keeping a
  # copy of the face image for caching on disk
  for k, key in enumerate(faces):
    print(f'Processing face {k+1}/{len(faces)}')
    identity = faces[key]
    identity['focus'] = 100 # Until laplacian variance is executed
    facial_area = identity["facial_area"]
    landmarks = identity["landmarks"]
    left_eye = landmarks["left_eye"]
    right_eye = landmarks["right_eye"]
    markup = False # set True to draw the face rectangle and eyes (debug aid)
    if markup == True:
      cv2.rectangle(img,
        (int(facial_area[0]), int(facial_area[1])),
        (int(facial_area[2]), int(facial_area[3])),
        (0, 0, 255), 2)
      cv2.circle(img, (int(left_eye[0]), int(left_eye[1])), 5, (255, 0, 0), 2)
      cv2.circle(img, (int(right_eye[0]), int(right_eye[1])), 5, (0, 255, 0), 2)
    # Find center of face, then crop to square
    # of equal width and height
    width = facial_area[2] - facial_area[0]
    height = facial_area[3] - facial_area[1]
    x = facial_area[0] + width * 0.5
    y = facial_area[1] + height * 0.5
    # Make thumbnail a square crop
    if width > height:
      height = width
    else:
      width = height
    left = max(round(x - width * 0.5), 0)
    right = min(round(left + width), img.shape[1])  # clamp to image width
    top = max(round(y - height * 0.5), 0)
    bottom = min(round(top + height), img.shape[0]) # clamp to image height
    # Landmarks are (x, y): move them into crop coordinates by shifting x by
    # the crop's left edge and y by its top edge. The rotation angle only
    # depends on the difference between the eyes, so it is unaffected
    left_eye[0] -= left
    left_eye[1] -= top
    right_eye[0] -= left
    right_eye[1] -= top
    facial_img = img[top: bottom, left: right]
    gray = cv2.cvtColor(facial_img, cv2.COLOR_BGR2GRAY)
    focus = variance_of_laplacian(gray)
    identity['focus'] = focus
    # Eye order is reversed as the routine does them backwards
    image = Image.fromarray(facial_img)
    image = alignment_procedure(image, right_eye, left_eye)
    image = image.resize(size = input_shape, resample = Image.LANCZOS)
    resized = np.asarray(image)
    redirect_on()
    try:
      identity['vector'] = DeepFace.represent(
        img_path = resized,
        model_name = model_name,
        model = model,              # pre-built
        detector_backend = detector_backend,
        enforce_detection = False)
    finally:
      redirect_off() # restore stdout even if the embedding step raises
    # Bounding box as fractions of the image height / width
    identity["face"] = {
      'top': facial_area[1] / img.shape[0],
      'left': facial_area[0] / img.shape[1],
      'bottom': facial_area[3] / img.shape[0],
      'right': facial_area[2] / img.shape[1]
    }
    identity['image'] = Image.fromarray(resized)
  return faces
 # ---------------------------------------------------------------------
 # Script body: scan every photo whose face count is still -1, store each
 # detected face in the database and write its thumbnail + metadata to disk
 # ---------------------------------------------------------------------
 base = '/pictures/'
 conn = create_connection('../db/photos.db')
 with conn:
  cur = conn.cursor()
  # faces == -1 selects the photos that have no face count recorded yet
  res = cur.execute('''
    SELECT photos.id,photos.faces,albums.path,photos.filename FROM photos
    LEFT JOIN albums ON (albums.id=photos.albumId) 
    WHERE photos.faces=-1
    ''')
  rows = res.fetchall()
  count = len(rows)
  # Keys that are not written into the per-face JSON metadata
  excluded_keys = ('image', 'facial_area', 'landmarks')
  for i, row in enumerate(rows):
    photoId, photoFaces, albumPath, photoFilename = row
    img_path = f'{base}{albumPath}{photoFilename}'
    print(f'Processing {i+1}/{count}: {img_path}')
    try:
      img = Image.open(img_path)
      img = ImageOps.exif_transpose(img) # auto-rotate if needed
      img = img.convert("RGB") # Catch "RGBA" and convert to 3-channel
    except Exception:
      # A bare except would also swallow KeyboardInterrupt / SystemExit
      print(f'Unable to load / process {img_path}. Skipping.')
      continue
    img = np.asarray(img)
    faces = extract_faces(img)
    # extract_faces returns a (possibly empty) dict, so test for emptiness
    if not faces:
      print(f'Image no faces: {img_path}')
      update_face_count(conn, photoId, 0)
      continue
    for j, key in enumerate(faces):
      face = faces[key]
      image = face['image']
      print(f'Writing face {j+1}/{len(faces)}')
      # TODO: Add additional meta-data allowing back referencing to original
      # photo
      face['version'] = 1 # version 1 doesn't add much...
      # Filter in insertion order so the JSON key order is deterministic
      data = {k: v for k, v in face.items() if k not in excluded_keys}
      json_str = json.dumps(data, ensure_ascii=False, cls=NpEncoder)
      faceDescriptorId = create_face_descriptor(conn, face)
      faceId = create_face(conn, {
        'photoId': photoId,
        'scanVersion': face['version'],
        'faceConfidence': face['score'],
        'focus': face['focus'],
        'top': face['face']['top'],
        'left': face['face']['left'],
        'bottom': face['face']['bottom'],
        'right': face['face']['right'],
        'descriptorId': faceDescriptorId,
      })
      # Files are bucketed into directories by the last digit of the face id
      path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
      os.makedirs(path, exist_ok = True)
      with open(f'{path}/{faceId}.json', 'w', encoding = 'utf-8') as f:
        f.write(json_str)
      compressed_str = zlib_uuencode(json_str.encode())
      # Encode this data into the JPG as Exif
      exif_ifd = {piexif.ExifIFD.UserComment: compressed_str}
      exif_dict = {"0th": {}, "Exif": exif_ifd, "1st": {},
        "thumbnail": None, "GPS": {}}
      image.save(f'{path}/{faceId}.jpg', exif = piexif.dump(exif_dict))
    update_face_count(conn, photoId, len(faces))
--- a/ketrface/identities.html
+++ b/ketrface/identities.html
--- a/ketrface/ketrface/init.py
+++ b/ketrface/ketrface/init.py
--- a/ketrface/ketrface/pycache/init.cpython-310.pyc
+++ b/ketrface/ketrface/pycache/init.cpython-310.pyc
--- a/ketrface/ketrface/pycache/db.cpython-310.pyc
+++ b/ketrface/ketrface/pycache/db.cpython-310.pyc
--- a/ketrface/ketrface/pycache/dbscan.cpython-310.pyc
+++ b/ketrface/ketrface/pycache/dbscan.cpython-310.pyc
--- a/ketrface/ketrface/pycache/util.cpython-310.pyc
+++ b/ketrface/ketrface/pycache/util.cpython-310.pyc
--- a/ketrface/ketrface/db.py
+++ b/ketrface/ketrface/db.py
@ -1,79 +0,0 @@
 import sqlite3
 from sqlite3 import Error
 import numpy as np
 # Store numpy arrays as raw bytes. sqlite3 looks adapters up by the type of
 # the bound value, so the key must be the np.ndarray class; np.array is a
 # factory function and never matches
 sqlite3.register_adapter(np.ndarray, lambda arr: arr.tobytes())
 # Decode columns declared as "array" with np.frombuffer (converters only
 # run on connections opened with detect_types)
 sqlite3.register_converter("array", np.frombuffer)
 def create_connection(db_file):
  """
  Open a connection to the SQLite database stored in db_file
  :param db_file: database file
  :return: Connection object, or None if connecting failed (the error is
      printed)
  """
  try:
    return sqlite3.connect(db_file)
  except Error as failure:
    print(failure)
    return None
 def create_face(conn, face):
  """
  Insert one row into the faces table and commit
  :param conn: open database connection
  :param face: dict with 'photoId', 'scanVersion', 'faceConfidence',
      'focus', 'top', 'left', 'bottom', 'right' and 'descriptorId'
  :return: face id (row id of the inserted row)
  """
  sql = '''
  INSERT INTO faces(photoId,scanVersion,faceConfidence,focus,top,left,bottom,right,descriptorId)
  VALUES(?,?,?,?,?,?,?,?,?)
  '''
  # Same order as the column list in the statement
  columns = (
    'photoId', 'scanVersion', 'faceConfidence', 'focus',
    'top', 'left', 'bottom', 'right', 'descriptorId'
  )
  cursor = conn.cursor()
  cursor.execute(sql, tuple(face[column] for column in columns))
  conn.commit()
  return cursor.lastrowid
 def create_face_descriptor(conn, face):
  """
  Insert the descriptor vector of a face into the facedescriptors table
  and commit
  :param conn: open database connection
  :param face: dict whose 'vector' entry is the descriptor
  :return: descriptor id (row id of the inserted row)
  """
  sql = '''
  INSERT INTO facedescriptors(descriptors)
  VALUES(?)
  '''
  vector = np.array(face['vector'])
  cursor = conn.cursor()
  cursor.execute(sql, (vector,))
  conn.commit()
  return cursor.lastrowid
 def update_face_count(conn, photoId, faces):
  """
  Record the number of faces found on a photo and commit
  :param conn: open database connection
  :param photoId: id of the photos row to update
  :param faces: value stored in the row's faces column
  :return: None
  """
  sql = '''
  UPDATE photos SET faces=? WHERE id=?
  '''
  conn.cursor().execute(sql, (faces, photoId))
  conn.commit()
  return None
--- a/ketrface/ketrface/dbscan.py
+++ b/ketrface/ketrface/dbscan.py
@ -1,73 +0,0 @@
 from ketrface.util import *
 # Default minPts of DBSCAN(): smallest neighbourhood that starts a cluster
 MIN_PTS = 5
 # Default eps of DBSCAN(): largest cosine distance between neighbours
 MAX_DISTANCE = 0.25
 # Markers stored in a point's 'cluster' field while it is not in a cluster
 Undefined = 0  # not processed yet
 Edge = -1  # not referenced anywhere in this module
 Noise = -2  # fewer than minPts neighbours
 # Union of two lists of dicts
 def Union(A, B):
  """
  Concatenate two lists, dropping entries that already occurred
  :param A: first list
  :param B: second list
  :return: new list with the first occurrence of every entry, in order
  """
  merged = []
  for candidate in A + B:
    if candidate in merged:
      continue
    merged.append(candidate)
  return merged
 # https://en.wikipedia.org/wiki/DBSCAN
 # https://en.wikipedia.org/wiki/DBSCAN
 def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
  """
  Group points into density-based clusters using cosine distance
  :param points: list of dicts with 'descriptors' and a 'cluster' field;
      only points whose 'cluster' is Undefined are considered as seeds
  :param eps: largest distance at which two points count as neighbours
  :param minPts: smallest neighbourhood (the point itself included) that
      makes a point a core point
  :param verbose: print progress whenever the percentage changes
  :return: list of cluster dicts ('id', 'faces', 'cluster'); each point's
      'cluster' field is set to its cluster dict or to Noise
  """
  clusters = []                       # Cluster list
  perc = -1
  total = len(points)
  for i, P in enumerate(points):
    if verbose:
      new_perc = int(100 * (i+1) / total)
      if new_perc != perc:
        perc = new_perc
        print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
    if P['cluster'] != Undefined:     # Previously processed in inner loop
      continue
    N = RangeQuery(points, P, eps)    # Find neighbors
    if len(N) < minPts:               # Density check
      P['cluster'] = Noise            # Label as Noise
      continue
    C = {                             # Define new cluster
      'id': len(clusters),
      'faces': [ P ],
      'cluster': Undefined
    }
    clusters.append(C)
    P['cluster'] = C                  # Label initial point
    # Seed list: neighbors to expand (exclude P). It is grown in place and
    # walked by index so that neighbors discovered during expansion are
    # visited too; rebinding a fresh list inside a for loop would leave the
    # loop iterating the old list. Membership is tracked by object identity
    # so dicts holding numpy arrays are never compared by value
    seeds = [Q for Q in N if Q is not P]
    queued = {id(Q) for Q in seeds}
    queued.add(id(P))
    position = 0
    while position < len(seeds):      # Process every seed point
      Q = seeds[position]
      position += 1
      if Q['cluster'] == Noise:       # Change Noise to border point
        Q['cluster'] = C
        C['faces'].append(Q)
        continue
      if Q['cluster'] != Undefined:   # Previously processed (border point)
        continue
      Q['cluster'] = C                # Label neighbor
      C['faces'].append(Q)
      neighbors = RangeQuery(points, Q, eps)  # Find neighbors
      if len(neighbors) >= minPts:    # Density check (if Q is a core point)
        for R in neighbors:           # Add new neighbors to seed set
          if id(R) not in queued:
            queued.add(id(R))
            seeds.append(R)
  return clusters
 def RangeQuery(points, Q, eps):
  """
  Find every point within eps (cosine distance) of Q
  :param points: list of dicts with 'descriptors'
  :param Q: query point; it is part of the result when it is in points
  :param eps: largest distance that still counts as a neighbour
  :return: list of neighbouring points, each object at most once
  """
  neighbors = []
  seen = set()                        # ids of objects already in neighbors
  for P in points:                    # Scan all points in the database
    # Identity test instead of `P in neighbors`: that scanned the result
    # list for every point and compared whole dicts by value
    if id(P) in seen:
      continue
    distance = findCosineDistance(    # Compute distance and check epsilon
      Q['descriptors'],
      P['descriptors'])
    if distance <= eps:
      seen.add(id(P))
      neighbors.append(P)             # Add to result
  return neighbors
--- a/ketrface/ketrface/util.py
+++ b/ketrface/ketrface/util.py
@ -1,71 +0,0 @@
 import sys
 import os
 import uu
 from io import BytesIO
 import json
 import numpy as np
 import zlib
 # The real sys.stdout while a redirect is active, otherwise None
 original = None
 def redirect_on(file = None):
  """
  Send everything written to sys.stdout to a file until redirect_off()
  :param file: path to write to; defaults to os.devnull (discard output)
  :return: None. Does nothing when a redirect is already active
  :raises OSError: if the file cannot be opened (stdout is left untouched)
  """
  global original
  if original is None:
    if file is None:
      file = os.devnull
    # Open first: if this raises, no state has changed and a later
    # redirect_off() will not close the real stdout
    target = open(file, 'w')
    original = sys.stdout
    sys.stdout = target
 def redirect_off():
  """
  Undo redirect_on(): restore the real sys.stdout and close the file
  :return: None. Does nothing when no redirect is active
  """
  global original
  if original is not None:
    redirected = sys.stdout
    # Restore before closing so stdout is usable even if close() fails
    sys.stdout = original
    original = None
    redirected.close()
 def zlib_uuencode(databytes, name='<data>'):
  ''' Return databytes compressed with zlib (level 9), then uuencoded under the given name '''
  encoded = BytesIO()
  uu.encode(BytesIO(zlib.compress(databytes, 9)), encoded, name=name)
  return encoded.getvalue()
 def zlib_uudecode(databytes):
  ''' Return the result of uudecoding databytes and decompressing it with zlib '''
  decoded = BytesIO()
  uu.decode(BytesIO(databytes), decoded)
  compressed = decoded.getvalue()
  return zlib.decompress(compressed)
 class NpEncoder(json.JSONEncoder):
  """JSON encoder that converts numpy scalars and arrays to plain Python values"""
  def default(self, obj):
    """
    Convert an object the standard encoder cannot serialize
    :param obj: object to convert
    :return: int for numpy integers, float for numpy floats, nested list
        for numpy arrays
    :raises TypeError: for any other type (raised by the base class)
    """
    if isinstance(obj, np.integer):
      return int(obj)
    if isinstance(obj, np.floating):
      return float(obj)
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    # Falling off the end would return None and silently write unsupported
    # objects as null
    return super().default(obj)
 def findCosineDistance(source_representation, test_representation):
  """
  Cosine distance between two vectors: 1 minus their cosine similarity
  :param source_representation: vector as a list or numpy array
  :param test_representation: vector as a list or numpy array
  :return: 1 - (a . b) / (|a| * |b|)
  """
  source, test = source_representation, test_representation
  if type(source) is list:
    source = np.array(source)
  if type(test) is list:
    test = np.array(test)
  dot_product = np.matmul(np.transpose(source), test)
  source_norm = np.sqrt(np.sum(np.multiply(source, source)))
  test_norm = np.sqrt(np.sum(np.multiply(test, test)))
  return 1 - (dot_product / (source_norm * test_norm))
 def findEuclideanDistance(source_representation, test_representation):
  """
  Euclidean (straight-line) distance between two vectors
  :param source_representation: vector as a list or numpy array
  :param test_representation: vector as a list or numpy array
  :return: square root of the summed squared differences
  """
  source, test = source_representation, test_representation
  if type(source) is list:
    source = np.array(source)
  if type(test) is list:
    test = np.array(test)
  delta = source - test
  return np.sqrt(np.sum(np.multiply(delta, delta)))
 def l2_normalize(x):
  """
  Scale a vector to unit Euclidean length
  :param x: numpy array
  :return: x divided by its L2 norm
  """
  norm = np.sqrt(np.sum(np.multiply(x, x)))
  return x / norm
--- a/reset.sh
+++ b/reset.sh
@ -2,9 +2,7 @@
 echo "update photos set faces=-1 where faces!=-1;" | ./query.sh
 echo "delete from photos where deleted=1;" | ./query.sh
 echo "delete from facedescriptors where id>0;" | ./query.sh
 echo "delete from sqlite_sequence where name='facedescriptors';" | ./query.sh
 echo "delete from faces where id>0;" | ./query.sh
 echo "delete from sqlite_sequence where name='faces';" | ./query.sh
 find faces -type f -delete
 find faces -empty -delete
 mkdir faces
--- a/server/cluster.py
+++ b/server/cluster.py
@ -0,0 +1,344 @@
 import sys
 import json
 import os
 import piexif
 import sqlite3
 from sqlite3 import Error
 from PIL import Image
 import numpy as np
 from deepface import DeepFace
 from deepface.detectors import FaceDetector
 # Store numpy arrays as raw bytes. sqlite3 looks adapters up by the type of
 # the bound value, so the key must be the np.ndarray class; np.array is a
 # factory function and never matches
 sqlite3.register_adapter(np.ndarray, lambda arr: arr.tobytes())
 # Decode columns declared as "array" with np.frombuffer (converters only
 # run on connections opened with detect_types)
 sqlite3.register_converter("array", np.frombuffer)
 class NpEncoder(json.JSONEncoder):
  """JSON encoder that converts numpy scalars and arrays to plain Python values"""
  def default(self, obj):
    """
    Convert an object the standard encoder cannot serialize
    :param obj: object to convert
    :return: int for numpy integers, float for numpy floats, nested list
        for numpy arrays
    :raises TypeError: for any other type (raised by the base class)
    """
    if isinstance(obj, np.integer):
      return int(obj)
    if isinstance(obj, np.floating):
      return float(obj)
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    # Falling off the end would return None and silently write unsupported
    # objects as null
    return super().default(obj)
 # Recognition model and detector backend; the trailing comment on each line
 # names the alternative
 model_name = 'VGG-Face'    # 'ArcFace'
 detector_backend = 'mtcnn' # 'retinaface'
 # NOTE(review): model, face_detector and input_shape are built at import
 # time but do not appear to be referenced by the clustering code in this
 # script -- confirm whether they are still needed here
 model = DeepFace.build_model(model_name)
 face_detector = FaceDetector.build_model(detector_backend)
 input_shape = DeepFace.functions.find_input_shape(model)
 def create_connection(db_file):
  """
  Open a connection to the SQLite database stored in db_file
  :param db_file: database file
  :return: Connection object, or None if connecting failed (the error is
      printed)
  """
  try:
    return sqlite3.connect(db_file)
  except Error as failure:
    print(failure)
    return None
 def create_face(conn, face):
  """
  Insert one row into the faces table and commit
  :param conn: open database connection
  :param face: dict with 'photoId', 'scanVersion', 'faceConfidence', 'top',
      'left', 'bottom' and 'right'
  :return: face id (row id of the inserted row)
  """
  sql = '''
  INSERT INTO faces(photoId,scanVersion,faceConfidence,top,left,bottom,right)
  VALUES(?,?,?,?,?,?,?)
  '''
  # Same order as the column list in the statement
  columns = (
    'photoId', 'scanVersion', 'faceConfidence',
    'top', 'left', 'bottom', 'right'
  )
  cursor = conn.cursor()
  cursor.execute(sql, tuple(face[column] for column in columns))
  conn.commit()
  return cursor.lastrowid
 def create_face_descriptor(conn, faceId, descriptor):
  """
  Insert a descriptor for a face into the facedescriptors table and commit
  :param conn: open database connection
  :param faceId: id of the face the descriptor belongs to
  :param descriptor: dict with 'model' and 'descriptors' (the vector)
  :return: descriptor id (row id of the inserted row)
  """
  sql = '''
  INSERT INTO facedescriptors(faceId,model,descriptors)
  VALUES(?,?,?)
  '''
  values = (
    faceId,
    descriptor['model'],
    np.array(descriptor['descriptors'])
  )
  cursor = conn.cursor()
  cursor.execute(sql, values)
  conn.commit()
  return cursor.lastrowid
 def update_face_count(conn, photoId, faces):
  """
  Record the number of faces found on a photo and commit
  :param conn: open database connection
  :param photoId: id of the photos row to update
  :param faces: value stored in the row's faces column
  :return: None
  """
  sql = '''
  UPDATE photos SET faces=? WHERE id=?
  '''
  conn.cursor().execute(sql, (faces, photoId))
  conn.commit()
  return None
 def findCosineDistance(source_representation, test_representation):
  """
  Cosine distance between two vectors: 1 minus their cosine similarity
  :param source_representation: vector as a list or numpy array
  :param test_representation: vector as a list or numpy array
  :return: 1 - (a . b) / (|a| * |b|)
  """
  source, test = source_representation, test_representation
  if type(source) is list:
    source = np.array(source)
  if type(test) is list:
    test = np.array(test)
  dot_product = np.matmul(np.transpose(source), test)
  source_norm = np.sqrt(np.sum(np.multiply(source, source)))
  test_norm = np.sqrt(np.sum(np.multiply(test, test)))
  return 1 - (dot_product / (source_norm * test_norm))
 def findEuclideanDistance(source_representation, test_representation):
  """
  Euclidean (straight-line) distance between two vectors
  :param source_representation: vector as a list or numpy array
  :param test_representation: vector as a list or numpy array
  :return: square root of the summed squared differences
  """
  source, test = source_representation, test_representation
  if type(source) is list:
    source = np.array(source)
  if type(test) is list:
    test = np.array(test)
  delta = source - test
  return np.sqrt(np.sum(np.multiply(delta, delta)))
 def l2_normalize(x):
  """
  Scale a vector to unit Euclidean length
  :param x: numpy array
  :return: x divided by its L2 norm
  """
  norm = np.sqrt(np.sum(np.multiply(x, x)))
  return x / norm
 # NOTE(review): `base` does not appear to be read anywhere in this script
 base = '/pictures/'
 # Connection shared by the script body below
 conn = create_connection('../db/photos.db')
 # Module-level working sets: faces loaded from the database and the
 # identities built from them
 faces = []
 identities = []
 def find_nearest_face(faces, identities, face, threshold = 0.99):
  """
  Find the identity or face whose descriptors are closest to a face
  :param faces: list of candidate face dicts with 'descriptors'
  :param identities: list of candidate identity dicts with 'descriptors';
      searched before faces
  :param face: dict to match; the object itself is never returned
  :param threshold: candidates with a larger cosine distance are ignored
  :return: the closest candidate, or None if nothing is within threshold
  """
  closest = None
  closest_distance = None
  for target in identities + faces:
    # Skip the query object by identity; `==` compared the dicts value by
    # value, which is slower and not what "the same face" means here
    if target is face:
      continue
    target_distance = findCosineDistance(
      target['descriptors'], face['descriptors']
    )
    if target_distance > threshold:
      continue
    if closest_distance is None or target_distance < closest_distance:
      closest = target
      closest_distance = target_distance
  return closest
 def merge_identities(identities, identity1, identity2):
  """
  Build a new identity combining two identities (or faces)
  :param identities: current identity list; only used to pick the new id
      (id of the last entry plus one, or 1 when the list is empty)
  :param identity1: dict with 'faces' (a count) and 'descriptors'
  :param identity2: dict with 'faces' (a count) and 'descriptors'
  :return: dict with 'id', 'descriptors' (average of the two descriptors
      weighted by face count) and 'faces' (summed count); the inputs and
      the list are not modified
  """
  weighted1 = np.dot(identity1['faces'], identity1['descriptors'])
  weighted2 = np.dot(identity2['faces'], identity2['descriptors'])
  combined = np.add(weighted1, weighted2)
  total_faces = identity1['faces'] + identity2['faces']
  next_id = identities[-1]['id'] + 1 if len(identities) else 1
  return {
    'id': next_id,
    'descriptors': np.divide(combined, total_faces),
    'faces': total_faces
  }
 def delete_identity(identities, identity):
  """
  Remove the first entry with the same 'id' as identity from the list
  :param identities: list of dicts with 'id'; modified in place
  :param identity: dict whose 'id' selects the entry to remove
  :return: the removed entry, or None if no entry matched
  """
  position = next(
    (index for index, item in enumerate(identities) if item['id'] == identity['id']),
    None
  )
  if position is None:
    return None
  return identities.pop(position)
 def update_face_identity(identities, face, closest):
  """
  Merge a face and its closest match into one new identity
  :param identities: identity list; replaced identities are removed and the
      merged identity is appended
  :param face: face (or identity) dict being assigned
  :param closest: nearest match found for face
  :return: the newly created identity
  """
  # NOTE(review): only `face` and `closest` are re-pointed at the merged
  # identity; anything else that referenced a replaced identity keeps the
  # old link, and a `closest` without 'identified_as' stays in the list --
  # confirm this is intended
  sources = []
  for item in (face, closest):
    if 'identified_as' in item:
      # Already assigned: merge the whole identity and retire the old one
      source = item['identified_as']
      delete_identity(identities, source)
    else:
      source = item
    sources.append(source)
  identity = merge_identities(identities, sources[0], sources[1])
  identities.append(identity)
  closest['identified_as'] = identity
  face['identified_as'] = identity
  return identity
 def cluster_faces(face):
  """
  Group faces into identities by merging each face with its nearest match
  :param face: list of face dicts to cluster (the singular name is kept so
      the signature stays unchanged)
  :return: list of identities that were created
  """
  # Work on the argument; the previous body ignored it and iterated the
  # module-level `faces` list instead
  candidates = face
  identities = []
  perc = -1
  for i, current in enumerate(candidates):
    new_perc = int(100 * (i+1) / len(candidates))
    if new_perc != perc:
      perc = new_perc
      print(f'Clustering faces {perc}% complete with {len(identities)} identities.')
    closest = find_nearest_face(candidates, identities, current, threshold = 0.25)
    if closest is None:
      continue
    update_face_identity(identities, current, closest)
  return identities
 def cluster_identities(identities):
  """
  Repeatedly merge identities with their nearest identity until a full
  pass leaves the number of identities unchanged
  :param identities: list of identity dicts; modified in place
  :return: the same list
  """
  # NOTE(review): update_face_identity() adds to and removes from
  # `identities` while it is being enumerated below; confirm the loop
  # terminates and visits what is intended before enabling this function
  perc = -1
  last_len = 0
  while last_len != len(identities):
    last_len = len(identities)
    for i, identity in enumerate(identities):
      new_perc = int(100 * (i+1) / len(identities))
      if new_perc != perc:
        perc = new_perc
        print(f'Clustering identities {perc}% complete with {len(identities)} identities.')
      # Query with the identity being visited; the previous code passed the
      # unrelated module-level `face` variable
      closest = find_nearest_face([], identities, identity, threshold = 0.25)
      if closest is None:
        continue
      update_face_identity(identities, identity, closest)
  return identities
 def identity_get_faces(item):
  """
  Sort key returning the 'faces' entry of an identity
  :param item: dict with 'faces'
  :return: item['faces']
  """
  count = item['faces']
  return count
 # Script body: load unassigned high-confidence faces, cluster them and
 # print how many faces each identity holds
 with conn:
  cur = conn.cursor()
  res = cur.execute('''
    SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId
    FROM faces
    JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
    WHERE faces.identityId IS null AND faces.faceConfidence>0.99
    ''')
  for row in res.fetchall():
    id, descriptors, confidence, photoId = row
    # Reuse the entry if this face id was already loaded (linear scan)
    face = None
    for target in faces:
      if target['id'] == id:
        face = target
        break
    if face == None:
      face = { 
        'id': id,
      }
      faces.append(face)
    # 'faces' is a count here: a single face weighs 1 when merged
    face['faces'] = 1
    face['confidence'] = confidence
    face['photoId'] = photoId
    face['descriptors'] = np.frombuffer(descriptors)
  identities = cluster_faces(faces)
  #identities = cluster_identities(identities)
  # Largest identities first
  identities.sort(reverse = True, key = identity_get_faces)
  sum = 0
  for identity in identities:
    sum += identity['faces']
    print(f'{identity["id"]} has {identity["faces"]} faces')
  print(f'{len(identities)} identities seeded with {sum} faces.')
  # The script stops here: everything below this call is unreachable
  exit(0)
  if False:
    for key2 in faces:
      if key1 == key2:
        continue
      face2 = faces[key2]
      if face2['scanned']:
        continue
      face = {
        'between': (face1['id'], face2['id']),
        'confidence': (face1['confidence'], face2['confidence'])
      }
      face['distanceCosine'] = findCosineDistance(
        face1['descriptors'],
        face2['descriptors']
      )
      face['distanceEuclidean'] = findEuclideanDistance(
        face1['descriptors'],
        face2['descriptors']
      )
      face['distanceEuclideanL2'] = findEuclideanDistance(
        l2_normalize(face1['descriptors']),
        l2_normalize(face2['descriptors'])
      )
      face['scoring'] = 0
      if model_name == 'VGG-Face':
 #        thresholds = {'cosine': 0.40, 'euclidean': 0.60, 'euclidean_l2': 0.86}
 #        thresholds = {'cosine': 0.31, 'euclidean': 0.47, 'euclidean_l2': 0.79}
        thresholds = {'cosine': 0.25, 'euclidean': 0.47, 'euclidean_l2': 0.79}
      elif model_name == 'ArcFace':
        thresholds = {'cosine': 0.68, 'euclidean': 4.15, 'euclidean_l2': 1.13}
      if face['distanceCosine'] < thresholds['cosine']:
        face['scoring'] += 1
      if face['distanceEuclidean'] < thresholds['euclidean']:
        face['scoring'] += 1
      if face['distanceEuclideanL2'] < thresholds['euclidean_l2']:
        face['scoring'] += 1
      if face['scoring'] == 3: # Same face!
        if ('identity' in face1) and ('identity' in face2):
          if face1['identity'] != face2['identity']:
            # print(f'Identity mismatch between {key1}({face1["confidence"]}) and {key2}({face2["confidence"]})')
            continue
        elif 'identity' in face1:
          face2['identity'] = face1['identity']
          face1['identity']['members'].append(face)
        elif 'identity' in face2:
          face1['identity'] = face2['identity']
          face2['identity']['members'].append(face)
        else:
          # print(f'Creating new identity {len(identities)} {face["between"]}')
          identity = {
            'members': [],
          }
          face1['identity'] = face2['identity'] = identity
          identity['members'].append(face)
          identities.append(identity)
  for idx, identity in enumerate(identities):
    count = len(identity['members'])
    print('<div>')
    print(f'<div><b>Identity {idx} has {count}</b><br></div>')
    print('<div>')
    for member in identity['members']:
      face1 = member['between'][0]
      face2 = member['between'][1]
      path1 = f'faces/{"{:02d}".format(face1 % 10)}'
      path2 = f'faces/{"{:02d}".format(face2 % 10)}'
      print('<div>')
      print(f'<img src="{path1}/{face1}.jpg"/>{member["confidence"][0]}')
      print(f'<img src="{path2}/{face2}.jpg"/>{member["confidence"][1]}')
      print('</div>')
      print(f'<div>Distance: {member["distanceCosine"]}, {member["distanceEuclidean"]}, {member["distanceEuclideanL2"]}</div>')
    print('</div>')
    print('</div>')
 #    update_face_count(conn, photoId, len(faces))
--- a/server/db/photos.js
+++ b/server/db/photos.js
@ -141,11 +141,7 @@ function init() {
         */
        defaultValue: 0
      },
-      focus: { /* focal via laplacian variance https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/ */
+
        type: Sequelize.DOUBLE,
        defaultValue: 0
      },
      top: Sequelize.FLOAT, /* 0..1 * photoId.height */
      left: Sequelize.FLOAT, /* 0..1 * photoId.width */
      bottom: Sequelize.FLOAT, /* 0..1 * photoId.height */
--- a/server/detect.py
+++ b/server/detect.py
@ -0,0 +1,336 @@
 import sys
 import zlib
 import json
 import os
 import piexif
 import sqlite3
 from sqlite3 import Error
 from PIL import Image, ImageOps
 from deepface import DeepFace
 from deepface.detectors import FaceDetector
 from retinaface import RetinaFace
 import numpy as np
 import cv2
 import uu
 from io import BytesIO
 # Holds the real sys.stdout while output is suppressed; None means that no
 # redirection is currently active.
 original = None
 def redirect_on():
  """
  Silence stdout by pointing sys.stdout at os.devnull

  The real stream is remembered in the module-level `original` so that
  redirect_off() can restore it. Calling this while a redirection is
  already active does nothing.
  :return: None
  """
  global original
  if original is None:
    original = sys.stdout
    sys.stdout = open(os.devnull, 'w')
 def redirect_off():
  """
  Undo redirect_on(): close the devnull stream and restore the real stdout

  Does nothing when no redirection is active.
  :return: None
  """
  global original
  if original is not None:
    sys.stdout.close()
    sys.stdout = original
    original = None
 def zlib_uuencode(databytes, name='<data>'):
  """
  Deflate `databytes` at zlib level 9 and uuencode the compressed result

  :param databytes: bytes to compress
  :param name: name passed through to uu.encode
  :return: the uuencoded data as bytes
  """
  compressed = zlib.compress(databytes, 9)
  with BytesIO(compressed) as source, BytesIO() as sink:
    uu.encode(source, sink, name=name)
    return sink.getvalue()
 def zlib_uudecode(databytes):
  """
  Reverse zlib_uuencode(): uudecode `databytes`, then inflate with zlib

  :param databytes: uuencoded bytes
  :return: the decompressed bytes
  """
  with BytesIO(databytes) as source, BytesIO() as sink:
    uu.decode(source, sink)
    compressed = sink.getvalue()
  return zlib.decompress(compressed)
 class NpEncoder(json.JSONEncoder):
  """
  JSON encoder that converts numpy scalars and arrays to native Python values
  """
  def default(self, obj):
    """
    Convert objects the stock encoder cannot serialize
    :param obj: value json could not encode on its own
    :return: int, float or list equivalent of a numpy value
    :raises TypeError: for any other unsupported type (from the base class)
    """
    if isinstance(obj, np.integer):
      return int(obj)
    if isinstance(obj, np.floating):
      return float(obj)
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    # Defer to the base class so unsupported types raise TypeError instead
    # of being silently written out as null
    return super().default(obj)
 # Names handed to the DeepFace / FaceDetector builders below; the trailing
 # comments appear to record the alternative that can be swapped in.
 model_name = 'VGG-Face'    # 'ArcFace'
 detector_backend = 'mtcnn' # 'retinaface'
 # Both models are built once at import time and reused for every photo.
 model = DeepFace.build_model(model_name)
 face_detector = FaceDetector.build_model(detector_backend)
 # Size that face crops are resized to in extract_faces()
 input_shape = DeepFace.functions.find_input_shape(model)
 # Adapted from DeepFace
 # https://github.com/serengil/deepface/blob/master/deepface/commons/functions.py
 #
 # Modified to use bicubic resampling and clip expansion, as well as to
 # take a PIL Image instead of numpy array
 def alignment_procedure(img, left_eye, right_eye):
  """
  Rotate an image by the angle of the line joining the two eyes

  The angle of the vector from left_eye to right_eye is measured with
  arctan2, converted to degrees and offset by 180 before being handed to
  img.rotate() with bicubic resampling and canvas expansion.
  :param img: Image (not np.array)
  :param left_eye: (x, y) of the first eye
  :param right_eye: (x, y) of the second eye
  :return: the rotated image
  """
  delta_x = right_eye[0] - left_eye[0]
  delta_y = right_eye[1] - left_eye[1]
  angle = np.arctan2(delta_y, delta_x)
  rotation = 180 + 180 * angle / np.pi
  return img.rotate(
    angle = rotation,
    resample = Image.BICUBIC,
    expand = True)
 def extract_faces(img, threshold=0.75, allow_upscaling = True):
  """
  Detect the faces in an image and attach an embedding and thumbnail to each

  Every detected face is cropped to a square around its centre, rotated with
  alignment_procedure(), resized to input_shape and passed to
  DeepFace.represent(). Each entry of the result gains:
    'vector' - value returned by DeepFace.represent()
    'face'   - bounding box as fractions of the image height / width
    'image'  - PIL Image of the aligned, resized crop
  The eye landmarks of each entry are rewritten in place so that they are
  relative to the crop.

  :param img: image as a numpy array indexed [row, column]
  :param threshold: only passed to the retinaface backend
  :param allow_upscaling: only passed to the retinaface backend
  :return: dict of faces, or None when the detector result is not a dict
  :raises ValueError: if detector_backend is neither 'retinaface' nor 'mtcnn'
  """
  if detector_backend == 'retinaface':
    # NOTE(review): `model` is the module-level model built with
    # DeepFace.build_model(model_name) -- confirm that this is what
    # RetinaFace.detect_faces expects
    faces = RetinaFace.detect_faces(
      img_path = img,
      threshold = threshold,
      model = model,
      allow_upscaling = allow_upscaling)
  elif detector_backend == 'mtcnn':
    # NOTE(review): assumes the caller supplies BGR channel order -- confirm
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB
    redirect_on()
    try:
      res = face_detector.detect_faces(img_rgb)
    finally:
      # Restore stdout even when detection raises
      redirect_off()
    faces = {}
    if isinstance(res, list):
      for i, face in enumerate(res):
        x, y, w, h = face['box'][:4]
        faces[f'face_{i+1}'] = { # standardize properties
          'facial_area': [ x, y, x + w, y + h ],
          'landmarks': {
            'left_eye': list(face['keypoints']['left_eye']),
            'right_eye': list(face['keypoints']['right_eye']),
          },
          'score': face['confidence'],
        }
  else:
    raise ValueError(f'Unsupported detector backend: {detector_backend}')
  if not isinstance(faces, dict):
    return None
  rows = img.shape[0] # image height
  cols = img.shape[1] # image width
  # Re-implementation of 'extract_faces' with the addition of keeping a
  # copy of the face image for caching on disk
  for k, key in enumerate(faces):
    print(f'Processing face {k+1}/{len(faces)}')
    identity = faces[key]
    facial_area = identity["facial_area"]
    landmarks = identity["landmarks"]
    left_eye = landmarks["left_eye"]
    right_eye = landmarks["right_eye"]
    markup = False # set to True to draw the face rectangle and eyes on img
    if markup:
      cv2.rectangle(img,
        (int(facial_area[0]), int(facial_area[1])),
        (int(facial_area[2]), int(facial_area[3])),
        (0, 0, 255), 2)
      cv2.circle(img, (int(left_eye[0]), int(left_eye[1])), 5, (255, 0, 0), 2)
      cv2.circle(img, (int(right_eye[0]), int(right_eye[1])), 5, (0, 255, 0), 2)
    # Find center of face, then crop to square of equal width and height
    width = facial_area[2] - facial_area[0]
    height = facial_area[3] - facial_area[1]
    x = facial_area[0] + width * 0.5
    y = facial_area[1] + height * 0.5
    # Make thumbnail a square crop using the larger of the two sides
    width = height = max(width, height)
    left = max(round(x - width * 0.5), 0)
    right = min(round(left + width), cols)
    top = max(round(y - height * 0.5), 0)
    bottom = min(round(top + height), rows)
    # Make the eye positions relative to the crop: index 0 is x (offset by
    # left) and index 1 is y (offset by top)
    left_eye[0] -= left
    left_eye[1] -= top
    right_eye[0] -= left
    right_eye[1] -= top
    facial_img = img[top: bottom, left: right]
    # Eye order is reversed as the routine does them backwards
    image = Image.fromarray(facial_img)
    image = alignment_procedure(image, right_eye, left_eye)
    image = image.resize(size = input_shape, resample = Image.LANCZOS)
    resized = np.asarray(image)
    redirect_on()
    try:
      identity['vector'] = DeepFace.represent(
        img_path = resized,
        model_name = model_name,
        model = model,              # pre-built
        detector_backend = detector_backend,
        enforce_detection = False)
    finally:
      # Restore stdout even when the embedding step raises
      redirect_off()
    # Bounding box expressed as fractions of the full image size
    identity["face"] = {
      'top': facial_area[1] / rows,
      'left': facial_area[0] / cols,
      'bottom': facial_area[3] / rows,
      'right': facial_area[2] / cols
    }
    identity['image'] = Image.fromarray(resized)
  return faces
 def create_connection(db_file):
  """
  Open the SQLite database stored at db_file

  A sqlite3 Error raised while connecting is printed rather than propagated.
  :param db_file: database file
  :return: Connection object, or None if connecting failed
  """
  try:
    return sqlite3.connect(db_file)
  except Error as err:
    print(err)
    return None
 def create_face(conn, face):
  """
  Insert one row into the faces table and commit it
  :param conn: open sqlite3 connection
  :param face: dict with the keys photoId, scanVersion, faceConfidence,
    top, left, bottom, right and descriptorId
  :return: face id (rowid of the inserted row)
  """
  sql = '''
  INSERT INTO faces(photoId,scanVersion,faceConfidence,top,left,bottom,right,descriptorId)
  VALUES(?,?,?,?,?,?,?,?)
  '''
  # Same order as the column list in the statement above
  columns = (
    'photoId',
    'scanVersion',
    'faceConfidence',
    'top',
    'left',
    'bottom',
    'right',
    'descriptorId',
  )
  values = tuple(face[column] for column in columns)
  cursor = conn.cursor()
  cursor.execute(sql, values)
  conn.commit()
  return cursor.lastrowid
 def create_face_descriptor(conn, face):
  """
  Insert a face's vector into the facedescriptors table and commit it
  :param conn: open sqlite3 connection
  :param face: dict whose 'vector' entry is converted with np.array() and
    bound to the descriptors column
  :return: descriptor id (rowid of the inserted row)
  """
  sql = '''
  INSERT INTO facedescriptors(descriptors)
  VALUES(?)
  '''
  descriptor = np.array(face['vector'])
  cursor = conn.cursor()
  cursor.execute(sql, (descriptor,))
  conn.commit()
  return cursor.lastrowid
 def update_face_count(conn, photoId, faces):
  """
  Write a new value into photos.faces for one photo and commit it
  :param conn: open sqlite3 connection
  :param photoId: id of the photos row to update
  :param faces: value to store in the faces column
  :return: None
  """
  statement = '''
  UPDATE photos SET faces=? WHERE id=?
  '''
  params = (faces, photoId)
  cursor = conn.cursor()
  cursor.execute(statement, params)
  conn.commit()
 # Scan every photo still marked faces=-1: detect its faces, store each face
 # and its descriptor in the database, and cache a JSON file plus a JPG
 # thumbnail (with the JSON embedded as Exif) under faces/NN/.
 base = '/pictures/'
 conn = create_connection('../db/photos.db')
 if conn is None:
  # create_connection() has already printed the sqlite error
  sys.exit(1)
 # Entries of a face dict that are not written to the JSON metadata
 excluded = ('image', 'facial_area', 'landmarks')
 with conn:
  cur = conn.cursor()
  res = cur.execute('''
    SELECT photos.id,photos.faces,albums.path,photos.filename FROM photos
    LEFT JOIN albums ON (albums.id=photos.albumId) 
    WHERE photos.faces=-1
    ''')
  rows = res.fetchall()
  count = len(rows)
  for i, row in enumerate(rows):
    photoId, photoFaces, albumPath, photoFilename = row
    img_path = f'{base}{albumPath}{photoFilename}'
    print(f'Processing {i+1}/{count}: {img_path}')
    img = Image.open(img_path)
    img = ImageOps.exif_transpose(img) # auto-rotate if needed
    img = img.convert()
    img = np.asarray(img)
    faces = extract_faces(img)
    if faces is None:
      print(f'Image no faces: {img_path}')
      update_face_count(conn, photoId, 0)
      continue
    for j, key in enumerate(faces):
      face = faces[key]
      image = face['image']
      print(f'Writing face {j+1}/{len(faces)}')
      #face['analysis'] = DeepFace.analyze(img_path = img, actions = ['age', 'gender', 'race', 'emotion'], enforce_detection = False)
      #face['analysis'] = DeepFace.analyze(img, actions = ['emotion'])
      # TODO: Add additional meta-data allowing back referencing to original
      # photo
      face['version'] = 1 # version 1 doesn't add much...
      # Filter in insertion order so the JSON key order is the same on
      # every run
      data = {k: v for k, v in face.items() if k not in excluded}
      json_str = json.dumps(data, ensure_ascii=False, cls=NpEncoder)
      faceDescriptorId = create_face_descriptor(conn, face)
      faceId = create_face(conn, {
        'photoId': photoId,
        'scanVersion': face['version'],
        'faceConfidence': face['score'],
        'top': face['face']['top'],
        'left': face['face']['left'],
        'bottom': face['face']['bottom'],
        'right': face['face']['right'],
        'descriptorId': faceDescriptorId,
      })
      # Spread the cached files over ten directories keyed on faceId % 10
      path = f'faces/{faceId % 10:02d}'
      # Also creates the parent 'faces' directory when it is missing
      os.makedirs(path, exist_ok = True)
      with open(f'{path}/{faceId}.json', 'w', encoding = 'utf-8') as f:
        f.write(json_str)
      compressed_str = zlib_uuencode(json_str.encode())
      # Encode this data into the JPG as Exif
      exif_ifd = {piexif.ExifIFD.UserComment: compressed_str}
      exif_dict = {"0th": {}, "Exif": exif_ifd, "1st": {},
        "thumbnail": None, "GPS": {}}
      image.save(f'{path}/{faceId}.jpg', exif = piexif.dump(exif_dict))
    update_face_count(conn, photoId, len(faces))
--- a/ketrface/headers.py
+++ b/ketrface/headers.py
@ -6,16 +6,20 @@ from PIL import Image
 import uu
 from io import BytesIO
-from ketrface.util import *
+def zlib_uudecode(databytes):
    ''' uudecode databytes and decompress the result with zlib '''
    inbuff = BytesIO(databytes)
    outbuff = BytesIO()
    uu.decode(inbuff, outbuff)
    return zlib.decompress(outbuff.getvalue())
 face_base = "../"
 faceId = int(sys.argv[1])
-path = f'{face_base}faces/{"{:02d}".format(faceId % 10)}'
+path = f'faces/{"{:02d}".format(faceId % 10)}'
 img = Image.open(f'{path}/{faceId}.jpg')
 exif_dict = piexif.load(img.info["exif"])
 compressed_str = exif_dict["Exif"][piexif.ExifIFD.UserComment]
 str = zlib_uudecode(compressed_str)
-parsed = json.loads(str)
+json = json.loads(str)
-print(json.dumps(parsed, indent=2))
+print(json)
--- a/server/scanner.js
+++ b/server/scanner.js
@ -232,26 +232,15 @@ function processBlock(items) {
        var src = picturesPath + path + file,
          image = sharp(src);
-        return image/*.limitInputPixels(1073741824)*/
+        return image/*.limitInputPixels(1073741824)*/.metadata().then(function(metadata) {
        .metadata()
        .catch(error => {
          console.error(error);
        })
        .then((metadata) => {
          if (metadata.exif) {
-            try {
+            metadata.exif = exif(metadata.exif);
-              metadata.exif = exif(metadata.exif);
+            delete metadata.exif.thumbnail;
-              delete metadata.exif.thumbnail;
+            delete metadata.exif.image;
-              delete metadata.exif.image;
+            for (var key in metadata.exif.exif) {
-              for (var key in metadata.exif.exif) {
+              if (Buffer.isBuffer(metadata.exif.exif[key])) {
-                if (Buffer.isBuffer(metadata.exif.exif[key])) {
+                metadata.exif.exif[key] = "Buffer[" + metadata.exif.exif[key].length + "]";
                  metadata.exif.exif[key] = 
                    "Buffer[" + metadata.exif.exif[key].length + "]";
                }
              }
            } catch (error) {
              console.error(error);
              delete metadata.exif
            }
          }