Clustering worked well enough; need to add face column indicating manual (expert) assignment

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
James Ketr 2023-01-17 19:23:48 -08:00
parent 5d38cb4787
commit 1ead088eb8
6 changed files with 44 additions and 64 deletions
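The face column for manual (expert) assignment mentioned in the commit message is not added here. A minimal migration sketch, assuming SQLite (the ../db/photos.db database used later in this commit) and a hypothetical column name manualAssignment, might look like:

import sqlite3

# Hypothetical sketch only -- not part of this commit. Adds a flag column to
# record that a face's identity was assigned manually by an expert.
conn = sqlite3.connect('../db/photos.db')
with conn:
    conn.execute('ALTER TABLE faces ADD COLUMN manualAssignment INTEGER DEFAULT 0')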

View File

@@ -58,7 +58,7 @@ div {
 .Image .FaceBox {
   border: 1px solid red;
-  border-radius: 0.25rem;
+  /* border-radius: 0.25rem;*/
   position: absolute;
 }

View File

@@ -37,55 +37,31 @@ const makeFaceBoxes = (photo: any, dimensions: any): any => {
   ));
 };

-/*
-function debounce(fn: any, ms: number) {
-  let timer: any;
-  return () => {
-    if (timer) clearTimeout(timer);
-    timer = setTimeout(() => {
-      timer = null
-      fn.apply(this as typeof Photo, arguments)
-    }, ms)
-  };
-};
-*/
-
 const Photo = ({ photoId }: any) => {
   const [image, setImage] = useState<any>(undefined);
   const ref = useRef(null);
-  const [dimensions, setDimensions] = React.useState({
-    height: window.innerHeight,
-    width: window.innerWidth
-  })
+  const [dimensions, setDimensions] = React.useState({width: 0, height: 0});

   const faces = useMemo(() => {
-    if (image === undefined) {
+    if (image === undefined || dimensions.height === 0) {
       return <></>;
     }
     return makeFaceBoxes(image, dimensions);
   }, [image, dimensions]);

-  useEffect(() : any => {
-    if (!ref || !ref.current) {
+  useEffect(() => {
+    if (!ref.current) {
       return;
     }
     const el: Element = ref.current as Element;
-
-    const handleResize = () => {
+    if (dimensions.height !== el.clientHeight
+      || dimensions.width !== el.clientWidth) {
       setDimensions({
         height: el.clientHeight,
         width: el.clientWidth
       })
-    };
-
-    const debouncedHandleResize = handleResize;//debounce(handleResize, 250);
-    debouncedHandleResize();
-    window.addEventListener('resize', debouncedHandleResize);
-    return () => {
-      window.removeEventListener('resize', debouncedHandleResize)
-    };
-  });
+    }
+  }/*, [dimensions.height, dimensions.width]*/);

   useEffect(() => {
     if (photoId === 0) {
@@ -108,7 +84,7 @@ const Photo = ({ photoId }: any) => {
   return (<div className="Image"
     ref={ref}
     style={{
-      background: `url(../${image.path}thumbs/scaled/${image.filename})`
+      background: `url(../${image.path}thumbs/scaled/${image.filename})`.replace(/ /g, '%20')
     }}>{ faces }</div>
   );
 };

View File

@@ -24,6 +24,9 @@ html_base = config['basePath']
 if html_base == "/":
     html_base = "."

+MAX_CLUSTER_DISTANCE = 0.14        # Used to merge clusters
+MAX_DISTANCE_FROM_CENTROID = 0.14  # Used to prune outliers
+
 # TODO
 # Switch to using DBSCAN
 #
@@ -86,8 +89,10 @@ def load_faces(db_path = db_path):
     res = cur.execute('''
     SELECT faces.id,facedescriptors.descriptors,faces.faceConfidence,faces.photoId,faces.focus
     FROM faces
+    INNER JOIN photos ON (photos.duplicate == 0 OR photos.duplicate IS NULL)
     JOIN facedescriptors ON (faces.descriptorId=facedescriptors.id)
     WHERE faces.identityId IS null AND faces.faceConfidence>0.99
+    AND faces.photoId=photos.id
     ''')
     for row in res.fetchall():
         id, descriptors, confidence, photoId, focus = row
@@ -109,21 +114,27 @@ def load_faces(db_path = db_path):
         faces.append(face)
     return faces

-def update_distances(identities, prune = False):
+def update_distances(identities,
+                     prune = False,
+                     maxDistance = MAX_DISTANCE_FROM_CENTROID):
     removed = 0
     for identity in identities:
         for face in identity['faces']:
             average = identity['descriptors']
-            distance = findCosineDistance(average, face['descriptors'])
-            if prune and distance > MAX_EPOCH_DISTANCE:
+            distance = findCosineDistanceBaked(identity, face)
+            if prune and distance > maxDistance:
                 average = np.dot(average, len(identity['faces']))
                 average = np.subtract(average, face['descriptors'])
                 face['cluster'] = Undefined
                 face['distance'] = 0
                 identity['faces'].remove(face)
-                identity['descriptors'] = np.divide(average, len(identity['faces']))
+                average = np.divide(average, len(identity['faces']))
+                identity['descriptors'] = average
+                identity['sqrtsummul'] = np.sqrt(np.sum(np.multiply(
+                    average, average)))
                 removed += 1
             else:
                 face['distance'] = distance
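The prune branch recomputes identity['sqrtsummul'] because findCosineDistanceBaked (defined later in this commit) divides by the cached L2 norms instead of recomputing them on every call. A minimal sketch of that relationship, using a hypothetical bake() helper purely for illustration:

import numpy as np

def bake(entry):
    # Hypothetical helper: cache the L2 norm under 'sqrtsummul', matching
    # what update_distances now recomputes after pruning a face.
    d = np.asarray(entry['descriptors'])
    entry['sqrtsummul'] = np.sqrt(np.sum(np.multiply(d, d)))
    return entry

a = bake({'descriptors': np.array([1.0, 0.0, 0.0])})
b = bake({'descriptors': np.array([0.0, 1.0, 0.0])})

# 1 - cos(theta) using the cached norms; orthogonal vectors give 1.0
dot = np.matmul(np.transpose(a['descriptors']), b['descriptors'])
print(1 - (dot / (a['sqrtsummul'] * b['sqrtsummul'])))  # -> 1.0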
@@ -159,19 +170,19 @@ def build_straglers(faces):
 print('Loading faces from database')
 faces = load_faces()

-minPts = len(faces) / 100
-eps = 0.2
+minPts = max(len(faces) / 500, 5)
+eps = 0.185

 print(f'Scanning {len(faces)} faces for clusters (minPts: {minPts}, eps: {eps})')
 identities = DBSCAN(faces, minPts = minPts, eps = eps)
 print(f'{len(identities)} clusters grouped')

-MAX_CLUSTER_DISTANCE = 0.15 # Used to merge clusters
-MAX_EPOCH_DISTANCE = 0.14 # Used to prune outliers
-
 # Compute average center for all clusters
 identities = update_cluster_averages(identities)

+epoch_prune = True
+merge_identities = True
+
-if False:
+if epoch_prune:
     removed = -1
     epoch = 1
     # Filter each cluster removing any face that is > cluster_max_distance
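The new minPts and eps above tie DBSCAN's density threshold to the number of loaded faces. Purely for illustration, with an assumed count of 10,000 qualifying faces the parameters work out to:

# Illustrative only: assumed face count, not taken from this repository.
faces = [None] * 10000
minPts = max(len(faces) / 500, 5)   # -> 20.0 (float division in Python 3)
eps = 0.185                         # neighborhood radius in cosine distance
print(minPts, eps)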
@@ -179,14 +190,17 @@ if False:
     while removed != 0:
         print(f'Epoch {epoch}...')
         epoch += 1
-        removed = update_distances(identities, prune = True)
+        removed = update_distances(
+            identities,
+            prune = True,
+            maxDistance = MAX_DISTANCE_FROM_CENTROID)
         if removed > 0:
             print(f'Excluded {removed} faces this epoch')

 print(f'{len(identities)} identities seeded.')

 reduced = identities
-if False:
+if merge_identities:
     # Cluster the clusters...
     print('Reducing clusters via DBSCAN')
     reduced = DBSCAN(identities, eps = MAX_CLUSTER_DISTANCE, minPts = 3)
@@ -229,9 +243,7 @@ for id, identity in enumerate(reduced):
         face['cluster'] = identity

 reduced = update_cluster_averages(reduced)
 update_distances(reduced)
 sort_identities(reduced)

 if False:
@@ -294,6 +306,7 @@ print(f'Connecting to database: {db_path}')
 conn = create_connection(db_path)
 with conn:
     for identity in reduced:
+        print(f'Writing identity {identity["id"]} to DB')
         id = create_identity(conn, identity)
         for face in identity['faces']:
             update_face_identity(conn, face['id'], id)

View File

@@ -197,9 +197,10 @@ conn = create_connection('../db/photos.db')
 with conn:
     cur = conn.cursor()
     res = cur.execute('''
-    SELECT photos.id,photos.faces,albums.path,photos.filename FROM photos
+    SELECT photos.id,photos.faces,albums.path,photos.filename
+    FROM photos
     LEFT JOIN albums ON (albums.id=photos.albumId)
-    WHERE photos.faces=-1
+    WHERE photos.faces=-1 AND photos.duplicate=0
     ''')
     rows = res.fetchall()
     count = len(rows)

View File

@@ -11,13 +11,8 @@ Noise = -2
 # Union of two lists of dicts, adding unique elements of B to
 # end of A
 def Union(A, B):
-    # 5.012 of 100s sample
     return A + [x for x in B if x not in A]
-    # 5.039 of 100s sample
-    # for key in B:
-    #     if key not in A:
-    #         A.append(key)
-    # return A

 # https://en.wikipedia.org/wiki/DBSCAN
 #
@@ -91,7 +86,7 @@ def RangeQuery(points, Q, eps):
     for P in points:                        # Scan all points in the database
         if P == Q:
             continue
-        distance = findCoseinDistanceBaked( # Compute distance
+        distance = findCosineDistanceBaked( # Compute distance
             Q, P)
         if distance <= eps:                 # Check epsilon
             neighbors.append(P)             # Add to result

View File

@@ -23,7 +23,6 @@ def redirect_off():
     sys.stdout = original
     original = None

 def zlib_uuencode(databytes, name='<data>'):
     ''' Compress databytes with zlib & uuencode the result '''
     inbuff = BytesIO(zlib.compress(databytes, 9))
@@ -47,15 +46,11 @@ class NpEncoder(json.JSONEncoder):
         if isinstance(obj, np.ndarray):
             return obj.tolist()

-def findCoseinDistanceBaked(src, dst):
+def findCosineDistanceBaked(src, dst):
     a = np.matmul(np.transpose(src['descriptors']), dst['descriptors'])
     return 1 - (a / (src['sqrtsummul'] * dst['sqrtsummul']))

 def findCosineDistance(source_representation, test_representation):
-    # if type(source_representation) == list:
-    #     source_representation = np.array(source_representation)
-    # if type(test_representation) == list:
-    #     test_representation = np.array(test_representation)
     a = np.matmul(np.transpose(source_representation), test_representation)
     b = np.sum(np.multiply(source_representation, source_representation))
     c = np.sum(np.multiply(test_representation, test_representation))