James Ketrenos 2800430817 Neighbors get added to the live Set during blob expansion
Signed-off-by: James Ketrenos <james_git@ketrenos.com>
2023-01-17 10:14:24 -08:00

98 lines
2.8 KiB
Python

from ketrface.util import *
import time
# Default for DBSCAN's ``minPts``: how many neighbors RangeQuery must return
# for a point to be treated as a core point.
MIN_PTS = 5
# Default for DBSCAN's ``eps``: the largest findCosineDistance() value at
# which two points still count as neighbors.
MAX_DISTANCE = 0.25
# Values stored in a point's 'cluster' field while it is not (yet) a member
# of a cluster dict.
# Undefined: the point has not been processed by DBSCAN yet.
Undefined = 0
# NOTE(review): Edge is not referenced in the visible code -- presumably a
# border-point label; confirm before relying on it.
Edge = -1
# Noise: the point had fewer than minPts neighbors when it was visited.
Noise = -2
# In-place union of two lists: every element of B that is not already
# present in A is appended to the end of A.
def Union(A, B):
    """Append to ``A`` each element of ``B`` it does not already contain.

    Membership is decided with ``in`` (identity, then ``==``), so the
    elements do not need to be hashable -- plain dicts work.  Duplicates
    inside ``B`` are added only once because each test runs against the
    already-extended ``A``.

    ``A`` is modified in place and the very same list object is returned;
    callers that are iterating over ``A`` will therefore see the newly
    appended elements.
    """
    for candidate in B:
        if candidate in A:
            continue
        A.append(candidate)
    return A
# https://en.wikipedia.org/wiki/DBSCAN
#
def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
    """Cluster ``points`` with DBSCAN, labelling them in place.

    Every point is a dict.  This function reads and writes its ``'cluster'``
    entry; ``RangeQuery`` additionally reads ``'descriptors'``.  Only points
    whose ``'cluster'`` equals ``Undefined`` on entry are visited.  After
    the call each visited point's ``'cluster'`` is either ``Noise`` or the
    cluster dict the point was assigned to.

    Args:
        points: List of point dicts to cluster.
        eps: Maximum distance at which two points count as neighbors.
        minPts: Minimum number of neighbors a point needs to be a core
            point.  ``RangeQuery`` does not return the query point itself,
            so this counts *other* points only.
        verbose: When truthy, print progress whenever the percentage
            changes or more than 5 seconds have passed since the last
            message.

    Returns:
        The list of clusters found.  Each cluster is a dict with ``'id'``
        (its index in the returned list), ``'faces'`` (the member points, in
        the order they were added) and ``'cluster'`` (initialised to
        ``Undefined``).
    """
    clusters = []                                # Cluster list
    perc = -1
    total = len(points)
    last = 0
    start = time.time()

    for i, P in enumerate(points):
        if verbose:
            new_perc = int(100 * (i + 1) / total)
            now = time.time()
            if new_perc != perc or now - last > 5:
                perc = new_perc
                print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities ({now - start}s).')
                last = now

        if P['cluster'] != Undefined:            # Previously processed in inner loop
            continue

        N = RangeQuery(points, P, eps)           # Find neighbors
        if len(N) < minPts:                      # Density check
            P['cluster'] = Noise                 # Label as Noise
            continue

        C = {                                    # Define new cluster
            'id': len(clusters),
            'faces': [ P ],
            'cluster': Undefined
        }
        clusters.append(C)
        P['cluster'] = C                         # Label initial point

        # Neighbors to expand (exclude P).  RangeQuery already leaves the
        # query point out, so an unconditional list.remove(P) would raise
        # ValueError here; filtering by identity is safe whether or not P
        # is present.
        S = [Q for Q in N if Q is not P]

        sub_perc = -1
        sub_last = 0
        # S grows while it is being iterated: Union() appends to this same
        # list, and the for-loop picks the new seeds up as it advances.
        for j, Q in enumerate(S):                # Process every seed point
            if verbose:
                sub_total = len(S)               # Re-read: S may have grown
                sub_new_perc = int(100 * (j + 1) / sub_total)
                sub_now = time.time()
                if sub_new_perc != sub_perc or sub_now - sub_last > 5:
                    sub_perc = sub_new_perc
                    # Elapsed time is taken from sub_now, not the outer
                    # loop's (stale) timestamp.
                    print(f'... points {sub_perc}% ({j}/{sub_total} processed [{perc}% total]) complete with {len(clusters)} identities ({sub_now - start}s).')
                    sub_last = sub_now

            if Q['cluster'] == Noise:            # Change Noise to border point
                Q['cluster'] = C
                C['faces'].append(Q)

            if Q['cluster'] != Undefined:        # Previously processed (border point)
                continue

            Q['cluster'] = C                     # Label neighbor
            C['faces'].append(Q)

            N = RangeQuery(points, Q, eps)       # Find neighbors
            if len(N) >= minPts:                 # Density check (if Q is a core point)
                S = Union(S, N)                  # Add new neighbors to seed set (in place)

    return clusters
def RangeQuery(points, Q, eps):
    """Return the points of ``points`` that lie within ``eps`` of ``Q``.

    The whole list is scanned linearly.  A candidate is skipped when it
    compares equal to ``Q`` (so the query point itself is never returned)
    or when it compares equal to a neighbor that was already collected.
    For the remaining candidates the distance between the two
    ``'descriptors'`` entries is computed with ``findCosineDistance`` and
    the candidate is kept when that distance is ``<= eps``.

    NOTE(review): ``findCosineDistance`` is not defined in this file --
    presumably it is supplied by the ``ketrface.util`` star import; confirm.

    Returns:
        A new list of the matching points, in their original order.
    """
    found = []
    for candidate in points:                     # Scan all points in the database
        if candidate == Q or candidate in found:
            continue
        separation = findCosineDistance(         # Compute distance and check epsilon
            Q['descriptors'],
            candidate['descriptors'])
        if separation <= eps:
            found.append(candidate)              # Add to result
    return found