98 lines
2.8 KiB
Python
98 lines
2.8 KiB
Python
from ketrface.util import *
|
|
import time
|
|
|
|
# Default DBSCAN parameters (used as the default arguments of DBSCAN()).
MIN_PTS = 5          # Fewest neighbours a point needs to pass the density check
MAX_DISTANCE = 0.25  # Default eps: largest cosine distance still counted as a neighbour

# Sentinel values stored in a point's 'cluster' entry until it is replaced
# by the cluster dict the point was assigned to.
Undefined = 0        # Point has not been processed yet
Edge = -1            # NOTE(review): not referenced by the code in this file - confirm use
Noise = -2           # Point failed the density check and belongs to no cluster (yet)
|
|
|
|
def Union(A, B):
    """Append to A every element of B that A does not already contain.

    Membership is tested with ``in`` (equality), so the elements do not
    need to be hashable; this is what allows lists of dicts to be merged.

    A is extended *in place* and the very same list object is returned.
    Callers that are iterating over A while calling this will therefore
    also visit the newly appended elements.

    Args:
        A: List that receives the new elements.
        B: Iterable of candidate elements.

    Returns:
        A itself, after the missing elements of B were appended in order.
    """
    for candidate in B:
        if candidate not in A:
            A.append(candidate)
    return A
|
|
|
|
# https://en.wikipedia.org/wiki/DBSCAN
def DBSCAN(points, eps=MAX_DISTANCE, minPts=MIN_PTS, verbose=True):
    """Group ``points`` into density-based clusters, labelling them in place.

    The structure follows the DBSCAN pseudocode on Wikipedia.

    Args:
        points: List of dicts. Each point's ``'cluster'`` entry is read and
            overwritten: only points whose entry equals ``Undefined`` are
            processed; afterwards the entry is either ``Noise`` or the
            cluster dict the point was assigned to. The points are also
            handed to ``RangeQuery`` for the neighbour search.
        eps: Distance threshold forwarded to ``RangeQuery``.
        minPts: Minimum number of neighbours (as returned by ``RangeQuery``,
            which does not include the query point itself) for a point to
            count as a core point.
        verbose: When truthy, print progress for the outer scan and for
            each cluster expansion.

    Returns:
        List of cluster dicts of the form
        ``{'id': <index in the list>, 'faces': [<points>], 'cluster': Undefined}``.
    """
    clusters = []
    total = len(points)
    perc = -1
    last = 0
    start = time.time()

    for i, point in enumerate(points):
        if verbose:
            # Report whenever the integer percentage changes or 5s elapsed.
            new_perc = int(100 * (i + 1) / total)
            now = time.time()
            if new_perc != perc or now - last > 5:
                perc = new_perc
                print(
                    f'Clustering points {perc}% ({i}/{total} processed) '
                    f'complete with {len(clusters)} identities '
                    f'({now - start}s).'
                )
                last = now

        if point['cluster'] != Undefined:
            # Already labelled while expanding an earlier cluster.
            continue

        neighbors = RangeQuery(points, point, eps)
        if len(neighbors) < minPts:
            # Not dense enough to start a cluster; may later be claimed as
            # a border point of another cluster.
            point['cluster'] = Noise
            continue

        cluster = {
            'id': len(clusters),
            'faces': [point],
            'cluster': Undefined,
        }
        clusters.append(cluster)
        point['cluster'] = cluster

        # Seed set = neighbours without the starting point. RangeQuery does
        # not return the query point, so the previous ``list.remove(point)``
        # raised ValueError for every core point; filtering by identity is a
        # no-op in that case and still drops the point should it be present.
        seeds = [seed for seed in neighbors if seed is not point]

        sub_perc = -1
        sub_last = 0

        # Union() appends to ``seeds`` in place, so this loop also visits
        # the seeds that are added while the cluster is being expanded.
        for j, seed in enumerate(seeds):
            if verbose:
                sub_total = len(seeds)
                sub_new_perc = int(100 * (j + 1) / sub_total)
                sub_now = time.time()
                if sub_new_perc != sub_perc or sub_now - sub_last > 5:
                    sub_perc = sub_new_perc
                    # Elapsed time uses the current timestamp (previously
                    # the stale timestamp of the outer loop was printed).
                    print(
                        f'... points {sub_perc}% ({j}/{sub_total} processed '
                        f'[{perc}% total]) complete with {len(clusters)} '
                        f'identities ({sub_now - start}s).'
                    )
                    sub_last = sub_now

            if seed['cluster'] == Noise:
                # Former noise becomes a border point: it joins the cluster
                # but is not expanded further.
                seed['cluster'] = cluster
                cluster['faces'].append(seed)
                continue

            if seed['cluster'] != Undefined:
                # Already belongs to a cluster.
                continue

            seed['cluster'] = cluster
            cluster['faces'].append(seed)

            reachable = RangeQuery(points, seed, eps)
            if len(reachable) >= minPts:
                # The seed is itself a core point: grow the seed set.
                seeds = Union(seeds, reachable)

    return clusters
|
|
|
|
def RangeQuery(points, Q, eps):
    """Return the points whose cosine distance to Q is at most ``eps``.

    Every entry of ``points`` is scanned linearly. Entries that compare
    equal to Q are skipped, so the query point is never part of the result,
    and an entry equal to one already collected is not added a second time.

    Args:
        points: Iterable of point dicts; each must provide ``'descriptors'``.
        Q: The query point (a dict providing ``'descriptors'``).
        eps: Inclusive upper bound on the distance computed by
            ``findCosineDistance``.

    Returns:
        A new list with the matching points, in the order they occur in
        ``points``.
    """
    neighbors = []
    for candidate in points:
        if candidate == Q:
            # Never report the query point as its own neighbour.
            continue
        if candidate in neighbors:
            # Skip duplicates of an already collected neighbour.
            continue
        distance = findCosineDistance(
            Q['descriptors'],
            candidate['descriptors'])
        if distance <= eps:
            neighbors.append(candidate)
    return neighbors
|
|
|