From 28004308170f3f2ed713167c03cd3ca652493ad7 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Tue, 17 Jan 2023 10:14:24 -0800 Subject: [PATCH] Neighbors get added to the live Set during blob expansion Signed-off-by: James Ketrenos --- ketrface/ketrface/dbscan.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/ketrface/ketrface/dbscan.py b/ketrface/ketrface/dbscan.py index 8b103f7..e1d441d 100644 --- a/ketrface/ketrface/dbscan.py +++ b/ketrface/ketrface/dbscan.py @@ -8,13 +8,13 @@ Undefined = 0 Edge = -1 Noise = -2 -# Union of two lists of dicts +# Union of two lists of dicts, adding unique elements of B to +# end of A def Union(A, B): - C = A for key in B: - if key not in C: - C.append(key) - return C + if key not in A: + A.append(key) + return A # https://en.wikipedia.org/wiki/DBSCAN # @@ -53,32 +53,39 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True): S.remove(P) sub_perc = -1 - sub_total = len(S) - T = S.copy() - for j, Q in enumerate(S): # Process every seed point + sub_last = 0 + + for j, Q in enumerate(S): # Process every seed point + if verbose == True: + sub_total = len(S) sub_new_perc = int(100 * (j+1) / sub_total) - now = time.time() - if sub_new_perc != sub_perc or now - last > 5: + sub_now = time.time() + if sub_new_perc != sub_perc or sub_now - sub_last > 5: sub_perc = sub_new_perc - print(f'... points {sub_perc}% ({j}/{sub_total} processed) complete with {len(clusters)} identities ({now - start}s).') - last = now + print(f'... points {sub_perc}% ({j}/{sub_total} processed [{perc}% total]) complete with {len(clusters)} identities ({now - start}s).') + sub_last = sub_now + if Q['cluster'] == Noise: # Change Noise to border point Q['cluster'] = C C['faces'].append(Q) + if Q['cluster'] != Undefined: # Previously processed (border point) continue + Q['cluster'] = C # Label neighbor C['faces'].append(Q) + N = RangeQuery(points, Q, eps) # Find neighbors if len(N) >= minPts: # Density check (if Q is a core point) - T = Union(T, N) # Add new neighbors to seed set - S = T + S = Union(S, N) # Add new neighbors to seed set return clusters def RangeQuery(points, Q, eps): neighbors = [] for P in points: # Scan all points in the database + if P == Q: + continue if P in neighbors: continue distance = findCosineDistance( # Compute distance and check epsilon