Improved stats output during clustering

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
2023-01-16 19:55:13 -08:00 · 2023-01-16 19:55:13 -08:00 · 46b2075515
commit 46b2075515
parent e8de846ed0
1 changed file with 22 additions and 6 deletions
--- a/ketrface/ketrface/dbscan.py
+++ b/ketrface/ketrface/dbscan.py
@ -1,4 +1,5 @@
 from ketrface.util import *
+import time

 MIN_PTS = 5
 MAX_DISTANCE = 0.25
@ -9,26 +10,32 @@ Noise = -2

 # Union of two lists of dicts
 def Union(A, B):
-  C = []
-  for key in A + B:
+  C = A
+  for key in B:
    if key not in C:
      C.append(key)
  return C    

 # https://en.wikipedia.org/wiki/DBSCAN
+#
 def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
  clusters = []                       # Cluster list
  perc = -1
  total = len(points)
+  last = 0
+  start = time.time()
  for i, P in enumerate(points):
    if verbose == True:
      new_perc = int(100 * (i+1) / total)
-      if new_perc != perc:
+      now = time.time()
+      if new_perc != perc or now - last > 5:
        perc = new_perc
-        print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities.')
-
+        print(f'Clustering points {perc}% ({i}/{total} processed) complete with {len(clusters)} identities ({now - start}s).')
+        last = now
+      
    if P['cluster'] != Undefined:     # Previously processed in inner loop
      continue
+
    N = RangeQuery(points, P, eps)    # Find neighbors
    if len(N) < minPts:               # Density check
      P['cluster'] = Noise            # Label as Noise
@ -45,7 +52,16 @@ def DBSCAN(points, eps = MAX_DISTANCE, minPts = MIN_PTS, verbose = True):
    S = N                             # Neighbors to expand (exclude P)
    S.remove(P)

-    for Q in S:                       # Process every seed point
+    sub_perc = -1
+    sub_total = len(S)
+    for j, Q in enumerate(S):                       # Process every seed point
+      if verbose == True:
+        sub_new_perc = int(100 * (j+1) / total)
+        now = time.time()
+        if sub_new_perc != perc or now - last > 5:
+          sub_perc = sub_new_perc
+          print(f'... points {sub_perc}% ({j}/{sub_total} processed) complete with {len(clusters)} identities ({now - start}s).')
+          last = now
      if Q['cluster'] == Noise:       # Change Noise to border point
        Q['cluster'] = C
        C['faces'].append(Q)