From c27bceb6d410ece45e0f1b723b761e004fe9c7b2 Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Sat, 11 Jan 2020 19:38:23 -0800
Subject: [PATCH] Recursively reduce cluster sets

Signed-off-by: James Ketrenos
---
Review notes (text in this section is ignored by git-am):
 * The guard for the optional 4th argument (MAX_CLUSTER_SIZE) tests argc;
   the earlier revision compared argv (a pointer) against 4.
 * Line breaks and indentation of this copy were reconstructed from the
   hunk line counts; use `git apply --ignore-whitespace` if the context
   whitespace does not match the tree.

 scanner/scanner.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/scanner/scanner.c b/scanner/scanner.c
index 06598e5..cce8fd8 100644
--- a/scanner/scanner.c
+++ b/scanner/scanner.c
@@ -14,6 +14,10 @@
 #define MAX_DISTANCE 0.47L
 #endif
 
+#ifndef MAX_CLUSTER_SIZE
+#define MAX_CLUSTER_SIZE 150
+#endif
+
 /* descriptors extracted from a profile face photo, used to filter out faces near a profile image
  * (which all identify as the same person) */
 float profileDescriptors[][128] = {
@@ -315,6 +319,7 @@ int main(int argc, char *argv[]) {
   long int entries = 0;
   long int minPts = MIN_PTS;
   float maxDistance = MAX_DISTANCE;
+  long int maxClusterSize = MAX_CLUSTER_SIZE;
 
   if (argc == 1) {
     fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
@@ -329,6 +334,10 @@ int main(int argc, char *argv[]) {
     sscanf(argv[3], "%ld", &minPts);
   }
 
+  if (argc > 4) {
+    sscanf(argv[4], "%ld", &maxClusterSize);
+  }
+
   fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts);
 
   /* Allocate storage for all distances */
@@ -475,13 +484,22 @@ int main(int argc, char *argv[]) {
   long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
   fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
 
-  if (clusters > 0) {
+  int recalcNeeded = clusters > 0 ? 1 : 0;
+  float reducedDistance = maxDistance;
+
+  while (recalcNeeded) {
     int *stats = malloc(sizeof(int) * clusters), delta = 0;
     memset(stats, 0, sizeof(int) * clusters);
     getClusterCounts(stats, ppFaces, entries);
-
+    recalcNeeded = 0;
+
+    reducedDistance -= 0.05L;
+    if (reducedDistance < 0.1) {
+      break;
+    }
+
     for (int i = 0; i < clusters; i++) {
-      if (stats[i] < 100) {
+      if (stats[i] < maxClusterSize) {
         continue;
       }
 
@@ -492,8 +510,14 @@ int main(int argc, char *argv[]) {
         }
       }
 
-      int split = DBSCAN(ppFaces, entries, maxDistance - 0.05L, minPts, i + 1);
-      fprintf(stderr, "Cluster %d had %d units. Split into %d clusters.\n", i + 1, stats[i], split);
+      int split = DBSCAN(ppFaces, entries, reducedDistance, minPts, i + 1);
+      if (split) {
+        recalcNeeded |= 1;
+      } else {
+        continue;
+      }
+      fprintf(stderr, "Cluster %d had %d units. Split into %d clusters (max: %f).\n",
+        i + 1, stats[i], split, reducedDistance);
       for (int c = 0; c < split; c++) {
         fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters));
       }