Recursively reduce cluster sets

Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
This commit is contained in:
James Ketrenos 2020-01-11 19:38:23 -08:00
parent 8bb980123e
commit c27bceb6d4

View File

@ -14,6 +14,10 @@
#define MAX_DISTANCE 0.47L #define MAX_DISTANCE 0.47L
#endif #endif
/* Default cluster-size threshold. main() uses it as the initial value of
 * maxClusterSize; clusters whose member count is below that value are skipped
 * by the size-split pass. Define MAX_CLUSTER_SIZE at compile time to change
 * the default. */
#ifndef MAX_CLUSTER_SIZE
#define MAX_CLUSTER_SIZE 150
#endif
/* descriptors extracted from a profile face photo, used to filter out faces near a profile image /* descriptors extracted from a profile face photo, used to filter out faces near a profile image
* (which all identify as the same person) */ * (which all identify as the same person) */
float profileDescriptors[][128] = { float profileDescriptors[][128] = {
@ -315,6 +319,7 @@ int main(int argc, char *argv[]) {
long int entries = 0; long int entries = 0;
long int minPts = MIN_PTS; long int minPts = MIN_PTS;
float maxDistance = MAX_DISTANCE; float maxDistance = MAX_DISTANCE;
long int maxClusterSize = MAX_CLUSTER_SIZE;
if (argc == 1) { if (argc == 1) {
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n"); fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
@ -329,6 +334,10 @@ int main(int argc, char *argv[]) {
sscanf(argv[3], "%ld", &minPts); sscanf(argv[3], "%ld", &minPts);
} }
/* Optional 4th argument: cluster-size threshold for the size-split pass.
 * Test the argument COUNT (argc), not the argument vector pointer (argv):
 * comparing argv with 4 is a pointer/integer comparison that does not check
 * whether argv[4] exists, so argv[4] could be read when it is NULL or past
 * the end of the array. If the text does not parse as an integer, sscanf
 * leaves maxClusterSize at its default. */
if (argc > 4) {
    sscanf(argv[4], "%ld", &maxClusterSize);
}
fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts); fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts);
/* Allocate storage for all distances */ /* Allocate storage for all distances */
@ -475,13 +484,22 @@ int main(int argc, char *argv[]) {
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1); long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters); fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
if (clusters > 0) { int recalcNeeded = clusters > 0 ? 1 : 0;
float reducedDistance = maxDistance;
while (recalcNeeded) {
int *stats = malloc(sizeof(int) * clusters), delta = 0; int *stats = malloc(sizeof(int) * clusters), delta = 0;
memset(stats, 0, sizeof(int) * clusters); memset(stats, 0, sizeof(int) * clusters);
getClusterCounts(stats, ppFaces, entries); getClusterCounts(stats, ppFaces, entries);
recalcNeeded = 0;
reducedDistance -= 0.05L;
if (reducedDistance < 0.1) {
break;
}
for (int i = 0; i < clusters; i++) { for (int i = 0; i < clusters; i++) {
if (stats[i] < 100) { if (stats[i] < maxClusterSize) {
continue; continue;
} }
@ -492,8 +510,14 @@ int main(int argc, char *argv[]) {
} }
} }
int split = DBSCAN(ppFaces, entries, maxDistance - 0.05L, minPts, i + 1); int split = DBSCAN(ppFaces, entries, reducedDistance, minPts, i + 1);
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters.\n", i + 1, stats[i], split); if (split) {
recalcNeeded |= 1;
} else {
continue;
}
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters (max: %f).\n",
i + 1, stats[i], split, reducedDistance);
for (int c = 0; c < split; c++) { for (int c = 0; c < split; c++) {
fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters)); fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters));
} }