Sub-split any group with 100 or more faces

Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
This commit is contained in:
James Ketrenos 2020-01-11 19:07:07 -08:00
parent 2fc08a41b0
commit aaf3752803

View File

@ -96,11 +96,11 @@ RangeQuery(DB, distFunc, Q, eps) {
}
#endif
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps) {
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) {
FaceLink *pNeighbors = NULL;
for (long int i = 0; i < faceCount; i++) {
Face *pFace = ppFaces[i];
if (pFace->profileDistance <= 0.5) {
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
continue;
}
@ -137,11 +137,14 @@ long int chainLength(FaceLink *pLink) {
return count;
}
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
long int C = 0;
long int C = 0;
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) {
int count = 0;
for (long int i = 0; i < faceCount; i++) {
Face *pFace = ppFaces[i];
if (pFace->profileDistance <= 0.5) {
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
continue;
}
@ -150,26 +153,16 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
}
float threshold = eps;
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps);
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak);
long neighborCount = chainLength(pNeighbors);
/*
while (neighborCount > minPts * 5) {
threshold *= 0.9;
freeChain(pNeighbors);
pNeighbors = RangeQuery(ppFaces, faceCount, pFace, threshold);
neighborCount = chainLength(pNeighbors);
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pFace->faceId, neighborCount);
}
*/
if (neighborCount < minPts) {
pFace->clusterType = NOISE;
freeChain(pNeighbors);
continue;
}
//printf("%ld has %ld neighbors.\n", pFace->faceId, neighborCount);
C++;
count++;
pFace->clusterId = C;
pFace->clusterType = CORE;
@ -193,18 +186,9 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
pQ->clusterId = C;
pQ->clusterType = EDGE;
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps);
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak);
neighborCount = chainLength(pSubNeighbors);
/*
threshold = eps;
while (neighborCount > minPts * 1.25) {
threshold *= 0.9;
freeChain(pSubNeighbors);
pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, threshold);
neighborCount = chainLength(pSubNeighbors);
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pQ->faceId, neighborCount);
}
*/
if (neighborCount >= minPts) {
pQ->clusterType = CORE;
/* Append these neighbors to the end of the chain */
@ -222,7 +206,7 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
freeChain(pNeighbors);
}
return C;
return count;
}
typedef struct {
@ -297,6 +281,27 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
return 0;
}
/*
 * Tally how many clustered faces belong to each cluster.
 *
 * stats   - caller-provided counters indexed by (clusterId - 1); the slot
 *           for a face's cluster is incremented once per counted face.
 *           The array is not cleared here.
 * ppFaces - array of face pointers to scan.
 * entries - number of pointers in ppFaces.
 *
 * Only faces whose clusterType is CORE or EDGE are counted. Faces with a
 * clusterId below 1 are skipped, since they would index before the start
 * of stats. The length of stats is not passed in, so the upper bound cannot
 * be checked here; the caller must size stats for the highest clusterId.
 */
void getClusterCounts(int *stats, Face **ppFaces, long int entries) {
    /* Use a long int index so it matches the type of entries. */
    for (long int i = 0; i < entries; i++) {
        Face *pFace = ppFaces[i];
        if (pFace->clusterType != CORE && pFace->clusterType != EDGE) {
            continue;
        }
        if (pFace->clusterId < 1) {
            continue;
        }
        stats[pFace->clusterId - 1]++;
    }
}
/*
 * Count the faces assigned to a single cluster.
 *
 * ppFaces   - array of face pointers to scan.
 * entries   - number of pointers in ppFaces.
 * clusterId - cluster identifier to match against each face's clusterId.
 *
 * A face is counted when its clusterId equals clusterId and its clusterType
 * is neither NOISE nor UNDEFINED. Returns the number of such faces.
 */
long int getClusterCount(Face **ppFaces, long int entries, int clusterId) {
    long int total = 0;
    long int idx = 0;

    while (idx < entries) {
        const Face *pFace = ppFaces[idx++];

        if (pFace->clusterId != clusterId) {
            continue;
        }
        if (pFace->clusterType == NOISE || pFace->clusterType == UNDEFINED) {
            continue;
        }
        total++;
    }

    return total;
}
/*
* 1. Count how many entries there are
* 2. Allocate storage to hold all entries
@ -421,6 +426,7 @@ int main(int argc, char *argv[]) {
ppFaces[dst++] = pFace;
}
}
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
entries = dst;
@ -466,14 +472,55 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts);
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
if (clusters > 0) {
int *stats = malloc(sizeof(int) * clusters), delta = 0;
memset(stats, 0, sizeof(int) * clusters);
getClusterCounts(stats, ppFaces, entries);
for (int i = 0; i < clusters; i++) {
if (stats[i] < 100) {
continue;
}
for (int j = 0; j < entries; j++) {
Face *pFace = ppFaces[j];
if (pFace->clusterId == i + 1) {
pFace->clusterType = UNDEFINED;
}
}
int split = DBSCAN(ppFaces, entries, maxDistance * 0.99L, minPts, i + 1);
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters.\n", i + 1, stats[i], split);
for (int c = 0; c < split; c++) {
fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, c + clusters));
}
for (int j = 0; j < entries; j++) {
Face *pFace = ppFaces[j];
if (pFace->clusterId == i + 1) {
pFace->clusterType = CORE;
}
}
delta += split;
}
clusters += delta;
free(stats);
}
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
for (i = 0; i < entries; i++) {
switch (ppFaces[i]->clusterType) {
case NOISE:
ppFaces[i]->clusterId = 0;
outlier++;
break;
case UNDEFINED:
ppFaces[i]->clusterId = 0;
undefined++;
break;
case CORE:
@ -485,7 +532,7 @@ int main(int argc, char *argv[]) {
}
}
fprintf(stderr, "\n%ld clusters identified!\n", clusters);
fprintf(stderr, "\n%ld clusters being written:\n", clusters);
fprintf(stderr, "%ld NOISE\n", outlier);
fprintf(stderr, "%ld UNDEFINED\n", undefined);
fprintf(stderr, "%ld CORE\n", core);
@ -494,10 +541,10 @@ int main(int argc, char *argv[]) {
fprintf(stdout, "<script>\nvar clusters = [\n");
for (long i = 1; i <= clusters; i++) {
long nodes = 0;
fprintf(stdout, "/* %ld. */ [", i);
for (long int j = 0; j < entries; j++) {
if (ppFaces[j]->clusterId == i) {
if (ppFaces[j]->clusterId == i && ppFaces[j]->clusterId != UNDEFINED) {
if (nodes == 0) {
fprintf(stdout, "/* %ld. */ [", i);
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
} else {
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
@ -505,10 +552,12 @@ int main(int argc, char *argv[]) {
nodes++;
}
}
if (i < clusters) {
fprintf(stdout, "],\n");
} else {
fprintf(stdout, "]\n");
if (nodes) {
if (i < clusters) {
fprintf(stdout, "],\n");
} else {
fprintf(stdout, "]\n");
}
}
}
fprintf(stdout, "];\n</script>\n");