Sub-split any group with > 100 faces
Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
This commit is contained in:
parent
2fc08a41b0
commit
aaf3752803
@ -96,11 +96,11 @@ RangeQuery(DB, distFunc, Q, eps) {
|
||||
}
|
||||
#endif
|
||||
|
||||
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps) {
|
||||
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) {
|
||||
FaceLink *pNeighbors = NULL;
|
||||
for (long int i = 0; i < faceCount; i++) {
|
||||
Face *pFace = ppFaces[i];
|
||||
if (pFace->profileDistance <= 0.5) {
|
||||
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -137,11 +137,14 @@ long int chainLength(FaceLink *pLink) {
|
||||
return count;
|
||||
}
|
||||
|
||||
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
||||
long int C = 0;
|
||||
long int C = 0;
|
||||
|
||||
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) {
|
||||
int count = 0;
|
||||
|
||||
for (long int i = 0; i < faceCount; i++) {
|
||||
Face *pFace = ppFaces[i];
|
||||
if (pFace->profileDistance <= 0.5) {
|
||||
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -150,26 +153,16 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
||||
}
|
||||
|
||||
float threshold = eps;
|
||||
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps);
|
||||
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak);
|
||||
long neighborCount = chainLength(pNeighbors);
|
||||
/*
|
||||
while (neighborCount > minPts * 5) {
|
||||
threshold *= 0.9;
|
||||
freeChain(pNeighbors);
|
||||
pNeighbors = RangeQuery(ppFaces, faceCount, pFace, threshold);
|
||||
neighborCount = chainLength(pNeighbors);
|
||||
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pFace->faceId, neighborCount);
|
||||
}
|
||||
*/
|
||||
|
||||
if (neighborCount < minPts) {
|
||||
pFace->clusterType = NOISE;
|
||||
freeChain(pNeighbors);
|
||||
continue;
|
||||
}
|
||||
//printf("%ld has %ld neighbors.\n", pFace->faceId, neighborCount);
|
||||
|
||||
C++;
|
||||
count++;
|
||||
|
||||
pFace->clusterId = C;
|
||||
pFace->clusterType = CORE;
|
||||
@ -193,18 +186,9 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
||||
pQ->clusterId = C;
|
||||
pQ->clusterType = EDGE;
|
||||
|
||||
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps);
|
||||
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak);
|
||||
neighborCount = chainLength(pSubNeighbors);
|
||||
/*
|
||||
threshold = eps;
|
||||
while (neighborCount > minPts * 1.25) {
|
||||
threshold *= 0.9;
|
||||
freeChain(pSubNeighbors);
|
||||
pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, threshold);
|
||||
neighborCount = chainLength(pSubNeighbors);
|
||||
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pQ->faceId, neighborCount);
|
||||
}
|
||||
*/
|
||||
|
||||
if (neighborCount >= minPts) {
|
||||
pQ->clusterType = CORE;
|
||||
/* Append these neighbors to the end of the chain */
|
||||
@ -222,7 +206,7 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
||||
freeChain(pNeighbors);
|
||||
}
|
||||
|
||||
return C;
|
||||
return count;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
@ -297,6 +281,27 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tally how many clustered faces belong to each cluster id.
 *
 * stats   - caller-provided counters; the slot at index (clusterId - 1) is
 *           incremented once for every counted face. This function neither
 *           clears nor bounds-checks the upper end of the array, so the
 *           caller must zero it and size it to cover every cluster id in use.
 * ppFaces - array of face pointers to scan.
 * entries - number of elements in ppFaces.
 *
 * Only faces whose clusterType is CORE or EDGE are counted.
 */
void getClusterCounts(int *stats, Face **ppFaces, long int entries) {
    /* Use a long int index so it matches the type of entries. */
    for (long int i = 0; i < entries; i++) {
        if (ppFaces[i]->clusterType != CORE && ppFaces[i]->clusterType != EDGE) {
            continue;
        }
        /* Slots are indexed by clusterId - 1; an id below 1 would land before stats[0]. */
        if (ppFaces[i]->clusterId < 1) {
            continue;
        }
        stats[ppFaces[i]->clusterId - 1]++;
    }
}
|
||||
|
||||
/*
 * Count the faces assigned to one cluster.
 *
 * ppFaces   - array of face pointers to scan.
 * entries   - number of elements in ppFaces.
 * clusterId - cluster id to match against each face's clusterId.
 *
 * Returns the number of faces whose clusterId equals clusterId and whose
 * clusterType is neither NOISE nor UNDEFINED.
 *
 * NOTE(review): clusterId is an int here, while cluster ids are handled as
 * long int elsewhere in this file -- confirm ids always fit in an int.
 */
long int getClusterCount(Face **ppFaces, long int entries, int clusterId) {
    long int members = 0;
    long int idx;

    for (idx = 0; idx < entries; idx++) {
        /* Wrong cluster: not a member. */
        if (ppFaces[idx]->clusterId != clusterId) {
            continue;
        }
        /* Unclassified or noise faces do not count toward the cluster size. */
        if (ppFaces[idx]->clusterType == NOISE || ppFaces[idx]->clusterType == UNDEFINED) {
            continue;
        }
        members += 1;
    }

    return members;
}
|
||||
|
||||
/*
|
||||
* 1. Count how many entries there are
|
||||
* 2. Allocate storage to hold all entries
|
||||
@ -421,6 +426,7 @@ int main(int argc, char *argv[]) {
|
||||
ppFaces[dst++] = pFace;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
|
||||
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
|
||||
entries = dst;
|
||||
@ -466,14 +472,55 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
|
||||
|
||||
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts);
|
||||
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
|
||||
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
|
||||
|
||||
if (clusters > 0) {
|
||||
int *stats = malloc(sizeof(int) * clusters), delta = 0;
|
||||
memset(stats, 0, sizeof(int) * clusters);
|
||||
getClusterCounts(stats, ppFaces, entries);
|
||||
|
||||
for (int i = 0; i < clusters; i++) {
|
||||
if (stats[i] < 100) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int j = 0; j < entries; j++) {
|
||||
Face *pFace = ppFaces[j];
|
||||
if (pFace->clusterId == i + 1) {
|
||||
pFace->clusterType = UNDEFINED;
|
||||
}
|
||||
}
|
||||
|
||||
int split = DBSCAN(ppFaces, entries, maxDistance * 0.99L, minPts, i + 1);
|
||||
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters.\n", i + 1, stats[i], split);
|
||||
for (int c = 0; c < split; c++) {
|
||||
fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, c + clusters));
|
||||
}
|
||||
|
||||
for (int j = 0; j < entries; j++) {
|
||||
Face *pFace = ppFaces[j];
|
||||
if (pFace->clusterId == i + 1) {
|
||||
pFace->clusterType = CORE;
|
||||
}
|
||||
}
|
||||
|
||||
delta += split;
|
||||
}
|
||||
|
||||
clusters += delta;
|
||||
free(stats);
|
||||
}
|
||||
|
||||
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
|
||||
for (i = 0; i < entries; i++) {
|
||||
switch (ppFaces[i]->clusterType) {
|
||||
case NOISE:
|
||||
ppFaces[i]->clusterId = 0;
|
||||
outlier++;
|
||||
break;
|
||||
case UNDEFINED:
|
||||
ppFaces[i]->clusterId = 0;
|
||||
undefined++;
|
||||
break;
|
||||
case CORE:
|
||||
@ -485,7 +532,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n%ld clusters identified!\n", clusters);
|
||||
fprintf(stderr, "\n%ld clusters being written:\n", clusters);
|
||||
fprintf(stderr, "%ld NOISE\n", outlier);
|
||||
fprintf(stderr, "%ld UNDEFINED\n", undefined);
|
||||
fprintf(stderr, "%ld CORE\n", core);
|
||||
@ -494,10 +541,10 @@ int main(int argc, char *argv[]) {
|
||||
fprintf(stdout, "<script>\nvar clusters = [\n");
|
||||
for (long i = 1; i <= clusters; i++) {
|
||||
long nodes = 0;
|
||||
fprintf(stdout, "/* %ld. */ [", i);
|
||||
for (long int j = 0; j < entries; j++) {
|
||||
if (ppFaces[j]->clusterId == i) {
|
||||
if (ppFaces[j]->clusterId == i && ppFaces[j]->clusterId != UNDEFINED) {
|
||||
if (nodes == 0) {
|
||||
fprintf(stdout, "/* %ld. */ [", i);
|
||||
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||||
} else {
|
||||
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||||
@ -505,10 +552,12 @@ int main(int argc, char *argv[]) {
|
||||
nodes++;
|
||||
}
|
||||
}
|
||||
if (i < clusters) {
|
||||
fprintf(stdout, "],\n");
|
||||
} else {
|
||||
fprintf(stdout, "]\n");
|
||||
if (nodes) {
|
||||
if (i < clusters) {
|
||||
fprintf(stdout, "],\n");
|
||||
} else {
|
||||
fprintf(stdout, "]\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "];\n</script>\n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user