Sub-split any group with 100 or more faces
Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
This commit is contained in:
parent
2fc08a41b0
commit
aaf3752803
@ -96,11 +96,11 @@ RangeQuery(DB, distFunc, Q, eps) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps) {
|
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) {
|
||||||
FaceLink *pNeighbors = NULL;
|
FaceLink *pNeighbors = NULL;
|
||||||
for (long int i = 0; i < faceCount; i++) {
|
for (long int i = 0; i < faceCount; i++) {
|
||||||
Face *pFace = ppFaces[i];
|
Face *pFace = ppFaces[i];
|
||||||
if (pFace->profileDistance <= 0.5) {
|
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,11 +137,14 @@ long int chainLength(FaceLink *pLink) {
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
long int C = 0;
|
||||||
long int C = 0;
|
|
||||||
|
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) {
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
for (long int i = 0; i < faceCount; i++) {
|
for (long int i = 0; i < faceCount; i++) {
|
||||||
Face *pFace = ppFaces[i];
|
Face *pFace = ppFaces[i];
|
||||||
if (pFace->profileDistance <= 0.5) {
|
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,26 +153,16 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
float threshold = eps;
|
float threshold = eps;
|
||||||
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps);
|
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak);
|
||||||
long neighborCount = chainLength(pNeighbors);
|
long neighborCount = chainLength(pNeighbors);
|
||||||
/*
|
|
||||||
while (neighborCount > minPts * 5) {
|
|
||||||
threshold *= 0.9;
|
|
||||||
freeChain(pNeighbors);
|
|
||||||
pNeighbors = RangeQuery(ppFaces, faceCount, pFace, threshold);
|
|
||||||
neighborCount = chainLength(pNeighbors);
|
|
||||||
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pFace->faceId, neighborCount);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (neighborCount < minPts) {
|
if (neighborCount < minPts) {
|
||||||
pFace->clusterType = NOISE;
|
pFace->clusterType = NOISE;
|
||||||
freeChain(pNeighbors);
|
freeChain(pNeighbors);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
//printf("%ld has %ld neighbors.\n", pFace->faceId, neighborCount);
|
|
||||||
|
|
||||||
C++;
|
C++;
|
||||||
|
count++;
|
||||||
|
|
||||||
pFace->clusterId = C;
|
pFace->clusterId = C;
|
||||||
pFace->clusterType = CORE;
|
pFace->clusterType = CORE;
|
||||||
@ -193,18 +186,9 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
|||||||
pQ->clusterId = C;
|
pQ->clusterId = C;
|
||||||
pQ->clusterType = EDGE;
|
pQ->clusterType = EDGE;
|
||||||
|
|
||||||
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps);
|
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak);
|
||||||
neighborCount = chainLength(pSubNeighbors);
|
neighborCount = chainLength(pSubNeighbors);
|
||||||
/*
|
|
||||||
threshold = eps;
|
|
||||||
while (neighborCount > minPts * 1.25) {
|
|
||||||
threshold *= 0.9;
|
|
||||||
freeChain(pSubNeighbors);
|
|
||||||
pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, threshold);
|
|
||||||
neighborCount = chainLength(pSubNeighbors);
|
|
||||||
fprintf(stderr, "\rWith eps of %f, %ld has %ld neighbors.", threshold, pQ->faceId, neighborCount);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
if (neighborCount >= minPts) {
|
if (neighborCount >= minPts) {
|
||||||
pQ->clusterType = CORE;
|
pQ->clusterType = CORE;
|
||||||
/* Append these neighbors to the end of the chain */
|
/* Append these neighbors to the end of the chain */
|
||||||
@ -222,7 +206,7 @@ long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts) {
|
|||||||
freeChain(pNeighbors);
|
freeChain(pNeighbors);
|
||||||
}
|
}
|
||||||
|
|
||||||
return C;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -297,6 +281,27 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Tally how many clustered faces belong to each cluster.
 *
 * stats   - caller-provided array of counters, indexed by (clusterId - 1).
 *           Counters are incremented in place and are not reset here.
 *           The array must be large enough for the highest clusterId
 *           present in ppFaces; this function has no way to check that.
 * ppFaces - array of face pointers to scan.
 * entries - number of elements in ppFaces.
 *
 * Only faces whose clusterType is CORE or EDGE are counted; faces of any
 * other type are skipped.
 */
void getClusterCounts(int *stats, Face **ppFaces, long int entries) {
  /* Index with long int to match 'entries'; an int index would overflow
   * before reaching the end of a very large face set. */
  for (long int i = 0; i < entries; i++) {
    Face *pFace = ppFaces[i];

    if (pFace->clusterType != CORE && pFace->clusterType != EDGE) {
      continue;
    }

    /* stats is indexed by clusterId - 1, so an id below 1 would write
     * before the start of the array. Skip such faces instead. */
    if (pFace->clusterId < 1) {
      continue;
    }

    stats[pFace->clusterId - 1]++;
  }
}
|
||||||
|
|
||||||
|
/*
 * Count the faces that belong to a single cluster.
 *
 * ppFaces   - array of face pointers to scan.
 * entries   - number of elements in ppFaces.
 * clusterId - cluster id to match against each face's clusterId.
 *
 * A face is counted when its clusterId equals the requested id and its
 * clusterType is neither NOISE nor UNDEFINED.
 *
 * Returns the number of matching faces.
 */
long int getClusterCount(Face **ppFaces, long int entries, int clusterId) {
  long int matched = 0;
  long int idx = 0;

  while (idx < entries) {
    Face *pFace = ppFaces[idx++];

    /* Different cluster: not ours. */
    if (pFace->clusterId != clusterId) {
      continue;
    }

    /* Same id, but the face is not currently a cluster member. */
    if (pFace->clusterType == NOISE || pFace->clusterType == UNDEFINED) {
      continue;
    }

    matched++;
  }

  return matched;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 1. Count how many entries there are
|
* 1. Count how many entries there are
|
||||||
* 2. Allocate storage to hold all entries
|
* 2. Allocate storage to hold all entries
|
||||||
@ -421,6 +426,7 @@ int main(int argc, char *argv[]) {
|
|||||||
ppFaces[dst++] = pFace;
|
ppFaces[dst++] = pFace;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
|
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
|
||||||
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
|
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
|
||||||
entries = dst;
|
entries = dst;
|
||||||
@ -466,14 +472,55 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
|
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
|
||||||
|
|
||||||
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts);
|
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
|
||||||
|
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
|
||||||
|
|
||||||
|
if (clusters > 0) {
|
||||||
|
int *stats = malloc(sizeof(int) * clusters), delta = 0;
|
||||||
|
memset(stats, 0, sizeof(int) * clusters);
|
||||||
|
getClusterCounts(stats, ppFaces, entries);
|
||||||
|
|
||||||
|
for (int i = 0; i < clusters; i++) {
|
||||||
|
if (stats[i] < 100) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < entries; j++) {
|
||||||
|
Face *pFace = ppFaces[j];
|
||||||
|
if (pFace->clusterId == i + 1) {
|
||||||
|
pFace->clusterType = UNDEFINED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int split = DBSCAN(ppFaces, entries, maxDistance * 0.99L, minPts, i + 1);
|
||||||
|
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters.\n", i + 1, stats[i], split);
|
||||||
|
for (int c = 0; c < split; c++) {
|
||||||
|
fprintf(stderr, "%ld. %ld\n", c + clusters, getClusterCount(ppFaces, entries, c + clusters));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < entries; j++) {
|
||||||
|
Face *pFace = ppFaces[j];
|
||||||
|
if (pFace->clusterId == i + 1) {
|
||||||
|
pFace->clusterType = CORE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delta += split;
|
||||||
|
}
|
||||||
|
|
||||||
|
clusters += delta;
|
||||||
|
free(stats);
|
||||||
|
}
|
||||||
|
|
||||||
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
|
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
|
||||||
for (i = 0; i < entries; i++) {
|
for (i = 0; i < entries; i++) {
|
||||||
switch (ppFaces[i]->clusterType) {
|
switch (ppFaces[i]->clusterType) {
|
||||||
case NOISE:
|
case NOISE:
|
||||||
|
ppFaces[i]->clusterId = 0;
|
||||||
outlier++;
|
outlier++;
|
||||||
break;
|
break;
|
||||||
case UNDEFINED:
|
case UNDEFINED:
|
||||||
|
ppFaces[i]->clusterId = 0;
|
||||||
undefined++;
|
undefined++;
|
||||||
break;
|
break;
|
||||||
case CORE:
|
case CORE:
|
||||||
@ -485,7 +532,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr, "\n%ld clusters identified!\n", clusters);
|
fprintf(stderr, "\n%ld clusters being written:\n", clusters);
|
||||||
fprintf(stderr, "%ld NOISE\n", outlier);
|
fprintf(stderr, "%ld NOISE\n", outlier);
|
||||||
fprintf(stderr, "%ld UNDEFINED\n", undefined);
|
fprintf(stderr, "%ld UNDEFINED\n", undefined);
|
||||||
fprintf(stderr, "%ld CORE\n", core);
|
fprintf(stderr, "%ld CORE\n", core);
|
||||||
@ -494,10 +541,10 @@ int main(int argc, char *argv[]) {
|
|||||||
fprintf(stdout, "<script>\nvar clusters = [\n");
|
fprintf(stdout, "<script>\nvar clusters = [\n");
|
||||||
for (long i = 1; i <= clusters; i++) {
|
for (long i = 1; i <= clusters; i++) {
|
||||||
long nodes = 0;
|
long nodes = 0;
|
||||||
fprintf(stdout, "/* %ld. */ [", i);
|
|
||||||
for (long int j = 0; j < entries; j++) {
|
for (long int j = 0; j < entries; j++) {
|
||||||
if (ppFaces[j]->clusterId == i) {
|
if (ppFaces[j]->clusterId == i && ppFaces[j]->clusterId != UNDEFINED) {
|
||||||
if (nodes == 0) {
|
if (nodes == 0) {
|
||||||
|
fprintf(stdout, "/* %ld. */ [", i);
|
||||||
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||||||
@ -505,10 +552,12 @@ int main(int argc, char *argv[]) {
|
|||||||
nodes++;
|
nodes++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i < clusters) {
|
if (nodes) {
|
||||||
fprintf(stdout, "],\n");
|
if (i < clusters) {
|
||||||
} else {
|
fprintf(stdout, "],\n");
|
||||||
fprintf(stdout, "]\n");
|
} else {
|
||||||
|
fprintf(stdout, "]\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(stdout, "];\n</script>\n");
|
fprintf(stdout, "];\n</script>\n");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user