diff --git a/scanner/scanner.c b/scanner/scanner.c index 2331f9c..7239ee5 100644 --- a/scanner/scanner.c +++ b/scanner/scanner.c @@ -36,6 +36,7 @@ typedef struct Face { long faceId; long photoId; double confidence; + double profileDistance; ClusterTypes clusterType; double *distances; } Face; @@ -46,45 +47,6 @@ typedef struct FaceLink { Face *pFace; } FaceLink; -char fileBuf[5000]; -char pathBuf[1028]; - - -Face *readFaceDescriptor(Face *pFace, long id, char *path) { - FILE *f; - f = fopen(path, "r"); - if (!f) { - return NULL; - } - size_t s = fread(fileBuf, 1, sizeof(fileBuf), f); - fclose(f); - - char *p = fileBuf; - fileBuf[s] = 0; - while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) { - p++; - } - int i = 0; - for (i = 0; i < 128; i++) { - char *start = p; - while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') { - p++; - } - if (!*p) { - break; - } - *p++ = 0; - sscanf(start, "%Lf", &pFace->descriptor[i]); - } - - if (i != 128) { - return NULL; - } - - pFace->faceId = id; - - return pFace; -} long double euclideanDistance(long double *a, long double *b) { long double sum = 0.0L; @@ -136,7 +98,7 @@ FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, double eps) { FaceLink *pNeighbors = NULL; for (long int i = 0; i < faceCount; i++) { Face *pFace = ppFaces[i]; - if (pFace->confidence <= 0.9) { + if (pFace->profileDistance <= 0.5) { continue; } @@ -177,7 +139,7 @@ long int DBSCAN(Face **ppFaces, long int faceCount, double eps, int minPts) { long int C = 0; for (long int i = 0; i < faceCount; i++) { Face *pFace = ppFaces[i]; - if (pFace->confidence <= 0.9) { + if (pFace->profileDistance <= 0.5) { continue; } @@ -318,10 +280,15 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) { long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10); double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL); + if (confidence < 0.9) { + return 0; + } + Face *pFace = map->ppFaces[map->initialized++]; pFace->faceId = faceId; pFace->photoId = photoId; pFace->confidence = confidence; + return 0; } @@ -414,6 +381,8 @@ int main(int argc, char *argv[]) { sqlite3_close(db); return 1; } + entries = data.initialized; + data.count = data.initialized; fprintf(stderr, "Face data loaded from DB.\n"); @@ -427,11 +396,33 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Descriptor data loaded from DB\n"); - err_msg = NULL; + long double profileDistance = 1.0; + long int dst, src; + for (src = 0, dst = 0; src < entries; src++) { + Face *pFace = ppFaces[src]; + profileDistance = 1.0; + for (int j = 0; j < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); j++) { + profileDistance = euclideanDistance(pFace->descriptor, profileDescriptors[j]); + if (profileDistance > 0.5) { + profileDistance = 1.0; + } else { + break; + } + } + if (profileDistance <= 0.5) { + free(pFace->distances); + } else { + pFace->profileDistance = profileDistance; + ppFaces[dst++] = pFace; + } + } + fprintf(stderr, "Dropped %ld faces as too close to profile photos.\n", (entries - dst)); + entries = dst; long int processed = 0; long int last = 0; long double total = 0.0; + long int sampleSize = 0; for (long i = 0; i < entries; i++) { Face *pLink = ppFaces[i]; @@ -446,12 +437,6 @@ int main(int argc, char *argv[]) { } } - if (pLink->confidence <= 0.9 || pTarget->confidence <= 0.9) { - pLink->distances[i] = 0.0; - pTarget->distances[j] = 0.0; - continue; - } - if (i == j) { pLink->distances[i] = 0.0; pTarget->distances[j] = 0.0; @@ -465,11 +450,12 @@ int main(int argc, char *argv[]) { pLink->distances[j] = pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor); + sampleSize++; total += pLink->distances[j]; } } - fprintf(stderr, "\nAverage distance: %Lf\n", 1. * total / (entries * entries)); + fprintf(stderr, "\nAverage distance: %Lf\n", 1. * total / sampleSize); long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts); long int undefined = 0, outlier = 0, core = 0, reachable = 0;