From 0ed48fd7f76bd48816b1c9b00358787410c82be6 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Tue, 7 Jan 2020 23:17:24 -0800 Subject: [PATCH] Clustering scanner Signed-off-by: James Ketrenos --- clusters-pre | 34 ++++ scanner/Makefile | 5 +- scanner/scanner.c | 390 ++++++++++++++++++++++++++++++++++++++-------- server/face.js | 30 ++-- 4 files changed, 371 insertions(+), 88 deletions(-) create mode 100644 clusters-pre diff --git a/clusters-pre b/clusters-pre new file mode 100644 index 0000000..0137ba2 --- /dev/null +++ b/clusters-pre @@ -0,0 +1,34 @@ + + + + diff --git a/scanner/Makefile b/scanner/Makefile index d1b0397..145ee29 100644 --- a/scanner/Makefile +++ b/scanner/Makefile @@ -6,4 +6,7 @@ ifeq ("$(wildcard /usr/include/sqlite3.h)","") endif scanner: scanner.c - gcc -o scanner scanner.c -lm -lsqlite3 + gcc -o scanner -g scanner.c -lm -lsqlite3 + +clean: + rm scanner diff --git a/scanner/scanner.c b/scanner/scanner.c index 928fdc1..dc13adf 100644 --- a/scanner/scanner.c +++ b/scanner/scanner.c @@ -6,35 +6,45 @@ #include #include +typedef enum { + UNDEFINED = 0, + CORE = 1, + EDGE = 2, + NOISE = 3 +} ClusterTypes; + typedef struct Face { + long faceId; long double descriptor[128]; - int faceId; + long int clusterId; + ClusterTypes clusterType; long double *distances; - struct Face *next; - struct Face *prev; } Face; +typedef struct FaceLink { + struct FaceLink *pNext; + Face *pFace; +} FaceLink; + char fileBuf[5000]; char pathBuf[1028]; -Face *readFaceDescriptor(int id, char *path) { +Face *readFaceDescriptor(Face *pFace, long id, char *path) { FILE *f; - Face *pFace = (Face *)malloc(sizeof(Face)); - memset(pFace, 0, sizeof(Face)); f = fopen(path, "r"); if (!f) { - free(pFace); return NULL; } size_t s = fread(fileBuf, 1, sizeof(fileBuf), f); fclose(f); - + char *p = fileBuf; fileBuf[s] = 0; while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) { p++; } - for (int i = 0; i < 128; i++) { + int i = 0; + for (i = 0; i < 128; i++) { char *start = p; while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') { p++; @@ -46,14 +56,17 @@ Face *readFaceDescriptor(int id, char *path) { sscanf(start, "%Lf", &pFace->descriptor[i]); } + if (i != 128) { + return NULL; + } + pFace->faceId = id; - pFace->next = pFace->prev = NULL; return pFace; } long double euclideanDistance(long double *a, long double *b) { - long double sum = 0.0; + long double sum = 0.0L; for (int i = 0; i < 128; i++) { long double delta = a[i] - b[i]; sum += delta * delta; @@ -61,16 +74,193 @@ long double euclideanDistance(long double *a, long double *b) { return sqrtl(sum); } -int main(int argc, char *argv[]) { - int maxId = 0; - int len = 0; - int i; - Face *pChain = NULL; - for (i = 0; i < 100; i++) { - sprintf(pathBuf, "%s/face-data/%d", argv[1], i); - DIR *faceDir = opendir(pathBuf); - fprintf(stderr, "Reading %s...\n", pathBuf); +/* https://en.wikipedia.org/wiki/DBSCAN */ +#if 0 +DBSCAN(DB, distFunc, eps, minPts) { + C = 0 /* Cluster counter */ + for each point P in database DB { + if label(P) ≠ undefined then continue /* Previously processed in inner loop */ + Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ + if |N| < minPts then { /* Density check */ + label(P) = Noise /* Label as Noise */ + continue + } + C = C + 1 /* next cluster label */ + label(P) = C /* Label initial point */ + Seed set S = N \ {P} /* Neighbors to expand */ + for each point Q in S { /* Process every seed point */ + if label(Q) = Noise then label(Q) = C /* Change Noise to border point */ + if label(Q) ≠ undefined then continue /* Previously processed */ + label(Q) = C /* Label neighbor */ + Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ + if |N| ≥ minPts then { /* Density check */ + S = S ∪ N /* Add new neighbors to seed set */ + } + } + } +} +RangeQuery(DB, distFunc, Q, eps) { + Neighbors = empty list + for each point P in database DB { /* Scan all points in the database */ + if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */ + Neighbors = Neighbors ∪ {P} /* Add to result */ + } + } + return Neighbors +} +#endif + +FaceLink *RangeQuery(Face *pFaces, long int faceCount, Face *pQ, double eps) { + FaceLink *pNeighbors = NULL; + for (long int i = 0; i < faceCount; i++) { + Face *pFace = &pFaces[i]; + if (pFace->faceId == pQ->faceId) { + continue; + } + + if (pQ->distances[i] <= eps) { + FaceLink *pLink = malloc(sizeof(*pLink)); + memset(pLink, 0, sizeof(*pLink)); + pLink->pFace = pFace; + pLink->pNext = pNeighbors; + pNeighbors = pLink; + } + } + return pNeighbors; +} + +void freeChain(FaceLink *pLink) { + while (pLink) { + FaceLink *tmp = pLink->pNext; + free(pLink); + pLink = tmp; + } +} + +long int chainLength(FaceLink *pLink) { + long int count = 0; + while (pLink) { + count++; + pLink = pLink->pNext; + } + return count; +} + +long int DBSCAN(Face *faces, long int faceCount, double eps, int minPts) { + long int C = 0; + for (long int i = 0; i < faceCount; i++) { + Face *pFace = &faces[i]; + if (pFace->clusterType != UNDEFINED) { + continue; + } + + FaceLink *pNeighbors = RangeQuery(faces, faceCount, pFace, eps); + long neighborCount = chainLength(pNeighbors); + if (neighborCount < minPts) { + pFace->clusterType = NOISE; + freeChain(pNeighbors); + continue; + } + + //printf("%ld has %ld neighbors.\n", pFace->faceId, neighborCount); + + C++; + + pFace->clusterId = C; + pFace->clusterType = CORE; + + FaceLink *pLink = pNeighbors; + while (pLink) { + Face *pQ = pLink->pFace; + + if (pQ->faceId == pFace->faceId) { + pLink = pLink->pNext; + continue; + } + if (pQ->clusterType == NOISE) { + pQ->clusterId = C; + pQ->clusterType = EDGE; + } + if (pQ->clusterType != UNDEFINED) { + pLink = pLink->pNext; + continue; + } + + pQ->clusterId = C; + pQ->clusterType = EDGE; + + FaceLink *pSubNeighbors = RangeQuery(faces, faceCount, pQ, eps); + neighborCount = chainLength(pSubNeighbors); + if (neighborCount >= minPts) { + pQ->clusterType = CORE; + /* Append these neighbors to the end of the chain */ + FaceLink *pTmp = pLink; + while (pTmp->pNext) { + pTmp = pTmp->pNext; + } + pTmp->pNext = pSubNeighbors; + } else { + freeChain(pSubNeighbors); + } + + pLink = pLink->pNext; + } + freeChain(pNeighbors); + } + + return C; +} + +/* + * 1. Count how many entries there are + * 2. Allocate storage to hold all entries + * 3. Read all entries into flat array + * 4. Allocate MxM matrix and pre-calculate distances + * 5. Perform DBSCAN across MxM matrix to cluster + */ +int main(int argc, char *argv[]) { + long maxId = 0; + long i; + long entries = 0; + + for (i = 0; i < 100; i++) { + sprintf(pathBuf, "%s/face-data/%ld", argv[1], i); + DIR *faceDir = opendir(pathBuf); + if (!faceDir) { + continue; + } + + struct dirent *ent; + while ((ent = readdir(faceDir)) != NULL) { + if (strstr(ent->d_name, ".json") == NULL) { + continue; + } + entries++; + } + closedir(faceDir); + } + + Face *pFaces = malloc(sizeof(Face) * entries); + if (!pFaces) { + fprintf(stderr, "Unable to allocate storage face descriptors."); + return -1; + } + memset(pFaces, 0, sizeof(Face) * entries); + for (i = 0; i < entries; i++) { + pFaces[i].distances = malloc(sizeof(*pFaces[i].distances) * entries); + if (!pFaces[i].distances) { + fprintf(stderr, "Unable to allocate storage for distance dictionary."); + return -1; + } + memset(pFaces[i].distances, 0, sizeof(*pFaces[i].distances) * entries); + } + + entries = 0; + for (i = 0; i < 100; i++) { + sprintf(pathBuf, "%s/face-data/%ld", argv[1], i); + DIR *faceDir = opendir(pathBuf); +// fprintf(stderr, "Reading %s...\n", pathBuf); if (!faceDir) { fprintf(stderr, "Can not open %s\n", pathBuf); continue; @@ -81,7 +271,7 @@ int main(int argc, char *argv[]) { if (strstr(ent->d_name, ".json") == NULL) { continue; } - int id = 0; + long id = 0; char *p = ent->d_name; while (*p && *p != '-') { id *= 10; @@ -91,31 +281,92 @@ int main(int argc, char *argv[]) { char path[1028*2]; sprintf(path, "%s/%s", pathBuf, ent->d_name); maxId = maxId > id ? maxId : id; - Face *pFace = readFaceDescriptor(id, path); - if (!pFace) { + if (!readFaceDescriptor(&pFaces[entries], id, path)) { + fprintf(stderr, "Unable to read %s.\n", path); continue; } - len++; - if (len % 1000 == 0) { - fprintf(stderr, "...read %d...\n", len); + entries++; + if (entries % 1000 == 0) { + fprintf(stderr, "...read %ld...\n", entries); } - if (pChain) { - pFace->next = pChain; - } - pChain = pFace; } closedir(faceDir); } - fprintf(stderr, "Read %d face descriptors...\n", len); + fprintf(stderr, "Read %ld face descriptors...\n", entries); - /* Allocate storage for all distances */ - Face *pLink = pChain; - while (pLink) { - pLink->distances = (long double *)malloc(sizeof(long double) * len); - pLink = pLink->next; + long double total = 0.0; + for (long i = 0; i < entries; i++) { + Face *pLink = &pFaces[i]; + for (long j = 0; j < entries; j++) { + Face *pTarget = &pFaces[j]; + if (i == j) { + pLink->distances[i] = 0.0L; + pTarget->distances[j] = 0.0L; + continue; + } + + if (pLink->distances[j] != 0.0L) { + continue; + } + + pLink->distances[j] = + pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor); + + total += pLink->distances[j]; + } } + fprintf(stderr, "Average distance: %Lf\n", (long double)1. * total / (entries * entries)); + + long int clusters = DBSCAN(pFaces, entries, 0.44L, 2); + long int undefined = 0, outlier = 0, core = 0, reachable = 0; + for (i = 0; i < entries; i++) { + switch (pFaces[i].clusterType) { + case NOISE: + outlier++; + break; + case UNDEFINED: + undefined++; + break; + case CORE: + core++; + break; + case EDGE: + reachable++; + break; + } + } + + fprintf(stderr, "%ld clusters identified!\n", clusters); + fprintf(stderr, "%ld NOISE\n", outlier); + fprintf(stderr, "%ld UNDEFINED\n", undefined); + fprintf(stderr, "%ld CORE\n", core); + fprintf(stderr, "%ld EDGE\n", reachable); + + fprintf(stdout, "\n"); + + /* Allocate storage for all distances */ sqlite3 *db; int rc = sqlite3_open("db/photos.db", &db); @@ -143,46 +394,47 @@ int main(int argc, char *argv[]) { fprintf(stderr, "facedistances deleted and transaction started.\n"); char sqlBuf[1024]; - pLink = pChain; int sourceIndex = 0, lines = 0; - while (pLink) { + for (long i = 0; i < entries; i++) { + Face *pLink = &pFaces[i]; int targetIndex = 0; - Face *pTarget = pChain; - while (pTarget) { - if (targetIndex == sourceIndex) { - pLink->distances[targetIndex] = 0.0; - pTarget->distances[sourceIndex] = 0.0; - } else { - if (pLink->distances[targetIndex] == 0.0) { - pLink->distances[targetIndex] = - pTarget->distances[sourceIndex] = euclideanDistance(pLink->descriptor, pTarget->descriptor); - if (pLink->distances[targetIndex] < 0.5) { - sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%d,%d,%Lf);", - ((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId), - ((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId), - pLink->distances[targetIndex]); - rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg); - if (rc != SQLITE_OK ) { - fprintf(stderr, "SQL error: %s\n", err_msg); - sqlite3_free(err_msg); - sqlite3_close(db); - return 1; - } - lines++; - if (lines % 1000 == 0) { - fprintf(stderr, "...output %d DB lines (%0.2f complete)...\n", lines, (float)(1. * sourceIndex / (1. * len))); - } - } + for (long j = 0; j < entries; j++) { + Face *pTarget = &pFaces[j]; + if (i == j) { + pLink->distances[i] = 0.0L; + pTarget->distances[j] = 0.0L; + continue; + } + + if (pLink->distances[j] != 0.0L) { +// continue; + } + + pLink->distances[j] = + pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor); + + if (pLink->distances[j] < 0.5) { + sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%Lf);", + ((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId), + ((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId), + pLink->distances[j]); + rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg); + if (rc != SQLITE_OK ) { + fprintf(stderr, "SQL error: %s\n", err_msg); + sqlite3_free(err_msg); + sqlite3_close(db); + return 1; + } + lines++; + if (lines % 1000 == 0) { + fprintf(stderr, "...output %d DB lines (%0.2f complete)...\n", lines, + (float)(1. * sourceIndex / (1. * entries))); } } - pTarget = pTarget->next; - targetIndex++; } - pLink = pLink->next; - sourceIndex++; } - sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%d;", maxId); + sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId); rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg); if (rc != SQLITE_OK ) { diff --git a/server/face.js b/server/face.js index 6f804e2..a0156e5 100644 --- a/server/face.js +++ b/server/face.js @@ -55,7 +55,7 @@ function alignFromLandmarks(image, landmarks) { ctx.rotate(rotation); ctx.scale(scale, scale); ctx.drawImage(image, 0, 0); - /* + ctx.strokeStyle = "red"; ctx.strokeWidth = "1"; ctx.beginPath(); @@ -67,7 +67,7 @@ function alignFromLandmarks(image, landmarks) { } }); ctx.stroke(); - */ + return canvas; } @@ -140,20 +140,6 @@ require("./db/photos").then(function(db) { const file = photo.path + photo.filename; return canvas.loadImage(picturesPath + file).then(async (image) => { - const detectors = await faceapi.detectAllFaces(image, - new faceapi.SsdMobilenetv1Options({ - minConfidence: 0.8 - }) - ).withFaceLandmarks(); - - detectors.forEach(async (detector) => { - const canvas = alignFromLandmarks(image, detector.landmarks); - const descriptor = await faceapi.computeFaceDescriptor(canvas); - const data = []; - /* Confert from sparse object to dense array */ - for (let i = 0; i < 128; i++) { - data.push(descriptor[i]); - } const detectors = [ { detection: { _box: { @@ -172,7 +158,6 @@ require("./db/photos").then(function(db) { /* This is a file */ console.log(`Loading ${file}...`); id = undefined; - loader = canvas.loadImage(picturesPath + file).then(async (image) => { const detectors = await faceapi.detectAllFaces(image, new faceapi.SsdMobilenetv1Options({ @@ -183,10 +168,19 @@ require("./db/photos").then(function(db) { detectors.forEach(async (detector) => { const canvas = alignFromLandmarks(image, detector.landmarks); const descriptor = await faceapi.computeFaceDescriptor(canvas); + + fs.writeFileSync("rotation.png", canvas.toBuffer("image/png", { + quality: 0.95, + chromaSubsampling: false + })); + + process.exit(-1); + +// .withFaceDescriptors(); const data = []; /* Confert from sparse object to dense array */ for (let i = 0; i < 128; i++) { - data.push(descriptor[i]); + data.push(detector.descriptor[i]); } detector.descriptor = data; });