diff --git a/clusters-pre b/clusters-pre
new file mode 100644
index 0000000..0137ba2
--- /dev/null
+++ b/clusters-pre
@@ -0,0 +1,34 @@
+
+
+
+
diff --git a/scanner/Makefile b/scanner/Makefile
index d1b0397..145ee29 100644
--- a/scanner/Makefile
+++ b/scanner/Makefile
@@ -6,4 +6,7 @@ ifeq ("$(wildcard /usr/include/sqlite3.h)","")
endif
scanner: scanner.c
- gcc -o scanner scanner.c -lm -lsqlite3
+ gcc -o scanner -g scanner.c -lm -lsqlite3
+
+# Remove the built binary; -f keeps `make clean` from failing when it is absent.
+.PHONY: clean
+clean:
+	rm -f scanner
diff --git a/scanner/scanner.c b/scanner/scanner.c
index 928fdc1..dc13adf 100644
--- a/scanner/scanner.c
+++ b/scanner/scanner.c
@@ -6,35 +6,45 @@
#include
#include
+typedef enum { /* Per-face label written by DBSCAN() */
+ UNDEFINED = 0, /* not labelled yet; the value a zero-filled Face starts with */
+ CORE = 1, /* clustered, with at least minPts neighbours within eps */
+ EDGE = 2, /* clustered, but with fewer than minPts neighbours */
+ NOISE = 3 /* had fewer than minPts neighbours when visited; DBSCAN() may later relabel it EDGE */
+} ClusterTypes;
+
typedef struct Face {
+ long faceId; /* identifier stored by readFaceDescriptor(); RangeQuery() uses it to skip the query face */
long double descriptor[128];
- int faceId;
+ long int clusterId; /* cluster number assigned by DBSCAN(), starting at 1 */
+ ClusterTypes clusterType; /* DBSCAN() label; UNDEFINED until the face is visited */
long double *distances;
- struct Face *next;
- struct Face *prev;
} Face;
+typedef struct FaceLink { /* Node of a singly linked chain of faces, as built by RangeQuery() */
+ struct FaceLink *pNext; /* next link, or NULL at the end of the chain */
+ Face *pFace; /* face referenced by this link; freeChain() does not free it */
+} FaceLink;
+
char fileBuf[5000];
char pathBuf[1028];
-Face *readFaceDescriptor(int id, char *path) {
+Face *readFaceDescriptor(Face *pFace, long id, char *path) {
FILE *f;
- Face *pFace = (Face *)malloc(sizeof(Face));
- memset(pFace, 0, sizeof(Face));
f = fopen(path, "r");
if (!f) {
- free(pFace);
return NULL;
}
size_t s = fread(fileBuf, 1, sizeof(fileBuf), f);
fclose(f);
-
+
char *p = fileBuf;
fileBuf[s] = 0;
while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) {
p++;
}
- for (int i = 0; i < 128; i++) {
+ int i = 0;
+ for (i = 0; i < 128; i++) {
char *start = p;
while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') {
p++;
@@ -46,14 +56,17 @@ Face *readFaceDescriptor(int id, char *path) {
sscanf(start, "%Lf", &pFace->descriptor[i]);
}
+ if (i != 128) {
+ return NULL;
+ }
+
pFace->faceId = id;
- pFace->next = pFace->prev = NULL;
return pFace;
}
long double euclideanDistance(long double *a, long double *b) {
- long double sum = 0.0;
+ long double sum = 0.0L;
for (int i = 0; i < 128; i++) {
long double delta = a[i] - b[i];
sum += delta * delta;
@@ -61,16 +74,193 @@ long double euclideanDistance(long double *a, long double *b) {
return sqrtl(sum);
}
-int main(int argc, char *argv[]) {
- int maxId = 0;
- int len = 0;
- int i;
- Face *pChain = NULL;
- for (i = 0; i < 100; i++) {
- sprintf(pathBuf, "%s/face-data/%d", argv[1], i);
- DIR *faceDir = opendir(pathBuf);
- fprintf(stderr, "Reading %s...\n", pathBuf);
+/* https://en.wikipedia.org/wiki/DBSCAN */
+#if 0
+DBSCAN(DB, distFunc, eps, minPts) {
+ C = 0 /* Cluster counter */
+ for each point P in database DB {
+ if label(P) ≠ undefined then continue /* Previously processed in inner loop */
+ Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */
+ if |N| < minPts then { /* Density check */
+ label(P) = Noise /* Label as Noise */
+ continue
+ }
+ C = C + 1 /* next cluster label */
+ label(P) = C /* Label initial point */
+ Seed set S = N \ {P} /* Neighbors to expand */
+ for each point Q in S { /* Process every seed point */
+ if label(Q) = Noise then label(Q) = C /* Change Noise to border point */
+ if label(Q) ≠ undefined then continue /* Previously processed */
+ label(Q) = C /* Label neighbor */
+ Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */
+ if |N| ≥ minPts then { /* Density check */
+ S = S ∪ N /* Add new neighbors to seed set */
+ }
+ }
+ }
+}
+RangeQuery(DB, distFunc, Q, eps) {
+ Neighbors = empty list
+ for each point P in database DB { /* Scan all points in the database */
+ if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */
+ Neighbors = Neighbors ∪ {P} /* Add to result */
+ }
+ }
+ return Neighbors
+}
+#endif
+
+/*
+ * Collect every face other than pQ whose precomputed distance to pQ is
+ * within eps.
+ *
+ * pFaces/faceCount: the flat array of all faces. pQ->distances[i] is read as
+ *   the distance from pQ to pFaces[i], so pQ->distances must hold at least
+ *   faceCount entries.
+ * pQ: the query face; entries whose faceId equals pQ->faceId are skipped.
+ * eps: inclusive distance threshold.
+ *
+ * Returns a newly allocated singly linked chain (NULL when there are no
+ * neighbours) that the caller owns and must release. Links are pushed onto
+ * the head, so the chain is in reverse array order. A NULL return always
+ * means "no neighbours": an allocation failure terminates the process rather
+ * than being reported through the return value.
+ */
+FaceLink *RangeQuery(Face *pFaces, long int faceCount, Face *pQ, double eps) {
+  FaceLink *pNeighbors = NULL;
+  for (long int i = 0; i < faceCount; i++) {
+    Face *pFace = &pFaces[i];
+    if (pFace->faceId == pQ->faceId) {
+      continue;
+    }
+
+    if (pQ->distances[i] <= eps) {
+      FaceLink *pLink = malloc(sizeof(*pLink));
+      if (!pLink) {
+        fprintf(stderr, "Unable to allocate storage for neighbor link.\n");
+        exit(EXIT_FAILURE);
+      }
+      /* Both fields are assigned here, so no zero-fill is needed */
+      pLink->pFace = pFace;
+      pLink->pNext = pNeighbors;
+      pNeighbors = pLink;
+    }
+  }
+  return pNeighbors;
+}
+
+/* Release every link of a chain; the Face objects the links point at are left untouched. */
+void freeChain(FaceLink *pLink) {
+  for (FaceLink *pFollowing = NULL; pLink != NULL; pLink = pFollowing) {
+    /* Read the successor before the current link is released */
+    pFollowing = pLink->pNext;
+    free(pLink);
+  }
+}
+
+/* Count the links in a chain; a NULL chain has length 0. */
+long int chainLength(FaceLink *pLink) {
+  long int total = 0;
+  for (FaceLink *pCursor = pLink; pCursor != NULL; pCursor = pCursor->pNext) {
+    total += 1;
+  }
+  return total;
+}
+
+/*
+ * Cluster faces with DBSCAN, using the precomputed distances stored in each
+ * face's distances array.
+ *
+ * faces/faceCount: array of faces to label. Only faces whose clusterType is
+ *   UNDEFINED when reached are considered as the start of a new cluster.
+ * eps: neighbourhood radius passed to RangeQuery().
+ * minPts: minimum neighbour count for a face to be dense. RangeQuery() leaves
+ *   the query face out of its result, so this counts *other* faces only.
+ *
+ * On return no face is left UNDEFINED: each is NOISE, EDGE or CORE, and
+ * clustered faces carry a clusterId in 1..C. Returns C, the number of
+ * clusters created.
+ */
+long int DBSCAN(Face *faces, long int faceCount, double eps, int minPts) {
+  long int C = 0;
+  for (long int i = 0; i < faceCount; i++) {
+    Face *pFace = &faces[i];
+    if (pFace->clusterType != UNDEFINED) {
+      continue; /* already labelled */
+    }
+
+    FaceLink *pNeighbors = RangeQuery(faces, faceCount, pFace, eps);
+    if (chainLength(pNeighbors) < minPts) {
+      pFace->clusterType = NOISE;
+      freeChain(pNeighbors);
+      continue;
+    }
+
+    C++;
+    pFace->clusterId = C;
+    pFace->clusterType = CORE;
+
+    /*
+     * pNeighbors doubles as the seed chain. Track its tail so neighbours of
+     * newly found core faces are appended without re-walking the chain.
+     */
+    FaceLink *pTail = pNeighbors;
+    while (pTail && pTail->pNext) {
+      pTail = pTail->pNext;
+    }
+
+    for (FaceLink *pLink = pNeighbors; pLink; pLink = pLink->pNext) {
+      Face *pQ = pLink->pFace;
+      if (pQ->faceId == pFace->faceId) {
+        continue;
+      }
+      if (pQ->clusterType == NOISE) {
+        /* Rejected as noise earlier, but reachable from this cluster */
+        pQ->clusterId = C;
+        pQ->clusterType = EDGE;
+      }
+      if (pQ->clusterType != UNDEFINED) {
+        continue;
+      }
+
+      /* Label before expanding so pQ is never expanded a second time */
+      pQ->clusterId = C;
+      pQ->clusterType = EDGE;
+
+      FaceLink *pSubNeighbors = RangeQuery(faces, faceCount, pQ, eps);
+      if (chainLength(pSubNeighbors) < minPts) {
+        freeChain(pSubNeighbors);
+        continue;
+      }
+
+      pQ->clusterType = CORE;
+      if (pSubNeighbors) {
+        /* Append to the end of the seed chain and move the tail forward */
+        pTail->pNext = pSubNeighbors;
+        while (pTail->pNext) {
+          pTail = pTail->pNext;
+        }
+      }
+    }
+    /* Appended sub-chains are part of pNeighbors by now, so this frees them too */
+    freeChain(pNeighbors);
+  }
+
+  return C;
+}
+
+/*
+ * 1. Count how many entries there are
+ * 2. Allocate storage to hold all entries
+ * 3. Read all entries into flat array
+ * 4. Allocate MxM matrix and pre-calculate distances
+ * 5. Perform DBSCAN across MxM matrix to cluster
+ */
+int main(int argc, char *argv[]) {
+ long maxId = 0;
+ long i;
+ long entries = 0;
+
+ for (i = 0; i < 100; i++) {
+ sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
+ DIR *faceDir = opendir(pathBuf);
+ if (!faceDir) {
+ continue;
+ }
+
+ struct dirent *ent;
+ while ((ent = readdir(faceDir)) != NULL) {
+ if (strstr(ent->d_name, ".json") == NULL) {
+ continue;
+ }
+ entries++;
+ }
+ closedir(faceDir);
+ }
+
+ Face *pFaces = malloc(sizeof(Face) * entries);
+ if (!pFaces) {
+ fprintf(stderr, "Unable to allocate storage face descriptors.");
+ return -1;
+ }
+ memset(pFaces, 0, sizeof(Face) * entries);
+ for (i = 0; i < entries; i++) {
+ pFaces[i].distances = malloc(sizeof(*pFaces[i].distances) * entries);
+ if (!pFaces[i].distances) {
+ fprintf(stderr, "Unable to allocate storage for distance dictionary.");
+ return -1;
+ }
+ memset(pFaces[i].distances, 0, sizeof(*pFaces[i].distances) * entries);
+ }
+
+ entries = 0;
+ for (i = 0; i < 100; i++) {
+ sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
+ DIR *faceDir = opendir(pathBuf);
+// fprintf(stderr, "Reading %s...\n", pathBuf);
if (!faceDir) {
fprintf(stderr, "Can not open %s\n", pathBuf);
continue;
@@ -81,7 +271,7 @@ int main(int argc, char *argv[]) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
- int id = 0;
+ long id = 0;
char *p = ent->d_name;
while (*p && *p != '-') {
id *= 10;
@@ -91,31 +281,92 @@ int main(int argc, char *argv[]) {
char path[1028*2];
sprintf(path, "%s/%s", pathBuf, ent->d_name);
maxId = maxId > id ? maxId : id;
- Face *pFace = readFaceDescriptor(id, path);
- if (!pFace) {
+ if (!readFaceDescriptor(&pFaces[entries], id, path)) {
+ fprintf(stderr, "Unable to read %s.\n", path);
continue;
}
- len++;
- if (len % 1000 == 0) {
- fprintf(stderr, "...read %d...\n", len);
+ entries++;
+ if (entries % 1000 == 0) {
+ fprintf(stderr, "...read %ld...\n", entries);
}
- if (pChain) {
- pFace->next = pChain;
- }
- pChain = pFace;
}
closedir(faceDir);
}
- fprintf(stderr, "Read %d face descriptors...\n", len);
+ fprintf(stderr, "Read %ld face descriptors...\n", entries);
- /* Allocate storage for all distances */
- Face *pLink = pChain;
- while (pLink) {
- pLink->distances = (long double *)malloc(sizeof(long double) * len);
- pLink = pLink->next;
+ long double total = 0.0;
+ for (long i = 0; i < entries; i++) {
+ Face *pLink = &pFaces[i];
+ for (long j = 0; j < entries; j++) {
+ Face *pTarget = &pFaces[j];
+ if (i == j) {
+ pLink->distances[i] = 0.0L;
+ pTarget->distances[j] = 0.0L;
+ continue;
+ }
+
+ if (pLink->distances[j] != 0.0L) {
+ continue;
+ }
+
+ pLink->distances[j] =
+ pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
+
+ total += pLink->distances[j];
+ }
}
+ fprintf(stderr, "Average distance: %Lf\n", (long double)1. * total / (entries * entries));
+
+ long int clusters = DBSCAN(pFaces, entries, 0.44L, 2);
+ long int undefined = 0, outlier = 0, core = 0, reachable = 0;
+ for (i = 0; i < entries; i++) {
+ switch (pFaces[i].clusterType) {
+ case NOISE:
+ outlier++;
+ break;
+ case UNDEFINED:
+ undefined++;
+ break;
+ case CORE:
+ core++;
+ break;
+ case EDGE:
+ reachable++;
+ break;
+ }
+ }
+
+ fprintf(stderr, "%ld clusters identified!\n", clusters);
+ fprintf(stderr, "%ld NOISE\n", outlier);
+ fprintf(stderr, "%ld UNDEFINED\n", undefined);
+ fprintf(stderr, "%ld CORE\n", core);
+ fprintf(stderr, "%ld EDGE\n", reachable);
+
+ fprintf(stdout, "\n");
+
+ /* Open the SQLite database (db/photos.db) that the facedistances rows are written to */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
@@ -143,46 +394,47 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "facedistances deleted and transaction started.\n");
char sqlBuf[1024];
- pLink = pChain;
int sourceIndex = 0, lines = 0;
- while (pLink) {
+ for (long i = 0; i < entries; i++) {
+ Face *pLink = &pFaces[i];
int targetIndex = 0;
- Face *pTarget = pChain;
- while (pTarget) {
- if (targetIndex == sourceIndex) {
- pLink->distances[targetIndex] = 0.0;
- pTarget->distances[sourceIndex] = 0.0;
- } else {
- if (pLink->distances[targetIndex] == 0.0) {
- pLink->distances[targetIndex] =
- pTarget->distances[sourceIndex] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
- if (pLink->distances[targetIndex] < 0.5) {
- sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%d,%d,%Lf);",
- ((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId),
- ((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId),
- pLink->distances[targetIndex]);
- rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg);
- if (rc != SQLITE_OK ) {
- fprintf(stderr, "SQL error: %s\n", err_msg);
- sqlite3_free(err_msg);
- sqlite3_close(db);
- return 1;
- }
- lines++;
- if (lines % 1000 == 0) {
- fprintf(stderr, "...output %d DB lines (%0.2f complete)...\n", lines, (float)(1. * sourceIndex / (1. * len)));
- }
- }
+ for (long j = 0; j < entries; j++) {
+ Face *pTarget = &pFaces[j];
+ if (i == j) {
+ pLink->distances[i] = 0.0L;
+ pTarget->distances[j] = 0.0L;
+ continue;
+ }
+
+ if (pLink->distances[j] != 0.0L) {
+// continue;
+ }
+
+ pLink->distances[j] =
+ pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
+
+ if (pLink->distances[j] < 0.5) {
+ sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%Lf);",
+ ((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId),
+ ((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId),
+ pLink->distances[j]);
+ rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg);
+ if (rc != SQLITE_OK ) {
+ fprintf(stderr, "SQL error: %s\n", err_msg);
+ sqlite3_free(err_msg);
+ sqlite3_close(db);
+ return 1;
+ }
+ lines++;
+ if (lines % 1000 == 0) {
+ fprintf(stderr, "...output %d DB lines (%0.2f complete)...\n", lines,
+ (float)(1. * sourceIndex / (1. * entries)));
}
}
- pTarget = pTarget->next;
- targetIndex++;
}
- pLink = pLink->next;
- sourceIndex++;
}
- sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%d;", maxId);
+ sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId);
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
diff --git a/server/face.js b/server/face.js
index 6f804e2..a0156e5 100644
--- a/server/face.js
+++ b/server/face.js
@@ -55,7 +55,7 @@ function alignFromLandmarks(image, landmarks) {
ctx.rotate(rotation);
ctx.scale(scale, scale);
ctx.drawImage(image, 0, 0);
- /*
+
ctx.strokeStyle = "red";
ctx.strokeWidth = "1";
ctx.beginPath();
@@ -67,7 +67,7 @@ function alignFromLandmarks(image, landmarks) {
}
});
ctx.stroke();
- */
+
return canvas;
}
@@ -140,20 +140,6 @@ require("./db/photos").then(function(db) {
const file = photo.path + photo.filename;
return canvas.loadImage(picturesPath + file).then(async (image) => {
- const detectors = await faceapi.detectAllFaces(image,
- new faceapi.SsdMobilenetv1Options({
- minConfidence: 0.8
- })
- ).withFaceLandmarks();
-
- detectors.forEach(async (detector) => {
- const canvas = alignFromLandmarks(image, detector.landmarks);
- const descriptor = await faceapi.computeFaceDescriptor(canvas);
- const data = [];
- /* Confert from sparse object to dense array */
- for (let i = 0; i < 128; i++) {
- data.push(descriptor[i]);
- }
const detectors = [ {
detection: {
_box: {
@@ -172,7 +158,6 @@ require("./db/photos").then(function(db) {
/* This is a file */
console.log(`Loading ${file}...`);
id = undefined;
-
loader = canvas.loadImage(picturesPath + file).then(async (image) => {
const detectors = await faceapi.detectAllFaces(image,
new faceapi.SsdMobilenetv1Options({
@@ -183,10 +168,19 @@ require("./db/photos").then(function(db) {
detectors.forEach(async (detector) => {
const canvas = alignFromLandmarks(image, detector.landmarks);
const descriptor = await faceapi.computeFaceDescriptor(canvas);
+
+ fs.writeFileSync("rotation.png", canvas.toBuffer("image/png", {
+ quality: 0.95,
+ chromaSubsampling: false
+ }));
+
+ process.exit(-1);
+
+// .withFaceDescriptors();
const data = [];
/* Confert from sparse object to dense array */
for (let i = 0; i < 128; i++) {
- data.push(descriptor[i]);
+ data.push(detector.descriptor[i]);
}
detector.descriptor = data;
});