From 05afd4febda85c91fb9ac32a9e4e791debb59855 Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Sat, 11 Jan 2020 13:45:24 -0800
Subject: [PATCH] Added 'facedescriptors' table instead of having separate json files

Signed-off-by: James Ketrenos
---
Review notes (text between the "---" line and the diffstat is ignored by
git-am):

 - This patch was rebuilt from a copy whose line breaks, indentation and
   "<...>" spans had been lost.  Blank context lines and tab indentation are
   reconstructed from the hunk line counts and may need adjusting against the
   real tree.  The author e-mail addresses are not recoverable, and the
   "index" blob id of json2db.c no longer matches its revised content.
 - json2db.c: restored the #include targets the code needs; fixed a
   one-byte overflow of fileBuf; parse numbers with strtold(); insert with a
   single bound statement that is always finalized; DELETE now runs inside
   the transaction; paths are built with bounded snprintf(); one scratch
   Face replaces the per-entry allocations; file-name ids are validated.
 - scanner.c: parseFaceDescriptor() closes the blob handle on a failed read
   and opens it read-only; parseFaceIdRow() no longer writes past the
   preallocated array.

 scanner/Makefile    |   5 +-
 scanner/json2db.c   | 273 ++++++++++++++++++++++++++++++++++++++++++++++++
 scanner/scanner.c   | 213 +++++++++++++++++++------------------
 server/db/photos.js |  14 +++
 4 files changed, 399 insertions(+), 106 deletions(-)
 create mode 100644 scanner/json2db.c

diff --git a/scanner/Makefile b/scanner/Makefile
index 1552e9c..10a5654 100644
--- a/scanner/Makefile
+++ b/scanner/Makefile
@@ -1,4 +1,4 @@
-all: scanner
+all: scanner json2db
 
 ifeq ("$(wildcard /usr/include/sqlite3.h)","")
 	@echo "Install libsqlite3-dev"
@@ -8,5 +8,8 @@ endif
 scanner: scanner.c
 	gcc -o scanner scanner.c -lm -lsqlite3
 
+json2db: json2db.c
+	gcc -o json2db json2db.c -lm -lsqlite3
+
 clean:
 	rm scanner
diff --git a/scanner/json2db.c b/scanner/json2db.c
new file mode 100644
index 0000000..1dcbf18
--- /dev/null
+++ b/scanner/json2db.c
@@ -0,0 +1,273 @@
+/*
+ * json2db - import face descriptors from JSON files into SQLite.
+ *
+ * Walks PATH/face-data/0 .. PATH/face-data/99, parses every file whose name
+ * contains ".json" and starts with a numeric face id, and stores the 128
+ * descriptor values of each file as one row of the 'facedescriptors' table
+ * in db/photos.db.  The table is emptied and refilled in one transaction.
+ */
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sqlite3.h>
+
+/* Number of values in one face descriptor */
+#define DESCRIPTOR_LEN 128
+
+/* face-data/ is scanned for sub-directories named 0 .. FACE_DIR_COUNT - 1 */
+#define FACE_DIR_COUNT 100
+
+typedef struct Face {
+	/* Stored verbatim as the 'descriptors' blob; scanner.c reads it back
+	 * into an array of the same type and length. */
+	long double descriptor[DESCRIPTOR_LEN];
+	long int clusterId;
+	long faceId;
+	long photoId;
+	double confidence;
+	double *distances;
+} Face;
+
+char fileBuf[5000];
+char pathBuf[1028];
+
+/*
+ * Parse the first DESCRIPTOR_LEN numbers found in the file at 'path' into
+ * pFace->descriptor and record 'id' as pFace->faceId.
+ *
+ * Only the first sizeof(fileBuf) - 1 bytes of the file are examined.
+ * Returns pFace on success, or NULL when the file cannot be opened or holds
+ * fewer than DESCRIPTOR_LEN numbers.
+ */
+Face *readFaceDescriptor(Face *pFace, long id, char *path) {
+	FILE *f = fopen(path, "r");
+	if (!f) {
+		return NULL;
+	}
+	/* Read one byte less than the buffer holds so the terminator below
+	 * always lands inside fileBuf, even for oversized files. */
+	size_t s = fread(fileBuf, 1, sizeof(fileBuf) - 1, f);
+	fclose(f);
+	fileBuf[s] = 0;
+
+	char *p = fileBuf;
+	int i = 0;
+	while (i < DESCRIPTOR_LEN) {
+		/* Skip brackets, separators and whitespace up to the next
+		 * character that can start a number. */
+		while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) {
+			p++;
+		}
+		if (!*p) {
+			break;
+		}
+		char *end;
+		long double value = strtold(p, &end);
+		if (end == p) {
+			/* A sign that is not followed by a number */
+			p++;
+			continue;
+		}
+		pFace->descriptor[i++] = value;
+		p = end;
+	}
+
+	if (i != DESCRIPTOR_LEN) {
+		return NULL;
+	}
+
+	pFace->faceId = id;
+
+	return pFace;
+}
+
+/*
+ * Insert one row (faceId, descriptors) into 'facedescriptors'.
+ *
+ * The descriptor is stored as the raw in-memory bytes of the long double
+ * array, so the blob can only be read back by a program built with the same
+ * long double representation.
+ *
+ * Returns SQLITE_OK on success, otherwise the SQLite result code of the call
+ * that failed (the error is also reported on stderr).
+ */
+int insert_descriptors(sqlite3 *db, long int faceId, long double descriptor[128]) {
+	static const char sql[] =
+		"INSERT INTO facedescriptors (faceId,descriptors) VALUES (?,?)";
+
+	sqlite3_stmt *insert_stmt = NULL;
+	int rc = sqlite3_prepare_v2(db, sql, -1, &insert_stmt, NULL);
+	if (SQLITE_OK != rc) {
+		fprintf(stderr, "Can't prepare insert statement %s (%i): %s\n", sql, rc, sqlite3_errmsg(db));
+		return rc;
+	}
+
+	rc = sqlite3_bind_int64(insert_stmt, 1, faceId);
+	if (SQLITE_OK == rc) {
+		/* SQLITE_STATIC: the caller's array outlives the statement,
+		 * which is finalized before this function returns. */
+		rc = sqlite3_bind_blob(insert_stmt, 2, descriptor,
+			(int)(DESCRIPTOR_LEN * sizeof(*descriptor)), SQLITE_STATIC);
+	}
+	if (SQLITE_OK != rc) {
+		fprintf(stderr, "Can't bind insert parameters (%i): %s\n", rc, sqlite3_errmsg(db));
+		sqlite3_finalize(insert_stmt);
+		return rc;
+	}
+
+	rc = sqlite3_step(insert_stmt);
+	if (SQLITE_DONE != rc) {
+		fprintf(stderr, "Insert statement didn't work (%i): %s\n", rc, sqlite3_errmsg(db));
+	}
+
+	/* Always release the statement: a connection with statements that
+	 * were never finalized cannot be closed cleanly. */
+	sqlite3_finalize(insert_stmt);
+
+	return SQLITE_DONE == rc ? SQLITE_OK : rc;
+}
+
+/*
+ * Extract the face id that prefixes a descriptor file name, e.g. "12-x.json".
+ * Returns 1 and stores the id in *pId when the name starts with a decimal
+ * number that fits in a long and is followed by '-' or '.', 0 otherwise.
+ */
+static int parseFaceId(const char *name, long *pId) {
+	if (name[0] < '0' || name[0] > '9') {
+		return 0;
+	}
+	errno = 0;
+	char *end;
+	long id = strtol(name, &end, 10);
+	if (errno == ERANGE || (*end != '-' && *end != '.')) {
+		return 0;
+	}
+	*pId = id;
+	return 1;
+}
+
+/*
+ * Import every descriptor file found directly inside dirPath.
+ *
+ * A missing directory and unreadable or malformed files are reported and
+ * skipped; a database error stops the import.  Returns SQLITE_OK or the
+ * failing SQLite result code, and adds the number of rows written to
+ * *pImported.
+ */
+static int importDirectory(sqlite3 *db, const char *dirPath, long *pImported) {
+	DIR *faceDir = opendir(dirPath);
+	if (!faceDir) {
+		fprintf(stderr, "Can not open %s\n", dirPath);
+		return SQLITE_OK;
+	}
+
+	int rc = SQLITE_OK;
+	struct dirent *ent;
+	while (rc == SQLITE_OK && (ent = readdir(faceDir)) != NULL) {
+		if (strstr(ent->d_name, ".json") == NULL) {
+			continue;
+		}
+
+		long id;
+		if (!parseFaceId(ent->d_name, &id)) {
+			fprintf(stderr, "Skipping %s/%s: no face id in file name.\n", dirPath, ent->d_name);
+			continue;
+		}
+
+		char path[sizeof(pathBuf) * 2];
+		int n = snprintf(path, sizeof(path), "%s/%s", dirPath, ent->d_name);
+		if (n < 0 || (size_t)n >= sizeof(path)) {
+			fprintf(stderr, "Path too long: %s/%s\n", dirPath, ent->d_name);
+			continue;
+		}
+
+		/* One zeroed scratch record is enough: each descriptor is
+		 * written to the database before the next file is read. */
+		Face face;
+		memset(&face, 0, sizeof(face));
+		if (!readFaceDescriptor(&face, id, path)) {
+			fprintf(stderr, "Unable to read %s.\n", path);
+			continue;
+		}
+
+		rc = insert_descriptors(db, id, face.descriptor);
+		if (rc == SQLITE_OK) {
+			(*pImported)++;
+		}
+	}
+	closedir(faceDir);
+
+	return rc;
+}
+
+/*
+ * usage: json2db PATH
+ *
+ * Returns 0 on success, -1 on a usage error and 1 when the import fails.  A
+ * failed import leaves the previous table contents in place.
+ */
+int main(int argc, char *argv[]) {
+	if (argc < 2) {
+		fprintf(stderr, "usage: json2db PATH\n");
+		return -1;
+	}
+
+	sqlite3 *db;
+	int rc = sqlite3_open("db/photos.db", &db);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
+		sqlite3_close(db);
+		return 1;
+	}
+
+	fprintf(stderr, "DB opened.\n");
+
+	/* Clear the table inside the transaction so that a failed import
+	 * rolls back to the previous contents instead of an empty table. */
+	char *err_msg = NULL;
+	rc = sqlite3_exec(db,
+		"BEGIN TRANSACTION;"
+		"DELETE FROM facedescriptors;",
+		0, 0, &err_msg);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "SQL error: %s\n", err_msg);
+		sqlite3_free(err_msg);
+		sqlite3_close(db);
+		return 1;
+	}
+
+	long imported = 0;
+	for (int i = 0; i < FACE_DIR_COUNT; i++) {
+		int n = snprintf(pathBuf, sizeof(pathBuf), "%s/face-data/%d", argv[1], i);
+		if (n < 0 || (size_t)n >= sizeof(pathBuf)) {
+			fprintf(stderr, "Path too long: %s/face-data/%d\n", argv[1], i);
+			sqlite3_close(db);
+			return 1;
+		}
+
+		rc = importDirectory(db, pathBuf, &imported);
+		if (rc != SQLITE_OK) {
+			/* insert_descriptors() has already reported the error;
+			 * closing the connection rolls the transaction back. */
+			sqlite3_close(db);
+			return 1;
+		}
+	}
+
+	rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "SQL error: %s\n", err_msg);
+		sqlite3_free(err_msg);
+		sqlite3_close(db);
+		return 1;
+	}
+
+	fprintf(stderr, "Imported %ld face descriptors.\n", imported);
+
+	sqlite3_close(db);
+
+	return 0;
+}
diff --git a/scanner/scanner.c b/scanner/scanner.c
index a2f5c89..2331f9c 100644
--- a/scanner/scanner.c
+++ b/scanner/scanner.c
@@ -49,6 +49,7 @@ typedef struct FaceLink {
 char fileBuf[5000];
 char pathBuf[1028];
 
+
 Face *readFaceDescriptor(Face *pFace, long id, char *path) {
 	FILE *f;
 	f = fopen(path, "r");
@@ -258,25 +259,76 @@ long int DBSCAN(Face **ppFaces, long int faceCount, double eps, int minPts) {
 }
 
 typedef struct {
+	sqlite3 *db;
 	Face **ppFaces;
 	long int count;
+	long int initialized;
 } FaceCallbackData;
 
-int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
-	FaceCallbackData *map = data;
-	long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
-	long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
-	double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL);
-	for (long int i = 0; i < map->count; i++) {
-		if (map->ppFaces[i]->faceId == faceId) {
-			map->ppFaces[i]->photoId = photoId;
-			map->ppFaces[i]->confidence = confidence;
-			break;
-		}
-	}
+int parseFaceIdCount(void *data, int argc, char **argv, char **column) {
+	long int *pCount = data;
+	*pCount = strtol(argv[0] ? argv[0] : "0", NULL, 10);
 
 	return 0;
 }
+int parseFaceDescriptor(void *data, int argc, char **argv, char **column) {
+	FaceCallbackData *map = data;
+
+	long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
+
+	int rc;
+
+	Face *pFace = NULL;
+	for (long int i = 0; i < map->count; i++) {
+		pFace = map->ppFaces[i];
+		if (pFace->faceId == faceId) {
+			break;
+		}
+		pFace = NULL;
+	}
+
+	if (!pFace) {
+		return SQLITE_OK;
+	}
+
+	// faceId is passed as the rowid; this relies on facedescriptors.faceId
+	// being the table's INTEGER PRIMARY KEY (see server/db/photos.js).
+	sqlite3_blob *blob;
+	rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 0, &blob);
+	if (SQLITE_OK != rc) {
+		fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db));
+		return rc;
+	}
+
+	if (SQLITE_OK != (rc = sqlite3_blob_read(blob, pFace->descriptor, sizeof(pFace->descriptor), 0))) {
+		fprintf(stderr, "Error reading from blob handle.\n");
+		sqlite3_blob_close(blob);
+		return rc;
+	}
+
+	sqlite3_blob_close(blob);
+
+	return SQLITE_OK;
+}
+
+int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
+	FaceCallbackData *map = data;
+
+	/* Ignore rows beyond the storage sized from the earlier COUNT query */
+	if (map->initialized >= map->count) {
+		return 0;
+	}
+
+	long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
+	long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
+	double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL);
+
+	Face *pFace = map->ppFaces[map->initialized++];
+	pFace->faceId = faceId;
+	pFace->photoId = photoId;
+	pFace->confidence = confidence;
+	return 0;
+}
 
 /*
  * 1. Count how many entries there are
@@ -288,7 +340,7 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
 int main(int argc, char *argv[]) {
 	long maxId = 0;
 	long i;
-	long entries = 0;
+	long int entries = 0;
 	long int minPts = MIN_PTS;
 	long double maxDistance = MAX_DISTANCE;
 
@@ -306,29 +358,37 @@ int main(int argc, char *argv[]) {
 	}
 
 	fprintf(stderr, "\nmaxDistance : %Lf\nminPts : %ld\n", maxDistance, minPts);
-
-	for (i = 0; i < 100; i++) {
-		sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
-		DIR *faceDir = opendir(pathBuf);
-		if (!faceDir) {
-			continue;
-		}
 
-		struct dirent *ent;
-		while ((ent = readdir(faceDir)) != NULL) {
-			if (strstr(ent->d_name, ".json") == NULL) {
-				continue;
-			}
-			entries++;
-		}
-		closedir(faceDir);
+	/* Allocate storage for all distances */
+	sqlite3 *db;
+
+	int rc = sqlite3_open("db/photos.db", &db);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
+		sqlite3_close(db);
+		return 1;
 	}
 
+	fprintf(stderr, "DB opened.\n");
+	char *err_msg = NULL;
+
+	entries = 0;
+	rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "SQL error: %s\n", err_msg);
+		sqlite3_free(err_msg);
+		sqlite3_close(db);
+		return 1;
+	}
+
+	fprintf(stderr, "%ld faces in DB.\n", entries);
+
 	Face **ppFaces = malloc(sizeof(Face *) * entries);
 	if (!ppFaces) {
 		fprintf(stderr, "Unable to allocate storage face descriptors.");
 		return -1;
 	}
+
 	for (i = 0; i < entries; i++) {
 		ppFaces[i] = malloc(sizeof(Face));
 		memset(ppFaces[i], 0, sizeof(Face));
@@ -343,84 +403,13 @@ int main(int argc, char *argv[]) {
 		memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
 	}
 
-	long int processed = 0;
-	int last = 0;
-	for (i = 0; i < 100; i++) {
-		sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
-		DIR *faceDir = opendir(pathBuf);
-//		fprintf(stderr, "Reading %s...\n", pathBuf);
-		if (!faceDir) {
-			fprintf(stderr, "Can not open %s\n", pathBuf);
-			continue;
-		}
+	fprintf(stderr, "Storage allocated for %ld faces.\n", entries);
 
-		struct dirent *ent;
-		while (processed < entries && (ent = readdir(faceDir)) != NULL) {
-			if (strstr(ent->d_name, ".json") == NULL) {
-				continue;
-			}
-			long id = 0;
-			char *p = ent->d_name;
-			while (*p && *p != '-') {
-				id *= 10;
-				id += *p - '0';
-				p++;
-			}
-			char path[1028*2];
-			sprintf(path, "%s/%s", pathBuf, ent->d_name);
-			maxId = maxId > id ? maxId : id;
-			if (!readFaceDescriptor(ppFaces[processed], id, path)) {
-				fprintf(stderr, "Unable to read %s.\n", path);
-				continue;
-			}
-
-			long double profileDistance = 1.0;
-			for (int i = 0; i < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); i++) {
-				profileDistance = euclideanDistance(ppFaces[processed]->descriptor, profileDescriptors[i]);
-				if (profileDistance > 0.5) {
-					profileDistance = 1.0;
-				} else {
-					break;
-				}
-			}
-
-			if (profileDistance <= 0.5) {
-				fprintf(stderr, "\nFace id %ld distance from profile face: %Lf",
-					ppFaces[processed]->faceId, profileDistance);
-				/* This entry will be skipped */
-				entries--;
-			} else {
-				processed++;
-			}
-			if (processed % 1000 == 0) {
-				int perc = 100 * processed / entries;
-				if (perc != last) {
-					fprintf(stderr, "\rRead %d%% of descriptors.", perc);
-					last = perc;
-				}
-			}
-		}
-		closedir(faceDir);
-	}
-
-	fprintf(stderr, "\nRead %ld face descriptors...\n", entries);
-
-	/* Allocate storage for all distances */
-	sqlite3 *db;
-
-	int rc = sqlite3_open("db/photos.db", &db);
-	if (rc != SQLITE_OK) {
-		fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
-		sqlite3_close(db);
-		return 1;
-	}
-
-	fprintf(stderr, "DB opened.");
-
-	char *err_msg = NULL;
 	FaceCallbackData data = {
+		db: db,
 		ppFaces: ppFaces,
-		count: entries
+		count: entries,
+		initialized: 0
 	};
 
 	rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg);
@@ -431,10 +420,24 @@ int main(int argc, char *argv[]) {
 		return 1;
 	}
 
-	fprintf(stderr, "Face data loaded from DB\n");
+	fprintf(stderr, "Face data loaded from DB.\n");
 
-	processed = 0;
+	rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg);
+	if (rc != SQLITE_OK) {
+		fprintf(stderr, "SQL error: %s\n", err_msg);
+		sqlite3_free(err_msg);
+		sqlite3_close(db);
+		return 1;
+	}
+
+	fprintf(stderr, "Descriptor data loaded from DB\n");
+
+	err_msg = NULL;
+
+	long int processed = 0;
+	long int last = 0;
 	long double total = 0.0;
+
 	for (long i = 0; i < entries; i++) {
 		Face *pLink = ppFaces[i];
 		for (long j = 0; j < entries; j++) {
diff --git a/server/db/photos.js b/server/db/photos.js
index a652f32..4319904 100755
--- a/server/db/photos.js
+++ b/server/db/photos.js
@@ -150,6 +150,20 @@ function init() {
     }
   });
 
+  const FaceDescriptor = db.sequelize.define('facedescriptor', {
+    faceId: {
+      type: Sequelize.INTEGER,
+      primaryKey: true,
+      references: {
+        model: Face,
+        key: 'id',
+      }
+    },
+    descriptors: Sequelize.BLOB
+  }, {
+    timestamps: false
+  });
+
   const FaceDistances = db.sequelize.define('facedistance', {
     face1Id: {
       type: Sequelize.INTEGER,