Added 'facedescriptors' table instead of having separate JSON files

Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
This commit is contained in:
James Ketrenos 2020-01-11 13:45:24 -08:00
parent 7f5f944c97
commit 05afd4febd
4 changed files with 360 additions and 106 deletions

View File

@ -1,4 +1,4 @@
all: scanner all: scanner json2db
ifeq ("$(wildcard /usr/include/sqlite3.h)","") ifeq ("$(wildcard /usr/include/sqlite3.h)","")
@echo "Install libsqlite3-dev" @echo "Install libsqlite3-dev"
@ -8,5 +8,8 @@ endif
scanner: scanner.c scanner: scanner.c
gcc -o scanner scanner.c -lm -lsqlite3 gcc -o scanner scanner.c -lm -lsqlite3
json2db: json2db.c
gcc -o json2db json2db.c -lm -lsqlite3
clean: clean:
rm scanner rm scanner

239
scanner/json2db.c Normal file
View File

@ -0,0 +1,239 @@
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include <string.h>
#include <math.h>
#include <sqlite3.h>
#ifndef MIN_PTS
#define MIN_PTS 3
#endif
#ifndef MAX_DISTANCE
#define MAX_DISTANCE 0.47L
#endif
/*
 * One face record.
 *
 * In the code visible in this file only `descriptor` and `faceId` are
 * written (by readFaceDescriptor); the remaining fields are not referenced
 * here. NOTE(review): they appear to be shared with scanner.c - confirm.
 */
typedef struct Face {
long double descriptor[128]; /* 128 numeric descriptor values parsed from the JSON file */
long int clusterId; /* not used in this file */
long faceId; /* id passed to readFaceDescriptor (derived from the file name in main) */
long photoId; /* not used in this file */
double confidence; /* not used in this file */
double *distances; /* not used in this file; never allocated here */
} Face;
/*
 * List node pairing a Face pointer with a distance value; nodes can be
 * chained through pNext. Not referenced by the code visible in this file.
 */
typedef struct FaceLink {
struct FaceLink *pNext; /* next node in the chain */
long double distance; /* distance associated with pFace */
Face *pFace; /* face this node refers to */
} FaceLink;
char fileBuf[5000];
char pathBuf[1028];
/*
 * Parse the 128 descriptor values stored in the text file at `path` into
 * pFace->descriptor and record `id` as pFace->faceId.
 *
 * The file is read into the global fileBuf (at most sizeof(fileBuf) - 1
 * bytes) and tokenised in place. Parsing starts at the first sign or digit
 * character; values may be separated by ',', ' ' or '\n' and the last value
 * must be followed by one of those or by ']'.
 *
 * Returns pFace on success. Returns NULL if the file cannot be opened, if
 * fewer than 128 values are present, or if a token is not a number; in the
 * failure cases pFace->descriptor may be partially overwritten and
 * pFace->faceId is left untouched.
 */
Face *readFaceDescriptor(Face *pFace, long id, char *path) {
  FILE *f = fopen(path, "r");
  if (!f) {
    return NULL;
  }

  /* Reserve one byte for the terminator: reading sizeof(fileBuf) bytes and
   * then storing fileBuf[s] = 0 would write past the end of the buffer. */
  size_t s = fread(fileBuf, 1, sizeof(fileBuf) - 1, f);
  fclose(f);
  fileBuf[s] = 0;

  /* Skip everything up to the first character that can start a number. */
  char *p = fileBuf;
  while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) {
    p++;
  }

  int i;
  for (i = 0; i < 128; i++) {
    /* Skip separator runs (", " or pretty-printed JSON) so that they do
     * not produce empty tokens that would consume descriptor slots. */
    while (*p == ',' || *p == ' ' || *p == '\n') {
      p++;
    }

    char *start = p;
    while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') {
      p++;
    }
    if (!*p) {
      /* Ran out of input before the value was terminated. */
      break;
    }
    *p++ = 0;

    if (sscanf(start, "%Lf", &pFace->descriptor[i]) != 1) {
      return NULL;
    }
  }

  if (i != 128) {
    return NULL;
  }

  pFace->faceId = id;
  return pFace;
}
/*
 * Insert one row (faceId, descriptors) into the 'facedescriptors' table.
 *
 * The row is first inserted with a zero-filled blob of
 * 128 * sizeof(long double) bytes, then the blob is opened through the
 * incremental-blob API and the raw bytes of `descriptor` are written into
 * it.
 *
 * db          open database handle
 * faceId      value stored in the faceId column
 * descriptor  the 128 values to store (written as raw bytes)
 *
 * Returns SQLITE_OK on success, otherwise the SQLite result code of the
 * step that failed (a diagnostic is printed to stderr). The prepared
 * statement and the blob handle are released on every path.
 */
int insert_descriptors(sqlite3 *db, long int faceId, long double descriptor[128]) {
  char sql_buf[1024];
  sqlite3_stmt *insert_stmt = NULL;
  sqlite3_blob *blob = NULL;
  int rc;

  snprintf(sql_buf, sizeof(sql_buf), "INSERT INTO facedescriptors (faceId,descriptors) VALUES (%ld,?)", faceId);

  rc = sqlite3_prepare_v2(db, sql_buf, -1, &insert_stmt, NULL);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Can't prepare insert statment %s (%i): %s\n", sql_buf, rc, sqlite3_errmsg(db));
    return rc;
  }

  /* Bind a block of zeros the size of the descriptor we write below. */
  rc = sqlite3_bind_zeroblob(insert_stmt, 1, (int)(sizeof(long double) * 128));
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Can't bind descriptor blob (%i): %s\n", rc, sqlite3_errmsg(db));
    sqlite3_finalize(insert_stmt);
    return rc;
  }

  rc = sqlite3_step(insert_stmt);
  if (SQLITE_DONE != rc) {
    fprintf(stderr, "Insert statement didn't work (%i): %s\n", rc, sqlite3_errmsg(db));
    sqlite3_finalize(insert_stmt);
    return rc;
  }

  /* The statement is no longer needed; leaving it unfinalized leaks it and
   * keeps the connection from closing cleanly. */
  sqlite3_finalize(insert_stmt);

  /* A zero-filled row now exists; fetch its rowid so the blob can be opened. */
  sqlite3_int64 rowid = sqlite3_last_insert_rowid(db);

  rc = sqlite3_blob_open(db, "main", "facedescriptors", "descriptors", rowid, 1, &blob);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(db));
    sqlite3_blob_close(blob); /* harmless when blob is NULL */
    return rc;
  }

  rc = sqlite3_blob_write(blob, descriptor, 128 * sizeof(*descriptor), 0);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Error writing to blob handle.\n");
    sqlite3_blob_close(blob);
    return rc;
  }

  rc = sqlite3_blob_close(blob);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Error closing blob handle (%i): %s\n", rc, sqlite3_errmsg(db));
  }
  return rc;
}
/*
* 1. Count how many entries there are
* 2. Allocate storage to hold all entries
* 3. Read all entries into flat array
* 4. Allocate MxM matrix and pre-calculate distances
* 5. Perform DBSCAN across MxM matrix to cluster
*/
int main(int argc, char *argv[]) {
long maxId = 0;
long i;
long entries = 0;
long int minPts = MIN_PTS;
long double maxDistance = MAX_DISTANCE;
char sqlBuf[1024];
if (argc == 1) {
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
return -1;
}
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
if (!faceDir) {
continue;
}
struct dirent *ent;
while ((ent = readdir(faceDir)) != NULL) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
entries++;
}
closedir(faceDir);
}
fprintf(stderr, "Found %ld entries to load and process.\n", entries);
/* Allocate storage for all distances */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
fprintf(stderr, "DB opened.");
Face **ppFaces = malloc(sizeof(Face *) * entries);
if (!ppFaces) {
fprintf(stderr, "Unable to allocate storage face descriptors.");
return -1;
}
for (i = 0; i < entries; i++) {
ppFaces[i] = malloc(sizeof(Face));
memset(ppFaces[i], 0, sizeof(Face));
}
char *err_msg = NULL;
char *sql =
"DELETE FROM facedescriptors;"
"BEGIN TRANSACTION;";
rc = sqlite3_exec(db, sql, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
long int processed = 0;
int last = 0;
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
if (!faceDir) {
fprintf(stderr, "Can not open %s\n", pathBuf);
continue;
}
struct dirent *ent;
while (processed < entries && (ent = readdir(faceDir)) != NULL) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
long id = 0;
char *p = ent->d_name;
while (*p && *p != '-') {
id *= 10;
id += *p - '0';
p++;
}
char path[1028*2];
sprintf(path, "%s/%s", pathBuf, ent->d_name);
maxId = maxId > id ? maxId : id;
if (!readFaceDescriptor(ppFaces[processed], id, path)) {
fprintf(stderr, "Unable to read %s.\n", path);
continue;
}
rc = insert_descriptors(db, id, ppFaces[processed]->descriptor);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
}
closedir(faceDir);
}
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
sqlite3_close(db);
return 0;
}

View File

@ -49,6 +49,7 @@ typedef struct FaceLink {
char fileBuf[5000]; char fileBuf[5000];
char pathBuf[1028]; char pathBuf[1028];
Face *readFaceDescriptor(Face *pFace, long id, char *path) { Face *readFaceDescriptor(Face *pFace, long id, char *path) {
FILE *f; FILE *f;
f = fopen(path, "r"); f = fopen(path, "r");
@ -258,25 +259,71 @@ long int DBSCAN(Face **ppFaces, long int faceCount, double eps, int minPts) {
} }
typedef struct { typedef struct {
sqlite3 *db;
Face **ppFaces; Face **ppFaces;
long int count; long int count;
long int initialized;
} FaceCallbackData; } FaceCallbackData;
int parseFaceIdRow(void *data, int argc, char **argv, char **column) { int parseFaceIdCount(void *data, int argc, char **argv, char **column) {
FaceCallbackData *map = data; long int *pCount = data;
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10); *pCount = strtol(argv[0] ? argv[0] : "0", NULL, 10);
long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL);
for (long int i = 0; i < map->count; i++) {
if (map->ppFaces[i]->faceId == faceId) {
map->ppFaces[i]->photoId = photoId;
map->ppFaces[i]->confidence = confidence;
break;
}
}
return 0; return 0;
} }
/*
 * sqlite3_exec() row callback that loads one stored descriptor blob.
 *
 * `data` is a FaceCallbackData. argv[0] is parsed as the face id
 * (NOTE(review): this relies on faceId being the first column of
 * "SELECT * FROM facedescriptors" - confirm against the schema). The face
 * with that id is looked up in map->ppFaces; rows with no matching face
 * are ignored. For a match, sizeof(pFace->descriptor) bytes are read from
 * the row's 'descriptors' blob straight into pFace->descriptor, using the
 * face id as the rowid.
 *
 * Returns SQLITE_OK to continue, or the SQLite error code of the failing
 * blob call (a non-zero return makes sqlite3_exec stop).
 */
int parseFaceDescriptor(void *data, int argc, char **argv, char **column) {
  FaceCallbackData *map = data;
  Face *pFace = NULL;
  sqlite3_blob *blob = NULL;
  int rc;

  if (argc < 1) {
    return SQLITE_OK;
  }

  long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);

  for (long int i = 0; i < map->count; i++) {
    if (map->ppFaces[i]->faceId == faceId) {
      pFace = map->ppFaces[i];
      break;
    }
  }

  if (!pFace) {
    return SQLITE_OK;
  }

  /* The blob is only read, so open it read-only (flags = 0). */
  rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 0, &blob);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db));
    sqlite3_blob_close(blob); /* harmless when blob is NULL */
    return rc;
  }

  rc = sqlite3_blob_read(blob, pFace->descriptor, sizeof(pFace->descriptor), 0);
  if (SQLITE_OK != rc) {
    fprintf(stderr, "Error reading from blob handle.\n");
  }

  /* Release the handle on both the success and the error path. */
  sqlite3_blob_close(blob);

  return rc;
}
int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
FaceCallbackData *map = data;
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL);
Face *pFace = map->ppFaces[map->initialized++];
pFace->faceId = faceId;
pFace->photoId = photoId;
pFace->confidence = confidence;
return 0;
}
/* /*
* 1. Count how many entries there are * 1. Count how many entries there are
@ -288,7 +335,7 @@ int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
long maxId = 0; long maxId = 0;
long i; long i;
long entries = 0; long int entries = 0;
long int minPts = MIN_PTS; long int minPts = MIN_PTS;
long double maxDistance = MAX_DISTANCE; long double maxDistance = MAX_DISTANCE;
@ -306,29 +353,37 @@ int main(int argc, char *argv[]) {
} }
fprintf(stderr, "\nmaxDistance : %Lf\nminPts : %ld\n", maxDistance, minPts); fprintf(stderr, "\nmaxDistance : %Lf\nminPts : %ld\n", maxDistance, minPts);
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
if (!faceDir) {
continue;
}
struct dirent *ent; /* Allocate storage for all distances */
while ((ent = readdir(faceDir)) != NULL) { sqlite3 *db;
if (strstr(ent->d_name, ".json") == NULL) {
continue; int rc = sqlite3_open("db/photos.db", &db);
} if (rc != SQLITE_OK) {
entries++; fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
} sqlite3_close(db);
closedir(faceDir); return 1;
} }
fprintf(stderr, "DB opened.\n");
char *err_msg = NULL;
entries = 0;
rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "%ld faces in DB.\n", entries);
Face **ppFaces = malloc(sizeof(Face *) * entries); Face **ppFaces = malloc(sizeof(Face *) * entries);
if (!ppFaces) { if (!ppFaces) {
fprintf(stderr, "Unable to allocate storage face descriptors."); fprintf(stderr, "Unable to allocate storage face descriptors.");
return -1; return -1;
} }
for (i = 0; i < entries; i++) { for (i = 0; i < entries; i++) {
ppFaces[i] = malloc(sizeof(Face)); ppFaces[i] = malloc(sizeof(Face));
memset(ppFaces[i], 0, sizeof(Face)); memset(ppFaces[i], 0, sizeof(Face));
@ -343,84 +398,13 @@ int main(int argc, char *argv[]) {
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries); memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
} }
long int processed = 0; fprintf(stderr, "Storage allocated for %ld faces.\n", entries);
int last = 0;
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
// fprintf(stderr, "Reading %s...\n", pathBuf);
if (!faceDir) {
fprintf(stderr, "Can not open %s\n", pathBuf);
continue;
}
struct dirent *ent;
while (processed < entries && (ent = readdir(faceDir)) != NULL) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
long id = 0;
char *p = ent->d_name;
while (*p && *p != '-') {
id *= 10;
id += *p - '0';
p++;
}
char path[1028*2];
sprintf(path, "%s/%s", pathBuf, ent->d_name);
maxId = maxId > id ? maxId : id;
if (!readFaceDescriptor(ppFaces[processed], id, path)) {
fprintf(stderr, "Unable to read %s.\n", path);
continue;
}
long double profileDistance = 1.0;
for (int i = 0; i < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); i++) {
profileDistance = euclideanDistance(ppFaces[processed]->descriptor, profileDescriptors[i]);
if (profileDistance > 0.5) {
profileDistance = 1.0;
} else {
break;
}
}
if (profileDistance <= 0.5) {
fprintf(stderr, "\nFace id %ld distance from profile face: %Lf",
ppFaces[processed]->faceId, profileDistance);
/* This entry will be skipped */
entries--;
} else {
processed++;
}
if (processed % 1000 == 0) {
int perc = 100 * processed / entries;
if (perc != last) {
fprintf(stderr, "\rRead %d%% of descriptors.", perc);
last = perc;
}
}
}
closedir(faceDir);
}
fprintf(stderr, "\nRead %ld face descriptors...\n", entries);
/* Allocate storage for all distances */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
fprintf(stderr, "DB opened.");
char *err_msg = NULL;
FaceCallbackData data = { FaceCallbackData data = {
db: db,
ppFaces: ppFaces, ppFaces: ppFaces,
count: entries count: entries,
initialized: 0
}; };
rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg); rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg);
@ -431,10 +415,24 @@ int main(int argc, char *argv[]) {
return 1; return 1;
} }
fprintf(stderr, "Face data loaded from DB\n"); fprintf(stderr, "Face data loaded from DB.\n");
processed = 0; rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "Descriptor data loaded from DB\n");
err_msg = NULL;
long int processed = 0;
long int last = 0;
long double total = 0.0; long double total = 0.0;
for (long i = 0; i < entries; i++) { for (long i = 0; i < entries; i++) {
Face *pLink = ppFaces[i]; Face *pLink = ppFaces[i];
for (long j = 0; j < entries; j++) { for (long j = 0; j < entries; j++) {

View File

@ -150,6 +150,20 @@ function init() {
} }
}); });
// Model 'facedescriptor': one row per face, keyed by faceId (which
// references Face.id), with the descriptor data stored in the BLOB column
// 'descriptors'. Sequelize's automatic timestamp columns are disabled.
const FaceDescriptor = db.sequelize.define('facedescriptor', {
faceId: {
type: Sequelize.INTEGER,
primaryKey: true,
references: {
model: Face,
key: 'id',
}
},
descriptors: Sequelize.BLOB
}, {
timestamps: false
});
const FaceDistances = db.sequelize.define('facedistance', { const FaceDistances = db.sequelize.define('facedistance', {
face1Id: { face1Id: {
type: Sequelize.INTEGER, type: Sequelize.INTEGER,