ketr.photos/scanner/json2db.c
James Ketrenos b792446cc9 Move from long double to float
Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
2020-01-11 17:25:36 -08:00

249 lines
5.8 KiB
C

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/types.h>
#include <sqlite3.h>
/*
 * Compile-time defaults. Each can be overridden by defining the macro on the
 * compiler command line (e.g. -DMIN_PTS=5), because the definitions below are
 * guarded by #ifndef.
 *
 * NOTE(review): the names suggest DBSCAN parameters (minimum neighbour count
 * and neighbourhood radius); no logic visible in this file acts on either
 * value -- confirm whether they are still needed here.
 */
#ifndef MIN_PTS
#define MIN_PTS 3
#endif
#ifndef MAX_DISTANCE
/* The L suffix makes this a long double constant. */
#define MAX_DISTANCE 0.47L
#endif
/*
 * One face record.
 *
 * readFaceDescriptor() fills `descriptor` and `faceId`. The other fields are
 * only ever zero-initialised by the code visible in this file.
 */
typedef struct Face {
/* The 128 float values parsed from the face's descriptor file. */
float descriptor[128];
/* NOTE(review): presumably a cluster assignment -- never set in this file; confirm. */
long int clusterId;
/* Id passed to readFaceDescriptor(); main() derives it from the file name. */
long faceId;
/* NOTE(review): presumably the owning photo's id -- never set in this file; confirm. */
long photoId;
/* NOTE(review): presumably a detection confidence -- never set in this file; confirm. */
float confidence;
/* NOTE(review): presumably a per-face distance table -- never allocated in this file; confirm. */
float *distances;
} Face;
/*
 * Node that pairs a Face pointer with a distance and a pointer to another
 * FaceLink. Not referenced by any code visible in this file.
 * NOTE(review): looks like a singly linked neighbour list -- confirm usage.
 */
typedef struct FaceLink {
struct FaceLink *pNext;
float distance;
Face *pFace;
} FaceLink;
char fileBuf[5000];
char pathBuf[1028];
/*
 * Parse the first 128 numeric values found in the text file at `path` into
 * pFace->descriptor and record `id` as the face id.
 *
 * The file is read into the shared fileBuf (at most sizeof(fileBuf) - 1
 * bytes; anything beyond that is ignored). Parsing starts at the first
 * character that can begin a number (digit, '+' or '-'). Values are separated
 * by ',', ']', ' ' or '\n', and every value -- including the last one -- must
 * be followed by one of those separators to be counted.
 *
 * pFace: destination record; descriptor[] and faceId are overwritten.
 * id:    value stored in pFace->faceId on success.
 * path:  file to read.
 *
 * Returns pFace on success, or NULL if the file cannot be opened, contains
 * fewer than 128 values, or contains a token that is not a number. On a NULL
 * return pFace->descriptor may be partially written.
 */
Face *readFaceDescriptor(Face *pFace, long id, char *path) {
    FILE *f = fopen(path, "r");
    if (!f) {
        return NULL;
    }

    /* Leave room for the terminator: reading sizeof(fileBuf) bytes would
     * make the store below land one past the end of the array. */
    size_t s = fread(fileBuf, 1, sizeof(fileBuf) - 1, f);
    fclose(f);
    fileBuf[s] = 0;

    /* Skip any leading text (e.g. an opening bracket) up to the first number. */
    char *p = fileBuf;
    while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) {
        p++;
    }

    int i;
    for (i = 0; i < 128; i++) {
        /* Swallow separator runs such as ", " or ",\n" so that an empty
         * token is never mistaken for a value. */
        while (*p == ',' || *p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
            p++;
        }

        char *start = p;
        while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') {
            p++;
        }
        if (!*p) {
            /* Ran out of input before finding the token's separator. */
            break;
        }
        *p++ = 0;

        if (sscanf(start, "%f", &pFace->descriptor[i]) != 1) {
            /* Not a number: refuse to return a descriptor with a hole in it. */
            return NULL;
        }
    }
    if (i != 128) {
        return NULL;
    }

    pFace->faceId = id;
    return pFace;
}
/*
 * Insert one row into facedescriptors, storing `descriptor` as a blob of
 * 128 * sizeof(float) raw bytes in the `descriptors` column.
 *
 * The row is first inserted with a zero-filled blob of the right size; the
 * float data is then written into it through SQLite's incremental blob API.
 *
 * db:         open database connection.
 * faceId:     value for the faceId column.
 * descriptor: the 128 floats to store.
 *
 * Returns SQLITE_OK on success, otherwise the SQLite result code of the step
 * that failed (a diagnostic is printed to stderr). The prepared statement and
 * the blob handle are released on every path.
 */
int insert_descriptors(sqlite3 *db, long int faceId, float descriptor[128]) {
    const int blob_bytes = (int)(sizeof(float) * 128);
    char sql_buf[1024];
    sqlite3_stmt *insert_stmt = NULL;
    sqlite3_blob *blob = NULL;
    int rc;

    snprintf(sql_buf, sizeof(sql_buf),
             "INSERT INTO facedescriptors (faceId,descriptors) VALUES (%ld,?)", faceId);

    rc = sqlite3_prepare_v2(db, sql_buf, -1, &insert_stmt, NULL);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Can't prepare insert statment %s (%i): %s\n", sql_buf, rc, sqlite3_errmsg(db));
        return rc;
    }

    /* Reserve a zero-filled blob of the final size; the data is written below. */
    rc = sqlite3_bind_zeroblob(insert_stmt, 1, blob_bytes);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Can't bind descriptor blob (%i): %s\n", rc, sqlite3_errmsg(db));
        sqlite3_finalize(insert_stmt);
        return rc;
    }

    rc = sqlite3_step(insert_stmt);
    if (SQLITE_DONE != rc) {
        fprintf(stderr, "Insert statement didn't work (%i): %s\n", rc, sqlite3_errmsg(db));
        sqlite3_finalize(insert_stmt);
        return rc;
    }
    /* The statement is done; an unfinalized statement would leak and would
     * also prevent sqlite3_close() from succeeding later. */
    sqlite3_finalize(insert_stmt);

    /* A blank row now exists; its key is the connection's last insert rowid. */
    sqlite3_int64 rowid = sqlite3_last_insert_rowid(db);

    rc = sqlite3_blob_open(db, "main", "facedescriptors", "descriptors", rowid, 1, &blob);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(db));
        sqlite3_blob_close(blob);
        return rc;
    }

    rc = sqlite3_blob_write(blob, descriptor, blob_bytes, 0);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Error writing to blob handle.\n");
        sqlite3_blob_close(blob);
        return rc;
    }

    /* Closing can itself report a failure to store the written data. */
    rc = sqlite3_blob_close(blob);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Error closing blob handle (%i): %s\n", rc, sqlite3_errmsg(db));
        return rc;
    }
    return SQLITE_OK;
}
/*
* 1. Count how many entries there are
* 2. Allocate storage to hold all entries
* 3. Read all entries into flat array
* 4. Allocate MxM matrix and pre-calculate distances
* 5. Perform DBSCAN across MxM matrix to cluster
*/
int main(int argc, char *argv[]) {
long maxId = 0;
long i;
long entries = 0;
long int minPts = MIN_PTS;
float maxDistance = MAX_DISTANCE;
char sqlBuf[1024];
if (argc == 1) {
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
return -1;
}
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
if (!faceDir) {
continue;
}
struct dirent *ent;
while ((ent = readdir(faceDir)) != NULL) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
entries++;
}
closedir(faceDir);
}
fprintf(stderr, "Found %ld entries to load and process.\n", entries);
/* Allocate storage for all distances */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
fprintf(stderr, "DB opened.\n");
Face **ppFaces = malloc(sizeof(Face *) * entries);
if (!ppFaces) {
fprintf(stderr, "Unable to allocate storage face descriptors.");
return -1;
}
for (i = 0; i < entries; i++) {
ppFaces[i] = malloc(sizeof(Face));
memset(ppFaces[i], 0, sizeof(Face));
}
char *err_msg = NULL;
char *sql =
"DELETE FROM facedescriptors;"
"BEGIN TRANSACTION;";
rc = sqlite3_exec(db, sql, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
long int processed = 0;
int last = 0;
for (i = 0; i < 100; i++) {
sprintf(pathBuf, "%s/face-data/%ld", argv[1], i);
DIR *faceDir = opendir(pathBuf);
if (!faceDir) {
fprintf(stderr, "Can not open %s\n", pathBuf);
continue;
}
struct dirent *ent;
while (processed < entries && (ent = readdir(faceDir)) != NULL) {
if (strstr(ent->d_name, ".json") == NULL) {
continue;
}
long id = 0;
char *p = ent->d_name;
while (*p && *p != '-') {
id *= 10;
id += *p - '0';
p++;
}
char path[1028*2];
sprintf(path, "%s/%s", pathBuf, ent->d_name);
maxId = maxId > id ? maxId : id;
if (!readFaceDescriptor(ppFaces[processed], id, path)) {
fprintf(stderr, "Unable to read %s.\n", path);
continue;
}
rc = insert_descriptors(db, id, ppFaces[processed]->descriptor);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
if (processed % 1000 == 0) {
int perc = 100 * processed / entries;
if (perc != last) {
fprintf(stderr, "\rLoading %d%% complete.", perc);
last = perc;
}
}
}
closedir(faceDir);
}
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
sqlite3_close(db);
fprintf(stderr, "\nDone.\n");
return 0;
}