#include #include #include #include #include #include #include #ifndef MIN_PTS #define MIN_PTS 3 #endif #ifndef MAX_DISTANCE #define MAX_DISTANCE 0.47L #endif typedef struct Face { float descriptor[128]; long int clusterId; long faceId; long photoId; float confidence; float *distances; } Face; typedef struct FaceLink { struct FaceLink *pNext; float distance; Face *pFace; } FaceLink; char fileBuf[5000]; char pathBuf[1028]; Face *readFaceDescriptor(Face *pFace, long id, char *path) { FILE *f; f = fopen(path, "r"); if (!f) { return NULL; } size_t s = fread(fileBuf, 1, sizeof(fileBuf), f); fclose(f); char *p = fileBuf; fileBuf[s] = 0; while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) { p++; } int i = 0; for (i = 0; i < 128; i++) { char *start = p; while (*p && *p != ',' && *p != ']' && *p != ' ' && *p != '\n') { p++; } if (!*p) { break; } *p++ = 0; sscanf(start, "%f", &pFace->descriptor[i]); } if (i != 128) { return NULL; } pFace->faceId = id; return pFace; } int insert_descriptors(sqlite3 *db, long int faceId, float descriptor[128]) { char sql_buf[1024]; int rc; snprintf(sql_buf, sizeof(sql_buf), "INSERT INTO facedescriptors (faceId,descriptors) VALUES (%ld,?)", faceId); sqlite3_stmt *insert_stmt; rc = sqlite3_prepare_v2(db, sql_buf, -1, &insert_stmt, NULL); if(SQLITE_OK != rc) { fprintf(stderr, "Can't prepare insert statment %s (%i): %s\n", sql_buf, rc, sqlite3_errmsg(db)); return rc; } // Bind a block of zeros the size of the file we're going to insert later sqlite3_bind_zeroblob(insert_stmt, 1, sizeof(float) * 128); if (SQLITE_DONE != (rc = sqlite3_step(insert_stmt))) { fprintf(stderr, "Insert statement didn't work (%i): %s\n", rc, sqlite3_errmsg(db)); return rc; } sqlite3_int64 rowid = sqlite3_last_insert_rowid(db); // Getting here means we have a valid file handle, f, and a valid db handle, db // Also, a blank row has been inserted with key rowid sqlite3_blob *blob; rc = sqlite3_blob_open(db, "main", "facedescriptors", "descriptors", rowid, 1, &blob); if (SQLITE_OK != rc) { fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(db)); return rc; } if(SQLITE_OK != (rc = sqlite3_blob_write(blob, descriptor, 128 * sizeof(float), 0))) { fprintf(stderr, "Error writing to blob handle.\n"); return rc; } sqlite3_blob_close(blob); return SQLITE_OK; } /* * 1. Count how many entries there are * 2. Allocate storage to hold all entries * 3. Read all entries into flat array * 4. Allocate MxM matrix and pre-calculate distances * 5. Perform DBSCAN across MxM matrix to cluster */ int main(int argc, char *argv[]) { long maxId = 0; long i; long entries = 0; long int minPts = MIN_PTS; float maxDistance = MAX_DISTANCE; char sqlBuf[1024]; if (argc == 1) { fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n"); return -1; } for (i = 0; i < 100; i++) { sprintf(pathBuf, "%s/face-data/%ld", argv[1], i); DIR *faceDir = opendir(pathBuf); if (!faceDir) { continue; } struct dirent *ent; while ((ent = readdir(faceDir)) != NULL) { if (strstr(ent->d_name, ".json") == NULL) { continue; } entries++; } closedir(faceDir); } fprintf(stderr, "Found %ld entries to load and process.\n", entries); /* Allocate storage for all distances */ sqlite3 *db; int rc = sqlite3_open("db/photos.db", &db); if (rc != SQLITE_OK) { fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db)); sqlite3_close(db); return 1; } fprintf(stderr, "DB opened.\n"); Face **ppFaces = malloc(sizeof(Face *) * entries); if (!ppFaces) { fprintf(stderr, "Unable to allocate storage face descriptors."); return -1; } for (i = 0; i < entries; i++) { ppFaces[i] = malloc(sizeof(Face)); memset(ppFaces[i], 0, sizeof(Face)); } char *err_msg = NULL; char *sql = "DELETE FROM facedescriptors;" "BEGIN TRANSACTION;"; rc = sqlite3_exec(db, sql, 0, 0, &err_msg); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } long int processed = 0; int last = 0; for (i = 0; i < 100; i++) { sprintf(pathBuf, "%s/face-data/%ld", argv[1], i); DIR *faceDir = opendir(pathBuf); if (!faceDir) { fprintf(stderr, "Can not open %s\n", pathBuf); continue; } struct dirent *ent; while (processed < entries && (ent = readdir(faceDir)) != NULL) { if (strstr(ent->d_name, ".json") == NULL) { continue; } long id = 0; char *p = ent->d_name; while (*p && *p != '-') { id *= 10; id += *p - '0'; p++; } char path[1028*2]; sprintf(path, "%s/%s", pathBuf, ent->d_name); maxId = maxId > id ? maxId : id; if (!readFaceDescriptor(ppFaces[processed], id, path)) { fprintf(stderr, "Unable to read %s.\n", path); continue; } rc = insert_descriptors(db, id, ppFaces[processed]->descriptor); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } if (processed % 1000 == 0) { int perc = 100 * processed / entries; if (perc != last) { fprintf(stderr, "\rLoading %d%% complete.", perc); last = perc; } } } closedir(faceDir); } rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } sqlite3_close(db); fprintf(stderr, "\nDone.\n"); return 0; }