ketr.photos/scanner/scanner.c
James Ketrenos 05afd4febd Added 'facedescriptors' table instead of having separate json files'
Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
2020-01-11 13:45:24 -08:00

602 lines
26 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include <string.h>
#include <math.h>
#include <sqlite3.h>
/* Default minimum neighbor count (DBSCAN minPts); main() starts from this
 * value and lets argv[3] override it. May be predefined at compile time. */
#ifndef MIN_PTS
#define MIN_PTS 3
#endif
/* Default neighborhood radius (DBSCAN eps); main() starts from this value
 * and lets argv[2] override it. May be predefined at compile time. */
#ifndef MAX_DISTANCE
#define MAX_DISTANCE 0.47L
#endif
/* Descriptors extracted from profile face photos, used to filter out faces that are near a profile image
* (which all identify as the same person). */
long double profileDescriptors[][128] = {
{ -0.061601437628269196,0.11936908215284348,0.05469074845314026,-0.01107284426689148,-0.11588133871555328,-0.002019548788666725,-0.11300364136695862,-0.07167613506317139,0.1764453798532486,-0.05901609733700752,0.2385266125202179,0.012328468263149261,-0.2573902904987335,0.04025992751121521,-0.11443999409675598,0.11594478785991669,-0.17500847578048706,-0.07691430300474167,-0.12378361076116562,-0.11740189790725708,0.03630289435386658,0.06033151596784592,0.04939301311969757,0.004410859197378159,-0.20615431666374207,-0.25997376441955566,-0.08603082597255707,-0.09167610853910446,0.03498300909996033,-0.18279242515563965,0.041221488267183304,0.09842021763324738,-0.10988932847976685,0.031966306269168854,0.013916028663516045,0.008073337376117706,0.013303801417350769,-0.06880992650985718,0.17893345654010773,0.027153145521879196,-0.1894378960132599,-0.020184200257062912,0.05442515388131142,0.2977772355079651,0.20385639369487762,0.010473722591996193,0.029712140560150146,-0.10179305076599121,0.07872007042169571,-0.2609504461288452,0.004072457551956177,0.20807254314422607,0.039381805807352066,0.10894455015659332,0.0656222552061081,-0.21101640164852142,0.0015651732683181763,0.09899670630693436,-0.1148049384355545,0.03364836052060127,0.04903073608875275,-0.03714796528220177,-0.06581848114728928,-0.08302658796310425,0.1889929473400116,0.08697575330734253,-0.14220063388347626,-0.18263882398605347,0.17710572481155396,-0.1408098042011261,-0.04034702479839325,0.14808061718940735,-0.14523223042488098,-0.22445881366729736,-0.2085285633802414,0.04144661873579025,0.3701931834220886,0.140765979886055,-0.1524423211812973,0.05934419482946396,-0.09778376668691635,-0.05247225984930992,-0.07087128609418869,0.08293693512678146,-0.07502841204404831,0.01957462728023529,-0.07184667885303497,0.05862969905138016,0.19565513730049133,-0.07941281795501709,0.04070422798395157,0.26956090331077576,0.01857529580593109,-0.03987279161810875,-0.0008930861949920654,0.08536889404058456,-0.16341862082481384,0.035425
19360780716,-0.10296104848384857,-0.022122247144579887,0.08253192901611328,-0.07827107608318329,0.01481425017118454,0.08741594851016998,-0.17036983370780945,0.20205597579479218,-0.010307766497135162,-0.04007430374622345,-0.0006583929061889648,-0.016108371317386627,-0.04460605978965759,-0.034171007573604584,0.24336735904216766,-0.2778766453266144,0.18527397513389587,0.1877088099718094,0.098460853099823,0.148922860622406,0.06344866007566452,0.13334715366363525,0.0003817584365606308,0.004202648997306824,-0.11708918958902359,-0.06176237016916275,-0.027644027024507523,-0.10988462716341019,-0.10410083830356598,0.0056285373866558075 },
{ -0.05286264419555664,0.1027536615729332,0.0177864171564579,-0.007238239049911499,-0.15839841961860657,-0.013195162639021873,-0.055493343621492386,-0.011598028242588043,0.11436303704977036,-0.023542361333966255,0.23070596158504486,0.006959080696105957,-0.23580302298069,-0.04991580545902252,-0.048089612275362015,0.1130392998456955,-0.1211254671216011,-0.053501617163419724,-0.20760701596736908,-0.11128067970275879,0.015699222683906555,0.048812925815582275,-0.004359103739261627,-0.01246669515967369,-0.08746911585330963,-0.2714271545410156,-0.07527103275060654,-0.07152064889669418,0.13769567012786865,-0.1629178822040558,-0.030123017728328705,-0.006879039108753204,-0.15528543293476105,-0.07373329252004623,0.035297941416502,-0.0019944999366998672,0.00020968541502952576,-0.06796075403690338,0.15615630149841309,-0.02517728880047798,-0.15598921477794647,0.048431847244501114,0.030592434108257294,0.22424748539924622,0.18237638473510742,0.03839367255568504,-0.018516220152378082,-0.07987417280673981,0.10236746072769165,-0.23993468284606934,0.01739603653550148,0.20749998092651367,0.07386040687561035,0.09853726625442505,0.06335064768791199,-0.14400270581245422,-0.0020356550812721252,0.16046060621738434,-0.1502964049577713,0.03028072975575924,-0.013792440295219421,-0.04380643367767334,-0.05555684491991997,-0.13338367640972137,0.17439348995685577,0.08730429410934448,-0.11853333562612534,-0.14580222964286804,0.11578165739774704,-0.15631376206874847,0.0005779601633548737,0.057921670377254486,-0.11303769052028656,-0.14606109261512756,-0.24737423658370972,0.07011876255273819,0.3969222903251648,0.15113821625709534,-0.19518792629241943,0.029213692992925644,-0.11209011822938919,-0.058080971240997314,-0.01972014084458351,-0.010737363249063492,-0.12097814679145813,-0.017350099980831146,-0.057253990322351456,0.049719784408807755,0.19717393815517426,-0.06898155063390732,0.06022507697343826,0.2233758270740509,-0.0060714855790138245,0.03075665421783924,0.014615798369050026,0.029250048100948334,
-0.11849372833967209,-0.06616988033056259,-0.042307183146476746,0.0033659040927886963,0.0857284739613533,-0.20014573633670807,0.034076105803251266,0.06595847010612488,-0.16573786735534668,0.12949475646018982,0.008994176983833313,0.004620999097824097,0.02356269210577011,0.0182054340839386,-0.0561535507440567,-0.035449616611003876,0.23626668751239777,-0.2073628008365631,0.3139644265174866,0.1955631524324417,0.029616639018058777,0.11366549134254456,0.07708290964365005,0.10866693407297134,-0.06293588131666183,-0.0077966004610061646,-0.057174161076545715,-0.08943509310483932,0.004156911745667458,-0.0984298512339592,-0.005768085829913616,0.0017160219140350819},
{0.0011762604117393494,0.15072135627269745,0.020299416035413742,-0.06642502546310425,-0.060089170932769775,0.020552314817905426,-0.043852221220731735,-0.06478262692689896,0.17363804578781128,-0.03403667360544205,0.2395429164171219,0.01041267067193985,-0.24776551127433777,-0.046057891100645065,-0.08346940577030182,0.099162757396698,-0.19483637809753418,-0.04046493023633957,-0.1129814013838768,-0.1319343000650406,0.04903741180896759,0.07213468849658966,0.051988281309604645,-0.01571550965309143,-0.1113474890589714,-0.25194793939590454,-0.06103351712226868,-0.13213123381137848,0.10333564877510071,-0.15162967145442963,0.061464592814445496,0.09279994666576385,-0.11863565444946289,-0.07045092433691025,-0.01209489069879055,-0.009468602016568184,-0.10253313928842545,-0.09072671830654144,0.22084617614746094,0.006743522360920906,-0.06707601249217987,0.004636548459529877,-0.00785135105252266,0.3281668722629547,0.11846040189266205,-0.0055367788299918175,0.04063810035586357,-0.06342281401157379,0.1398719847202301,-0.29172199964523315,0.06687593460083008,0.17154791951179504,0.08854547142982483,0.16494478285312653,0.09101200103759766,-0.18667718768119812,0.08216162025928497,0.12080536782741547,-0.1759440004825592,0.10922732204198837,0.03968639671802521,-0.06957980990409851,-0.02280031517148018,-0.0626034140586853,0.2077835500240326,0.10969341546297073,-0.08934597671031952,-0.1270003616809845,0.14771263301372528,-0.05404433608055115,-0.05664592981338501,0.07035835832357407,-0.12639567255973816,-0.1545434296131134,-0.1935984045267105,0.05806276947259903,0.3116001486778259,0.11509235948324203,-0.1787276566028595,0.020631596446037292,-0.08929038792848587,-0.03143969923257828,0.053848765790462494,0.03168227896094322,-0.07926452159881592,-0.05334986746311188,-0.0591375008225441,0.024413203820586205,0.16138945519924164,-0.10341612249612808,-0.0791877955198288,0.2381574809551239,0.012866765260696411,0.009398067370057106,0.049464400857686996,0.02796872705221176,-0.11044278740882874,0.054440
34934043884,-0.09540048986673355,0.0681515783071518,0.09935745596885681,-0.1683940887451172,-0.047316670417785645,0.04480529949069023,-0.1389012485742569,0.2227819412946701,0.03692697733640671,0.01761404424905777,0.013868780806660652,-0.05882856249809265,-0.11958207935094833,0.026760146021842957,0.25092488527297974,-0.26664063334465027,0.33623644709587097,0.16868337988853455,0.05719297379255295,0.17470279335975647,0.057424478232860565,0.08439406007528305,-0.0024905502796173096,-0.037533506751060486,-0.1358765959739685,-0.09953152388334274,0.05512324720621109,-0.025295836851000786,-0.0866888239979744,0.026549236848950386},
{-0.1503148078918457,0.12206108123064041,0.02967856079339981,-0.03082158789038658,-0.1129150539636612,0.0478484109044075,-0.04801269620656967,-0.07795510441064835,0.1626167893409729,-0.041412316262722015,0.2022078037261963,0.031001679599285126,-0.20948299765586853,-0.04371509701013565,-0.0841950997710228,0.10647765547037125,-0.1393173485994339,-0.17036688327789307,-0.07359742373228073,-0.09497823566198349,-0.023887813091278076,0.02798299677670002,-0.011271866038441658,0.02640014886856079,-0.1520528793334961,-0.22326916456222534,-0.023297179490327835,-0.10385704040527344,0.02730628475546837,-0.09421993046998978,0.007815570570528507,0.03076392412185669,-0.2347397357225418,-0.07311992347240448,0.03199088200926781,0.1157669946551323,-0.0806952491402626,0.0058890413492918015,0.25238266587257385,0.0066128019243478775,-0.11484481394290924,0.058332689106464386,0.07401782274246216,0.30297812819480896,0.13892878592014313,0.015657028183341026,-0.026928886771202087,-0.041992999613285065,0.08834651112556458,-0.25814956426620483,0.025386730208992958,0.24164703488349915,0.11268384754657745,0.13700686395168304,-0.0037370696663856506,-0.1474452018737793,0.07658589631319046,0.11919880658388138,-0.16230738162994385,0.06895574927330017,0.05145460367202759,-0.09962668269872665,-0.07351469993591309,0.016625329852104187,0.24626575410366058,0.09379985928535461,-0.07967095822095871,-0.14824430644512177,0.1664988100528717,-0.07381315529346466,-0.0701378807425499,0.094453826546669,-0.1053486168384552,-0.173324853181839,-0.2059929072856903,0.11443521082401276,0.37259921431541443,0.12261700630187988,-0.17118749022483826,0.1002739816904068,-0.0962643027305603,0.0028629377484321594,0.02517216093838215,0.009352639317512512,-0.07928505539894104,-0.027248665690422058,-0.06288609653711319,0.03766700252890587,0.17690418660640717,0.00913313403725624,-0.016148168593645096,0.16757085919380188,0.081269770860672,-0.003465055488049984,0.08387735486030579,0.05247170478105545,-0.035166703164577484,-0.02116062
119603157,-0.15585437417030334,-0.0015826895833015442,0.06662821024656296,-0.07819218933582306,0.03278189152479172,0.059533413499593735,-0.18928831815719604,0.14338715374469757,0.029624812304973602,-0.04126661643385887,-0.017487522214651108,0.024950236082077026,-0.06610194593667984,-0.007236182689666748,0.1634015440940857,-0.3118166923522949,0.32635706663131714,0.1551782488822937,-0.006172630935907364,0.1549994796514511,0.11846084892749786,0.05585896223783493,0.005930176004767418,0.0026269033551216125,-0.08288535475730896,-0.14617297053337097,0.04124702885746956,-0.023869477212429047,0.056380532681941986,0.059275589883327484}
};
/* Label DBSCAN attaches to each face. UNDEFINED must stay 0 because Face
 * records are zero-filled when they are allocated. */
typedef enum {
    UNDEFINED = 0, /* not visited yet */
    CORE,          /* has at least minPts neighbors within range */
    EDGE,          /* reached from a core face but not dense itself */
    NOISE          /* too few neighbors when visited; may later become EDGE */
} ClusterTypes;
/* One face record: its descriptor plus the clustering state DBSCAN assigns. */
typedef struct Face Face;
struct Face {
    long double descriptor[128]; /* 128-value descriptor compared by euclideanDistance() */
    long clusterId;              /* cluster number set by DBSCAN; stays 0 when unassigned */
    long faceId;                 /* id column of the faces table */
    long photoId;                /* photoId column of the faces table */
    double confidence;           /* faceConfidence column; faces at or below 0.9 are skipped */
    ClusterTypes clusterType;    /* DBSCAN label for this face */
    double *distances;           /* distances[i] = distance to the i-th face of the face array;
                                  * 0.0 means "not computed / not comparable" */
};
/* Node of a singly linked neighbor list as produced by RangeQuery(). */
typedef struct FaceLink FaceLink;
struct FaceLink {
    FaceLink *pNext;      /* next node, NULL at the end of the chain */
    long double distance; /* distance from the queried face to pFace */
    Face *pFace;          /* neighbor face; the list does not own it */
};
/* Scratch buffer that readFaceDescriptor() loads a descriptor file into. */
char fileBuf[5000];
/* NOTE(review): not referenced by the code visible in this file. */
char pathBuf[1028];

/*
 * Read a 128-value face descriptor from the text file at `path`.
 *
 * Leading characters up to the first sign or digit are skipped; after that
 * the values may be separated by commas and/or whitespace (for example a
 * JSON array of numbers). Only the first sizeof(fileBuf) - 1 bytes of the
 * file are examined.
 *
 * pFace  record that receives the descriptor values and `id`.
 * id     stored in pFace->faceId on success.
 * path   file to read.
 *
 * Returns pFace on success, NULL if the file cannot be opened or does not
 * contain 128 parsable numbers (pFace->descriptor may then be partially
 * overwritten).
 */
Face *readFaceDescriptor(Face *pFace, long id, char *path) {
    FILE *f = fopen(path, "r");
    if (!f) {
        return NULL;
    }
    /* Keep one byte in reserve so the terminator always fits in fileBuf. */
    size_t s = fread(fileBuf, 1, sizeof(fileBuf) - 1, f);
    fclose(f);
    fileBuf[s] = 0;

    /* Skip any prefix (e.g. an opening bracket) before the first number. */
    char *p = fileBuf;
    while (*p && *p != '-' && *p != '+' && (*p < '0' || *p > '9')) {
        p++;
    }

    int i;
    for (i = 0; i < 128; i++) {
        /* Tolerate any mix of separators between two values. */
        while (*p == ',' || *p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
            p++;
        }
        char *end = NULL;
        long double value = strtold(p, &end);
        if (end == p) {
            /* No number here: end of data or malformed input. */
            break;
        }
        pFace->descriptor[i] = value;
        p = end;
    }
    if (i != 128) {
        return NULL;
    }
    pFace->faceId = id;
    return pFace;
}
long double euclideanDistance(long double *a, long double *b) {
long double sum = 0.0L;
for (int i = 0; i < 128; i++) {
long double delta = a[i] - b[i];
sum += delta * delta;
}
return sqrtl(sum);
}
/* https://en.wikipedia.org/wiki/DBSCAN */
#if 0
DBSCAN(DB, distFunc, eps, minPts) {
C = 0 /* Cluster counter */
for each point P in database DB {
if label(P) ≠ undefined then continue /* Previously processed in inner loop */
Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */
if |N| < minPts then { /* Density check */
label(P) = Noise /* Label as Noise */
continue
}
C = C + 1 /* next cluster label */
label(P) = C /* Label initial point */
Seed set S = N \ {P} /* Neighbors to expand */
for each point Q in S { /* Process every seed point */
if label(Q) = Noise then label(Q) = C /* Change Noise to border point */
if label(Q) ≠ undefined then continue /* Previously processed */
label(Q) = C /* Label neighbor */
Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */
if |N| ≥ minPts then { /* Density check */
S = S ∪ N /* Add new neighbors to seed set */
}
}
}
}
RangeQuery(DB, distFunc, Q, eps) {
Neighbors = empty list
for each point P in database DB { /* Scan all points in the database */
if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */
Neighbors = Neighbors ∪ {P} /* Add to result */
}
}
return Neighbors
}
#endif
/*
 * Collect every face whose pre-computed distance to pQ lies in (0, eps].
 *
 * ppFaces    array of faceCount faces; pQ->distances is indexed the same way.
 * pQ         the face whose neighborhood is queried.
 * eps        inclusive upper bound on the distance.
 *
 * Faces with confidence <= 0.9 and faces sharing pQ's faceId are skipped.
 * A stored distance of exactly 0.0 is treated as "not available" and never
 * matches.
 *
 * Returns a newly allocated chain (most recently found neighbor first), or
 * NULL when there are no neighbors; the caller releases it with freeChain().
 * The program is terminated if a list node cannot be allocated.
 */
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, double eps) {
    FaceLink *pNeighbors = NULL;
    for (long int i = 0; i < faceCount; i++) {
        Face *pFace = ppFaces[i];
        if (pFace->confidence <= 0.9) {
            continue;
        }
        if (pFace->faceId == pQ->faceId) {
            continue;
        }
        double distance = pQ->distances[i];
        if (distance > 0.0 && distance <= eps) {
            FaceLink *pLink = calloc(1, sizeof(*pLink));
            if (!pLink) {
                /* A silently truncated neighbor list would corrupt the
                 * clustering, so stop instead of dereferencing NULL. */
                fprintf(stderr, "Unable to allocate storage for neighbor list.\n");
                exit(EXIT_FAILURE);
            }
            pLink->distance = distance;
            pLink->pFace = pFace;
            pLink->pNext = pNeighbors;
            pNeighbors = pLink;
        }
    }
    return pNeighbors;
}
/* Release every node of a neighbor chain; the faces it points to are untouched. */
void freeChain(FaceLink *pLink) {
    for (FaceLink *pFollowing; pLink != NULL; pLink = pFollowing) {
        pFollowing = pLink->pNext;
        free(pLink);
    }
}
/* Number of nodes in a neighbor chain (0 for NULL). */
long int chainLength(FaceLink *pLink) {
    long int length = 0;
    for (; pLink != NULL; pLink = pLink->pNext) {
        length++;
    }
    return length;
}
long int DBSCAN(Face **ppFaces, long int faceCount, double eps, int minPts) {
long int C = 0;
for (long int i = 0; i < faceCount; i++) {
Face *pFace = ppFaces[i];
if (pFace->confidence <= 0.9) {
continue;
}
if (pFace->clusterType != UNDEFINED) {
continue;
}
double threshold = eps;
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps);
long neighborCount = chainLength(pNeighbors);
while (neighborCount > minPts * 5) {
threshold *= 0.9;
freeChain(pNeighbors);
pNeighbors = RangeQuery(ppFaces, faceCount, pFace, threshold);
neighborCount = chainLength(pNeighbors);
fprintf(stderr, "With eps of %f, %ld has %ld neighbors.\n", threshold, pFace->faceId, neighborCount);
}
if (neighborCount < minPts) {
pFace->clusterType = NOISE;
freeChain(pNeighbors);
continue;
}
//printf("%ld has %ld neighbors.\n", pFace->faceId, neighborCount);
C++;
pFace->clusterId = C;
pFace->clusterType = CORE;
FaceLink *pLink = pNeighbors;
while (pLink) {
Face *pQ = pLink->pFace;
if (pQ->faceId == pFace->faceId) {
pLink = pLink->pNext;
continue;
}
if (pQ->clusterType == NOISE) {
pQ->clusterId = C;
pQ->clusterType = EDGE;
}
if (pQ->clusterType != UNDEFINED) {
pLink = pLink->pNext;
continue;
}
pQ->clusterId = C;
pQ->clusterType = EDGE;
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps);
neighborCount = chainLength(pSubNeighbors);
threshold = eps;
while (neighborCount > minPts * 5) {
threshold *= 0.9;
freeChain(pSubNeighbors);
pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, threshold);
neighborCount = chainLength(pSubNeighbors);
fprintf(stderr, "With eps of %f, %ld has %ld neighbors.\n", threshold, pQ->faceId, neighborCount);
}
if (neighborCount >= minPts) {
pQ->clusterType = CORE;
/* Append these neighbors to the end of the chain */
FaceLink *pTmp = pLink;
while (pTmp->pNext) {
pTmp = pTmp->pNext;
}
pTmp->pNext = pSubNeighbors;
} else {
freeChain(pSubNeighbors);
}
pLink = pLink->pNext;
}
freeChain(pNeighbors);
}
return C;
}
/* Context handed to the sqlite3_exec() row callbacks in this file. */
typedef struct {
    sqlite3 *db;      /* open database handle */
    Face **ppFaces;   /* array of `count` face records */
    long count;       /* number of records in ppFaces */
    long initialized; /* how many records parseFaceIdRow() has filled so far */
} FaceCallbackData;
/*
 * sqlite3_exec() callback: parse the first column as a base-10 integer and
 * store it in the long that `data` points to. A NULL column counts as 0.
 * Always returns 0 so the query continues.
 */
int parseFaceIdCount(void *data, int argc, char **argv, char **column) {
    const char *text = argv[0];
    if (text == NULL) {
        text = "0";
    }
    *(long int *)data = strtol(text, NULL, 10);
    return 0;
}
/*
 * sqlite3_exec() callback for rows of the facedescriptors table: copy the
 * row's descriptor blob into the matching Face record.
 *
 * data   FaceCallbackData with the open db handle and the face array.
 * argv   argv[0] is parsed as the face id; it is also used as the rowid
 *        passed to sqlite3_blob_open().
 *
 * Rows whose id matches no loaded face are ignored. Returns SQLITE_OK on
 * success; any other value is the SQLite error code and makes sqlite3_exec()
 * abort the query.
 */
int parseFaceDescriptor(void *data, int argc, char **argv, char **column) {
    FaceCallbackData *map = data;
    long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);

    /* Locate the record loaded earlier for this face id. */
    Face *pFace = NULL;
    for (long int i = 0; i < map->count; i++) {
        if (map->ppFaces[i]->faceId == faceId) {
            pFace = map->ppFaces[i];
            break;
        }
    }
    if (!pFace) {
        return SQLITE_OK;
    }

    /* The blob is only read, so open it read-only (flags == 0). */
    sqlite3_blob *blob = NULL;
    int rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 0, &blob);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db));
        return rc;
    }

    /* sqlite3_blob_read() fails, leaving rc != SQLITE_OK, when the blob is
     * shorter than the descriptor array. */
    rc = sqlite3_blob_read(blob, pFace->descriptor, (int)sizeof(pFace->descriptor), 0);
    if (SQLITE_OK != rc) {
        fprintf(stderr, "Error reading from blob handle.\n");
    }
    /* Close on every path so a failed read does not leak the handle. */
    sqlite3_blob_close(blob);
    return rc;
}
int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
FaceCallbackData *map = data;
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
double confidence = strtod(argv[2] ? argv[2] : "0.0", NULL);
Face *pFace = map->ppFaces[map->initialized++];
pFace->faceId = faceId;
pFace->photoId = photoId;
pFace->confidence = confidence;
return 0;
}
/*
* 1. Count how many entries there are
* 2. Allocate storage to hold all entries
* 3. Read all entries into flat array
* 4. Allocate MxM matrix and pre-calculate distances
* 5. Perform DBSCAN across MxM matrix to cluster
*/
int main(int argc, char *argv[]) {
long maxId = 0;
long i;
long int entries = 0;
long int minPts = MIN_PTS;
long double maxDistance = MAX_DISTANCE;
if (argc == 1) {
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
return -1;
}
if (argc > 2) {
sscanf(argv[2], "%Lf", &maxDistance);
}
if (argc > 3) {
sscanf(argv[3], "%ld", &minPts);
}
fprintf(stderr, "\nmaxDistance : %Lf\nminPts : %ld\n", maxDistance, minPts);
/* Allocate storage for all distances */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
fprintf(stderr, "DB opened.\n");
char *err_msg = NULL;
entries = 0;
rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "%ld faces in DB.\n", entries);
Face **ppFaces = malloc(sizeof(Face *) * entries);
if (!ppFaces) {
fprintf(stderr, "Unable to allocate storage face descriptors.");
return -1;
}
for (i = 0; i < entries; i++) {
ppFaces[i] = malloc(sizeof(Face));
memset(ppFaces[i], 0, sizeof(Face));
}
for (i = 0; i < entries; i++) {
ppFaces[i]->distances = malloc(sizeof(*ppFaces[i]->distances) * entries);
if (!ppFaces[i]->distances) {
fprintf(stderr, "Unable to allocate storage for distance dictionary.");
return -1;
}
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
}
fprintf(stderr, "Storage allocated for %ld faces.\n", entries);
FaceCallbackData data = {
db: db,
ppFaces: ppFaces,
count: entries,
initialized: 0
};
rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "Face data loaded from DB.\n");
rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "Descriptor data loaded from DB\n");
err_msg = NULL;
long int processed = 0;
long int last = 0;
long double total = 0.0;
for (long i = 0; i < entries; i++) {
Face *pLink = ppFaces[i];
for (long j = 0; j < entries; j++) {
Face *pTarget = ppFaces[j];
processed++;
if (processed % 1000 == 0) {
int perc = 100 * processed / (entries * entries);
if (perc != last) {
fprintf(stderr, "\rComputed %d%% complete.", perc);
last = perc;
}
}
if (pLink->confidence <= 0.9 || pTarget->confidence <= 0.9) {
pLink->distances[i] = 0.0;
pTarget->distances[j] = 0.0;
continue;
}
if (i == j) {
pLink->distances[i] = 0.0;
pTarget->distances[j] = 0.0;
continue;
}
if (pLink->distances[j] != 0.0) {
continue;
}
pLink->distances[j] =
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
total += pLink->distances[j];
}
}
fprintf(stderr, "\nAverage distance: %Lf\n", 1. * total / (entries * entries));
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts);
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
for (i = 0; i < entries; i++) {
switch (ppFaces[i]->clusterType) {
case NOISE:
outlier++;
break;
case UNDEFINED:
undefined++;
break;
case CORE:
core++;
break;
case EDGE:
reachable++;
break;
}
}
fprintf(stderr, "%ld clusters identified!\n", clusters);
fprintf(stderr, "%ld NOISE\n", outlier);
fprintf(stderr, "%ld UNDEFINED\n", undefined);
fprintf(stderr, "%ld CORE\n", core);
fprintf(stderr, "%ld EDGE\n", reachable);
fprintf(stdout, "<script>\nvar clusters = [\n");
for (long i = 1; i <= clusters; i++) {
long nodes = 0;
fprintf(stdout, "/* %ld. */ [", i);
for (long int j = 0; j < entries; j++) {
if (ppFaces[j]->clusterId == i) {
if (nodes == 0) {
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
} else {
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
}
nodes++;
}
}
if (i < clusters) {
fprintf(stdout, "],\n");
} else {
fprintf(stdout, "]\n");
}
}
fprintf(stdout, "];\n</script>\n");
char *sql =
"DELETE FROM facedistances;"
"BEGIN TRANSACTION;";
rc = sqlite3_exec(db, sql, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "facedistances deleted and transaction started.\n");
for (long i = 0; i < entries; i++) {
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
}
char sqlBuf[1024];
processed = 0;
last = 0;
for (long i = 0; i < entries; i++) {
Face *pLink = ppFaces[i];
for (long j = 0; j < entries; j++) {
Face *pTarget = ppFaces[j];
processed++;
if (processed % 1000 == 0) {
int perc = 100 * processed / (entries * entries);
if (perc != last) {
fprintf(stderr, "\rComputed %d%% complete.", perc);
last = perc;
}
}
if (i == j) {
pLink->distances[i] = 0.0;
pTarget->distances[j] = 0.0;
continue;
}
if (pLink->distances[j] != 0.0) {
continue;
}
pLink->distances[j] =
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
if (pLink->distances[j] < 0.5) {
sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%lf);",
((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId),
((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId),
pLink->distances[j]);
rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
}
}
}
fprintf(stderr, "\n");
sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId);
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
sqlite3_close(db);
return 0;
}