ketr.photos/scanner/scanner.c
James Ketrenos 0db1181851 Output counts indexes correctly
Signed-off-by: James Ketrenos <james_gitlab@ketrenos.com>
2020-01-11 19:45:07 -08:00

672 lines
33 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include <string.h>
#include <math.h>
#include <sqlite3.h>
/* Build-time defaults for the clustering parameters. Each can be overridden with
 * -D on the compiler command line; main() additionally lets argv[2..4] override
 * them at run time. */

/* Default minimum neighbour count (DBSCAN minPts) for a face to start or extend a cluster. */
#ifndef MIN_PTS
#define MIN_PTS 3
#endif
/* Default neighbourhood radius (DBSCAN eps), measured as Euclidean descriptor distance. */
#ifndef MAX_DISTANCE
#define MAX_DISTANCE 0.47L
#endif
/* Default size at or above which main() re-runs DBSCAN on a cluster with a smaller radius. */
#ifndef MAX_CLUSTER_SIZE
#define MAX_CLUSTER_SIZE 150
#endif
/* Descriptors extracted from profile face photos. main() drops every face whose
 * descriptor lies within 0.5 (Euclidean distance) of any row, because faces near a
 * profile image all identify as the same person.
 *
 * One row per reference descriptor, 128 floats per row. Keep every literal on a
 * single line: a literal split by a line wrap becomes two tokens and the table no
 * longer compiles. */
float profileDescriptors[][128] = {
    { -0.061601437628269196,0.11936908215284348,0.05469074845314026,-0.01107284426689148,-0.11588133871555328,-0.002019548788666725,-0.11300364136695862,-0.07167613506317139,0.1764453798532486,-0.05901609733700752,0.2385266125202179,0.012328468263149261,-0.2573902904987335,0.04025992751121521,-0.11443999409675598,0.11594478785991669,-0.17500847578048706,-0.07691430300474167,-0.12378361076116562,-0.11740189790725708,0.03630289435386658,0.06033151596784592,0.04939301311969757,0.004410859197378159,-0.20615431666374207,-0.25997376441955566,-0.08603082597255707,-0.09167610853910446,0.03498300909996033,-0.18279242515563965,0.041221488267183304,0.09842021763324738,-0.10988932847976685,0.031966306269168854,0.013916028663516045,0.008073337376117706,0.013303801417350769,-0.06880992650985718,0.17893345654010773,0.027153145521879196,-0.1894378960132599,-0.020184200257062912,0.05442515388131142,0.2977772355079651,0.20385639369487762,0.010473722591996193,0.029712140560150146,-0.10179305076599121,0.07872007042169571,-0.2609504461288452,0.004072457551956177,0.20807254314422607,0.039381805807352066,0.10894455015659332,0.0656222552061081,-0.21101640164852142,0.0015651732683181763,0.09899670630693436,-0.1148049384355545,0.03364836052060127,0.04903073608875275,-0.03714796528220177,-0.06581848114728928,-0.08302658796310425,0.1889929473400116,0.08697575330734253,-0.14220063388347626,-0.18263882398605347,0.17710572481155396,-0.1408098042011261,-0.04034702479839325,0.14808061718940735,-0.14523223042488098,-0.22445881366729736,-0.2085285633802414,0.04144661873579025,0.3701931834220886,0.140765979886055,-0.1524423211812973,0.05934419482946396,-0.09778376668691635,-0.05247225984930992,-0.07087128609418869,0.08293693512678146,-0.07502841204404831,0.01957462728023529,-0.07184667885303497,0.05862969905138016,0.19565513730049133,-0.07941281795501709,0.04070422798395157,0.26956090331077576,0.01857529580593109,-0.03987279161810875,-0.0008930861949920654,0.08536889404058456,-0.16341862082481384,0.03542519360780716,-0.10296104848384857,-0.022122247144579887,0.08253192901611328,-0.07827107608318329,0.01481425017118454,0.08741594851016998,-0.17036983370780945,0.20205597579479218,-0.010307766497135162,-0.04007430374622345,-0.0006583929061889648,-0.016108371317386627,-0.04460605978965759,-0.034171007573604584,0.24336735904216766,-0.2778766453266144,0.18527397513389587,0.1877088099718094,0.098460853099823,0.148922860622406,0.06344866007566452,0.13334715366363525,0.0003817584365606308,0.004202648997306824,-0.11708918958902359,-0.06176237016916275,-0.027644027024507523,-0.10988462716341019,-0.10410083830356598,0.0056285373866558075 },
    { -0.05286264419555664,0.1027536615729332,0.0177864171564579,-0.007238239049911499,-0.15839841961860657,-0.013195162639021873,-0.055493343621492386,-0.011598028242588043,0.11436303704977036,-0.023542361333966255,0.23070596158504486,0.006959080696105957,-0.23580302298069,-0.04991580545902252,-0.048089612275362015,0.1130392998456955,-0.1211254671216011,-0.053501617163419724,-0.20760701596736908,-0.11128067970275879,0.015699222683906555,0.048812925815582275,-0.004359103739261627,-0.01246669515967369,-0.08746911585330963,-0.2714271545410156,-0.07527103275060654,-0.07152064889669418,0.13769567012786865,-0.1629178822040558,-0.030123017728328705,-0.006879039108753204,-0.15528543293476105,-0.07373329252004623,0.035297941416502,-0.0019944999366998672,0.00020968541502952576,-0.06796075403690338,0.15615630149841309,-0.02517728880047798,-0.15598921477794647,0.048431847244501114,0.030592434108257294,0.22424748539924622,0.18237638473510742,0.03839367255568504,-0.018516220152378082,-0.07987417280673981,0.10236746072769165,-0.23993468284606934,0.01739603653550148,0.20749998092651367,0.07386040687561035,0.09853726625442505,0.06335064768791199,-0.14400270581245422,-0.0020356550812721252,0.16046060621738434,-0.1502964049577713,0.03028072975575924,-0.013792440295219421,-0.04380643367767334,-0.05555684491991997,-0.13338367640972137,0.17439348995685577,0.08730429410934448,-0.11853333562612534,-0.14580222964286804,0.11578165739774704,-0.15631376206874847,0.0005779601633548737,0.057921670377254486,-0.11303769052028656,-0.14606109261512756,-0.24737423658370972,0.07011876255273819,0.3969222903251648,0.15113821625709534,-0.19518792629241943,0.029213692992925644,-0.11209011822938919,-0.058080971240997314,-0.01972014084458351,-0.010737363249063492,-0.12097814679145813,-0.017350099980831146,-0.057253990322351456,0.049719784408807755,0.19717393815517426,-0.06898155063390732,0.06022507697343826,0.2233758270740509,-0.0060714855790138245,0.03075665421783924,0.014615798369050026,0.029250048100948334,-0.11849372833967209,-0.06616988033056259,-0.042307183146476746,0.0033659040927886963,0.0857284739613533,-0.20014573633670807,0.034076105803251266,0.06595847010612488,-0.16573786735534668,0.12949475646018982,0.008994176983833313,0.004620999097824097,0.02356269210577011,0.0182054340839386,-0.0561535507440567,-0.035449616611003876,0.23626668751239777,-0.2073628008365631,0.3139644265174866,0.1955631524324417,0.029616639018058777,0.11366549134254456,0.07708290964365005,0.10866693407297134,-0.06293588131666183,-0.0077966004610061646,-0.057174161076545715,-0.08943509310483932,0.004156911745667458,-0.0984298512339592,-0.005768085829913616,0.0017160219140350819 },
    { 0.0011762604117393494,0.15072135627269745,0.020299416035413742,-0.06642502546310425,-0.060089170932769775,0.020552314817905426,-0.043852221220731735,-0.06478262692689896,0.17363804578781128,-0.03403667360544205,0.2395429164171219,0.01041267067193985,-0.24776551127433777,-0.046057891100645065,-0.08346940577030182,0.099162757396698,-0.19483637809753418,-0.04046493023633957,-0.1129814013838768,-0.1319343000650406,0.04903741180896759,0.07213468849658966,0.051988281309604645,-0.01571550965309143,-0.1113474890589714,-0.25194793939590454,-0.06103351712226868,-0.13213123381137848,0.10333564877510071,-0.15162967145442963,0.061464592814445496,0.09279994666576385,-0.11863565444946289,-0.07045092433691025,-0.01209489069879055,-0.009468602016568184,-0.10253313928842545,-0.09072671830654144,0.22084617614746094,0.006743522360920906,-0.06707601249217987,0.004636548459529877,-0.00785135105252266,0.3281668722629547,0.11846040189266205,-0.0055367788299918175,0.04063810035586357,-0.06342281401157379,0.1398719847202301,-0.29172199964523315,0.06687593460083008,0.17154791951179504,0.08854547142982483,0.16494478285312653,0.09101200103759766,-0.18667718768119812,0.08216162025928497,0.12080536782741547,-0.1759440004825592,0.10922732204198837,0.03968639671802521,-0.06957980990409851,-0.02280031517148018,-0.0626034140586853,0.2077835500240326,0.10969341546297073,-0.08934597671031952,-0.1270003616809845,0.14771263301372528,-0.05404433608055115,-0.05664592981338501,0.07035835832357407,-0.12639567255973816,-0.1545434296131134,-0.1935984045267105,0.05806276947259903,0.3116001486778259,0.11509235948324203,-0.1787276566028595,0.020631596446037292,-0.08929038792848587,-0.03143969923257828,0.053848765790462494,0.03168227896094322,-0.07926452159881592,-0.05334986746311188,-0.0591375008225441,0.024413203820586205,0.16138945519924164,-0.10341612249612808,-0.0791877955198288,0.2381574809551239,0.012866765260696411,0.009398067370057106,0.049464400857686996,0.02796872705221176,-0.11044278740882874,0.05444034934043884,-0.09540048986673355,0.0681515783071518,0.09935745596885681,-0.1683940887451172,-0.047316670417785645,0.04480529949069023,-0.1389012485742569,0.2227819412946701,0.03692697733640671,0.01761404424905777,0.013868780806660652,-0.05882856249809265,-0.11958207935094833,0.026760146021842957,0.25092488527297974,-0.26664063334465027,0.33623644709587097,0.16868337988853455,0.05719297379255295,0.17470279335975647,0.057424478232860565,0.08439406007528305,-0.0024905502796173096,-0.037533506751060486,-0.1358765959739685,-0.09953152388334274,0.05512324720621109,-0.025295836851000786,-0.0866888239979744,0.026549236848950386 },
    { -0.1503148078918457,0.12206108123064041,0.02967856079339981,-0.03082158789038658,-0.1129150539636612,0.0478484109044075,-0.04801269620656967,-0.07795510441064835,0.1626167893409729,-0.041412316262722015,0.2022078037261963,0.031001679599285126,-0.20948299765586853,-0.04371509701013565,-0.0841950997710228,0.10647765547037125,-0.1393173485994339,-0.17036688327789307,-0.07359742373228073,-0.09497823566198349,-0.023887813091278076,0.02798299677670002,-0.011271866038441658,0.02640014886856079,-0.1520528793334961,-0.22326916456222534,-0.023297179490327835,-0.10385704040527344,0.02730628475546837,-0.09421993046998978,0.007815570570528507,0.03076392412185669,-0.2347397357225418,-0.07311992347240448,0.03199088200926781,0.1157669946551323,-0.0806952491402626,0.0058890413492918015,0.25238266587257385,0.0066128019243478775,-0.11484481394290924,0.058332689106464386,0.07401782274246216,0.30297812819480896,0.13892878592014313,0.015657028183341026,-0.026928886771202087,-0.041992999613285065,0.08834651112556458,-0.25814956426620483,0.025386730208992958,0.24164703488349915,0.11268384754657745,0.13700686395168304,-0.0037370696663856506,-0.1474452018737793,0.07658589631319046,0.11919880658388138,-0.16230738162994385,0.06895574927330017,0.05145460367202759,-0.09962668269872665,-0.07351469993591309,0.016625329852104187,0.24626575410366058,0.09379985928535461,-0.07967095822095871,-0.14824430644512177,0.1664988100528717,-0.07381315529346466,-0.0701378807425499,0.094453826546669,-0.1053486168384552,-0.173324853181839,-0.2059929072856903,0.11443521082401276,0.37259921431541443,0.12261700630187988,-0.17118749022483826,0.1002739816904068,-0.0962643027305603,0.0028629377484321594,0.02517216093838215,0.009352639317512512,-0.07928505539894104,-0.027248665690422058,-0.06288609653711319,0.03766700252890587,0.17690418660640717,0.00913313403725624,-0.016148168593645096,0.16757085919380188,0.081269770860672,-0.003465055488049984,0.08387735486030579,0.05247170478105545,-0.035166703164577484,-0.02116062119603157,-0.15585437417030334,-0.0015826895833015442,0.06662821024656296,-0.07819218933582306,0.03278189152479172,0.059533413499593735,-0.18928831815719604,0.14338715374469757,0.029624812304973602,-0.04126661643385887,-0.017487522214651108,0.024950236082077026,-0.06610194593667984,-0.007236182689666748,0.1634015440940857,-0.3118166923522949,0.32635706663131714,0.1551782488822937,-0.006172630935907364,0.1549994796514511,0.11846084892749786,0.05585896223783493,0.005930176004767418,0.0026269033551216125,-0.08288535475730896,-0.14617297053337097,0.04124702885746956,-0.023869477212429047,0.056380532681941986,0.059275589883327484 },
    { -0.11449231207370758,0.08218079805374146,0.052244849503040314,-0.05849563330411911,-0.09875518083572388,-0.06790590286254883,-0.03999913111329079,-0.07766807824373245,0.15596428513526917,-0.0483558364212513,0.21132533252239227,-0.01931747794151306,-0.24744108319282532,-0.10812359303236008,0.058341797441244125,0.11405576020479202,-0.1183864176273346,-0.0856679230928421,-0.13984550535678864,-0.11198180168867111,-0.03631503880023956,-0.019455134868621826,0.038762785494327545,0.04496624693274498,-0.10771285742521286,-0.25013211369514465,-0.06068229675292969,-0.11673891544342041,0.07542752474546432,-0.14139781892299652,0.0017466647550463676,0.043040819466114044,-0.17083533108234406,-0.07615770399570465,-0.026609988883137703,0.06443975865840912,-0.056286998093128204,-0.08603766560554504,0.17082615196704865,-0.03381050005555153,-0.14733527600765228,-0.1067851260304451,-0.04236249998211861,0.23973657190799713,0.21202436089515686,-0.008970214053988457,0.017333630472421646,-0.012529492378234863,0.05171665549278259,-0.2481795996427536,-0.0033811144530773163,0.11406000703573227,0.11761488020420074,0.06430923938751221,0.05201558396220207,-0.18238718807697296,0.03609202429652214,0.10110349208116531,-0.16749322414398193,-0.017176488414406776,0.02417522668838501,-0.09461188316345215,-0.08760598301887512,-0.09778585284948349,0.2984878420829773,0.09519178420305252,-0.11396373808383942,-0.11740261316299438,0.15713796019554138,-0.11983270198106766,0.0014888402074575424,0.11709675192832947,-0.11239394545555115,-0.1615625023841858,-0.21783563494682312,0.062243033200502396,0.3098629415035248,0.13144542276859283,-0.19728504121303558,-0.00864078477025032,-0.1421070396900177,-0.027583356946706772,-0.03719120845198631,0.004923664033412933,-0.08225893974304199,0.016801394522190094,-0.05810196325182915,0.0058958642184734344,0.15847420692443848,-0.09055405110120773,0.03644956648349762,0.2036021500825882,0.012157909572124481,-0.014717459678649902,-0.008820030838251114,-0.07779868692159653,-0.03748408332467079,0.028270550072193146,-0.05236710608005524,0.013832693919539452,0.10549424588680267,-0.1504925936460495,0.04579198732972145,0.05509837344288826,-0.1997680962085724,0.15631046891212463,0.042316727340221405,-0.017336193472146988,0.02395687624812126,-0.036151397973299026,-0.10194570571184158,-0.06460698693990707,0.24390262365341187,-0.2809107303619385,0.22127604484558105,0.2234485149383545,0.06370076537132263,0.13879287242889404,0.02190246433019638,0.10918933153152466,-0.025838086381554604,0.010740015655755997,-0.09715697914361954,-0.013207517564296722,0.0729929581284523,-0.09213361144065857,-0.011896252632141113,0.03725235536694527 },
    { -0.05498696118593216,0.05113350972533226,0.05531687289476395,-0.09476281702518463,-0.10580143332481384,-0.030396001413464546,-0.0010913275182247162,-0.1052420362830162,0.10643371194601059,-0.06002769619226456,0.21559466421604156,-0.0154794380068779,-0.2808150053024292,-0.0600040964782238,0.0403410941362381,0.0919484943151474,-0.1326475292444229,-0.10436968505382538,-0.19102108478546143,-0.0795152336359024,-0.04159826040267944,0.021737679839134216,0.019609834998846054,-0.04616895690560341,-0.17645984888076782,-0.1682741940021515,-0.08414799720048904,-0.12163443863391876,0.026273906230926514,-0.07314532995223999,0.051960788667201996,0.09379777312278748,-0.1891252100467682,-0.059710826724767685,0.011947771534323692,0.014459501951932907,-0.07258555293083191,-0.07127422094345093,0.16470138728618622,-0.02148628979921341,-0.12428629398345947,0.027712488546967506,0.015993379056453705,0.16111883521080017,0.24940571188926697,-0.0341871902346611,0.007754117250442505,-0.0865824818611145,0.049111928790807724,-0.2875933051109314,-0.0030787810683250427,0.1479477882385254,0.026672853156924248,0.15996402502059937,0.053178343921899796,-0.12440557777881622,0.09016239643096924,0.16699564456939697,-0.20755255222320557,0.06710638105869293,0.03819752484560013,-0.14896832406520844,-0.04662872850894928,-0.027680709958076477,0.14560657739639282,0.10368138551712036,-0.04873866215348244,-0.176229327917099,0.18367958068847656,-0.14353249967098236,-0.08559320867061615,0.12607094645500183,-0.12804611027240753,-0.1290942132472992,-0.23025573790073395,0.021010763943195343,0.3133254051208496,0.15103624761104584,-0.16475120186805725,0.017042677849531174,-0.13039177656173706,-0.04864456504583359,0.0031204214319586754,-0.053125109523534775,-0.011741500347852707,-0.1571718007326126,-0.059252332895994186,0.018309343606233597,0.24032914638519287,-0.08475983887910843,0.04019710421562195,0.2481696456670761,0.02487030252814293,-0.07094515115022659,0.02628476172685623,-0.00712304562330246,-0.04976058006286621,-0.024602070450782776,-0.0648813247680664,-0.02827349305152893,0.04442674666643143,-0.16200967133045197,0.02547246217727661,0.10203316062688828,-0.1560029238462448,0.165535107254982,-0.011267822235822678,-0.023648075759410858,0.001466844230890274,-0.07181182503700256,0.012026548385620117,0.011392228305339813,0.21011506021022797,-0.22743536531925201,0.2915312945842743,0.1937231570482254,0.011561829596757889,0.07002981752157211,0.00032756663858890533,0.09776002168655396,-0.07220730185508728,0.004206985235214233,-0.12127777934074402,-0.0900777205824852,0.012966942973434925,0.008859572932124138,-0.01780618168413639,0.052432361990213394 },
};
/* DBSCAN label attached to every Face. */
typedef enum {
/* Not visited yet; also the value a freshly zero-filled Face starts with. */
UNDEFINED = 0,
/* Has at least minPts neighbours within eps. */
CORE = 1,
/* Reached from a core face but without enough neighbours of its own. */
EDGE = 2,
/* Visited and found to have fewer than minPts neighbours. */
NOISE = 3
} ClusterTypes;
/* One face loaded from the database, together with its clustering state. */
typedef struct Face {
/* Descriptor vector read from the facedescriptors blob; compared with euclideanDistance(). */
float descriptor[128];
/* Cluster id assigned by DBSCAN(); main() resets it to 0 for NOISE/UNDEFINED faces. */
long int clusterId;
/* faces.id column. */
long faceId;
/* faces.photoId column. */
long photoId;
/* faces.faceConfidence column. */
float confidence;
/* Set by main() while filtering faces against profileDescriptors. */
float profileDistance;
/* Current DBSCAN label. */
ClusterTypes clusterType;
/* Heap array allocated by main(): distances[i] is the distance to ppFaces[i]; 0 means "self or not computed". */
float *distances;
} Face;
/* Node of the singly linked neighbour list built by RangeQuery() and released by freeChain(). */
typedef struct FaceLink {
/* Next node, or NULL at the end of the chain. */
struct FaceLink *pNext;
/* Distance between the queried face and pFace. */
float distance;
/* The neighbouring face; borrowed, not owned by the list. */
Face *pFace;
} FaceLink;
float euclideanDistance(float *a, float *b) {
float sum = 0.0L;
for (int i = 0; i < 128; i++) {
float delta = a[i] - b[i];
sum += delta * delta;
}
return sqrtf(sum);
}
/* https://en.wikipedia.org/wiki/DBSCAN */
#if 0
DBSCAN(DB, distFunc, eps, minPts) {
C = 0 /* Cluster counter */
for each point P in database DB {
if label(P) ≠ undefined then continue /* Previously processed in inner loop */
Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */
if |N| < minPts then { /* Density check */
label(P) = Noise /* Label as Noise */
continue
}
C = C + 1 /* next cluster label */
label(P) = C /* Label initial point */
Seed set S = N \ {P} /* Neighbors to expand */
for each point Q in S { /* Process every seed point */
if label(Q) = Noise then label(Q) = C /* Change Noise to border point */
if label(Q) ≠ undefined then continue /* Previously processed */
label(Q) = C /* Label neighbor */
Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */
if |N| ≥ minPts then { /* Density check */
S = S ∪ N /* Add new neighbors to seed set */
}
}
}
}
RangeQuery(DB, distFunc, Q, eps) {
Neighbors = empty list
for each point P in database DB { /* Scan all points in the database */
if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */
Neighbors = Neighbors ∪ {P} /* Add to result */
}
}
return Neighbors
}
#endif
/*
 * Collect the neighbours of pQ: every face whose precomputed distance to pQ is
 * greater than zero and at most eps.
 *
 * ppFaces / faceCount: the face array; pQ->distances is indexed in parallel with it.
 * pQ:                  the face being queried; a face with the same faceId is skipped.
 * eps:                 inclusive neighbourhood radius.
 * clusterToBreak:      when > 0, only faces whose clusterId equals it are considered.
 *
 * A stored distance of exactly 0 is treated as "no entry" and never matches.
 *
 * Returns a newly allocated chain (NULL when there are no neighbours) that the
 * caller must release with freeChain(). Because NULL already means "no neighbours",
 * an allocation failure cannot be reported through the return value; it is treated
 * as fatal and the process exits.
 */
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) {
    FaceLink *pNeighbors = NULL;

    for (long int i = 0; i < faceCount; i++) {
        Face *pFace = ppFaces[i];

        if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
            continue;
        }
        if (pFace->faceId == pQ->faceId) {
            continue;
        }
        if (pQ->distances[i] > 0.0 && pQ->distances[i] <= eps) {
            FaceLink *pLink = calloc(1, sizeof(*pLink));
            if (!pLink) {
                fprintf(stderr, "Unable to allocate storage for neighbor link.\n");
                exit(EXIT_FAILURE);
            }
            pLink->distance = pQ->distances[i];
            pLink->pFace = pFace;
            /* Push on the front: the chain ends up in reverse index order. */
            pLink->pNext = pNeighbors;
            pNeighbors = pLink;
        }
    }
    return pNeighbors;
}
/* Release every node of a neighbour chain. The faces the nodes point at are not
 * freed. Passing NULL is allowed and does nothing. */
void freeChain(FaceLink *pLink) {
    for (FaceLink *pNode = pLink; pNode != NULL; ) {
        FaceLink *pFollowing = pNode->pNext;
        free(pNode);
        pNode = pFollowing;
    }
}
/* Number of nodes in a neighbour chain; 0 for a NULL chain. */
long int chainLength(FaceLink *pLink) {
    long int total = 0;
    for (const FaceLink *pNode = pLink; pNode != NULL; pNode = pNode->pNext) {
        total++;
    }
    return total;
}
/* Last cluster id handed out. Shared by every DBSCAN() call so that ids stay
 * unique when main() re-runs the algorithm to split an existing cluster. */
long int C = 0;

/*
 * Run DBSCAN over the faces, using the distances precomputed in Face.distances.
 *
 * ppFaces / faceCount: faces to label.
 * eps:                 neighbourhood radius handed to RangeQuery().
 * minPts:              minimum neighbour count for a face to be a core point.
 * clusterToBreak:      when > 0, only faces currently in that cluster are visited
 *                      and considered as neighbours (used to re-split one cluster);
 *                      otherwise every face takes part.
 *
 * Only faces whose clusterType is UNDEFINED are used as starting points. Each face
 * ends up labelled CORE, EDGE or NOISE; clustered faces get clusterId set to a new
 * value of the global counter C.
 *
 * Returns the number of clusters created by this call.
 */
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) {
    long int count = 0;

    for (long int i = 0; i < faceCount; i++) {
        Face *pFace = ppFaces[i];

        if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
            continue;
        }
        if (pFace->clusterType != UNDEFINED) {
            continue; /* Already processed, possibly as a neighbour in the inner loop. */
        }

        FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak);
        if (chainLength(pNeighbors) < minPts) {
            pFace->clusterType = NOISE;
            freeChain(pNeighbors);
            continue;
        }

        /* Start a new cluster seeded by this face. */
        C++;
        count++;
        pFace->clusterId = C;
        pFace->clusterType = CORE;

        /* Remember the end of the seed chain so expansions are appended in O(1)
         * instead of re-walking the whole chain for every new core face. */
        FaceLink *pTail = pNeighbors;
        while (pTail && pTail->pNext) {
            pTail = pTail->pNext;
        }

        /* The chain grows while it is being walked: neighbours of every newly
         * found core face are appended and get visited in turn. */
        for (FaceLink *pLink = pNeighbors; pLink; pLink = pLink->pNext) {
            Face *pQ = pLink->pFace;

            if (pQ->faceId == pFace->faceId) {
                continue;
            }
            if (pQ->clusterType == NOISE) {
                /* Former noise within reach of a core face becomes a border face. */
                pQ->clusterId = C;
                pQ->clusterType = EDGE;
            }
            if (pQ->clusterType != UNDEFINED) {
                continue; /* Already labelled (including the case just above). */
            }

            pQ->clusterId = C;
            pQ->clusterType = EDGE;

            FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak);
            if (chainLength(pSubNeighbors) >= minPts) {
                pQ->clusterType = CORE;
                /* Append these neighbors to the end of the chain */
                pTail->pNext = pSubNeighbors;
                while (pTail->pNext) {
                    pTail = pTail->pNext;
                }
            } else {
                freeChain(pSubNeighbors);
            }
        }
        /* The appended sub-chains are part of pNeighbors now and are freed with it. */
        freeChain(pNeighbors);
    }
    return count;
}
/* Context handed to the sqlite3_exec() row callbacks parseFaceIdRow() and parseFaceDescriptor(). */
typedef struct {
/* Open database connection; used by parseFaceDescriptor() to open descriptor blobs. */
sqlite3 *db;
/* Array of preallocated Face pointers that the callbacks fill in. */
Face **ppFaces;
/* Number of ppFaces entries that parseFaceDescriptor() searches by faceId. */
long int count;
/* Number of ppFaces entries already filled by parseFaceIdRow(); also the next free slot. */
long int initialized;
} FaceCallbackData;
/*
 * sqlite3_exec() row callback: parse the first column as a base-10 integer and
 * store it in the long int that `data` points to. A NULL column value is stored
 * as 0. Always returns 0 so the query keeps running.
 */
int parseFaceIdCount(void *data, int argc, char **argv, char **column) {
    const char *text = argv[0];
    if (text == NULL) {
        text = "0";
    }
    *(long int *)data = strtol(text, NULL, 10);
    return 0;
}
int parseFaceDescriptor(void *data, int argc, char **argv, char **column) {
FaceCallbackData *map = data;
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
char sql_buf[1024];
int rc;
Face *pFace = NULL;
for (long int i = 0; i < map->count; i++) {
pFace = map->ppFaces[i];
if (pFace->faceId == faceId) {
break;
}
pFace = NULL;
}
if (!pFace) {
return SQLITE_OK;
}
// Getting here means we have a valid file handle, f, and a valid db handle, db
// Also, a blank row has been inserted with key rowid
sqlite3_blob *blob;
rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 1, &blob);
if (SQLITE_OK != rc) {
fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db));
return rc;
}
if (SQLITE_OK != (rc = sqlite3_blob_read(blob, pFace->descriptor, sizeof(pFace->descriptor), 0))) {
fprintf(stderr, "Error reading from blob handle.\n");
return rc;
}
sqlite3_blob_close(blob);
return SQLITE_OK;
}
int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
FaceCallbackData *map = data;
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
float confidence = strtof(argv[2] ? argv[2] : "0.0", NULL);
if (confidence < 0.9) {
return 0;
}
Face *pFace = map->ppFaces[map->initialized++];
pFace->faceId = faceId;
pFace->photoId = photoId;
pFace->confidence = confidence;
return 0;
}
/*
 * Tally cluster sizes: for every CORE or EDGE face, increment stats[clusterId - 1].
 * The caller provides a zeroed stats array large enough for the highest cluster id
 * in use. NOISE and UNDEFINED faces are not counted.
 */
void getClusterCounts(int *stats, Face **ppFaces, long int entries) {
    for (int idx = 0; idx < entries; idx++) {
        Face *pFace = ppFaces[idx];
        if (pFace->clusterType == CORE || pFace->clusterType == EDGE) {
            stats[pFace->clusterId - 1] += 1;
        }
    }
}
/* Count the faces that belong to clusterId and are labelled CORE or EDGE
 * (that is, neither NOISE nor UNDEFINED). */
long int getClusterCount(Face **ppFaces, long int entries, int clusterId) {
    long int members = 0;
    for (long int idx = 0; idx < entries; idx++) {
        const Face *pFace = ppFaces[idx];
        if (pFace->clusterId != clusterId) {
            continue;
        }
        if (pFace->clusterType == NOISE || pFace->clusterType == UNDEFINED) {
            continue;
        }
        members++;
    }
    return members;
}
/*
* 1. Count how many entries there are
* 2. Allocate storage to hold all entries
* 3. Read all entries into flat array
* 4. Allocate MxM matrix and pre-calculate distances
* 5. Perform DBSCAN across MxM matrix to cluster
*/
int main(int argc, char *argv[]) {
long maxId = 0;
long i;
long int entries = 0;
long int minPts = MIN_PTS;
float maxDistance = MAX_DISTANCE;
long int maxClusterSize = MAX_CLUSTER_SIZE;
if (argc == 1) {
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
return -1;
}
if (argc > 2) {
sscanf(argv[2], "%f", &maxDistance);
}
if (argc > 3) {
sscanf(argv[3], "%ld", &minPts);
}
if (argc > 4) {
sscanf(argv[4], "%ld", &maxClusterSize);
}
fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts);
/* Allocate storage for all distances */
sqlite3 *db;
int rc = sqlite3_open("db/photos.db", &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
fprintf(stderr, "DB opened.\n");
char *err_msg = NULL;
entries = 0;
rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "%ld faces in DB.\n", entries);
Face **ppFaces = malloc(sizeof(Face *) * entries);
if (!ppFaces) {
fprintf(stderr, "Unable to allocate storage face descriptors.");
return -1;
}
for (i = 0; i < entries; i++) {
ppFaces[i] = malloc(sizeof(Face));
memset(ppFaces[i], 0, sizeof(Face));
}
for (i = 0; i < entries; i++) {
ppFaces[i]->distances = malloc(sizeof(*ppFaces[i]->distances) * entries);
if (!ppFaces[i]->distances) {
fprintf(stderr, "Unable to allocate storage for distance dictionary.");
return -1;
}
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
}
fprintf(stderr, "Storage allocated for %ld faces.\n", entries);
FaceCallbackData data = {
db: db,
ppFaces: ppFaces,
count: entries,
initialized: 0
};
rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
entries = data.initialized;
data.count = data.initialized;
fprintf(stderr, "Face data loaded from DB.\n");
rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg);
if (rc != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "Descriptor data loaded from DB\n");
float profileDistance = 1.0;
long int dst, src;
for (src = 0, dst = 0; src < entries; src++) {
Face *pFace = ppFaces[src];
profileDistance = 1.0;
for (int j = 0; j < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); j++) {
profileDistance = euclideanDistance(pFace->descriptor, profileDescriptors[j]);
if (profileDistance > 0.5) {
profileDistance = 1.0;
} else {
break;
}
}
if (profileDistance <= 0.5) {
free(pFace->distances);
} else {
pFace->profileDistance = profileDistance;
ppFaces[dst++] = pFace;
}
}
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
entries = dst;
long int processed = 0;
long int last = 0;
float total = 0.0;
long int sampleSize = 0;
fprintf(stderr, "Calculating distances O(N^2) times for %ld faces.\n", entries);
for (long i = 0; i < entries; i++) {
Face *pLink = ppFaces[i];
for (long j = 0; j < entries; j++) {
Face *pTarget = ppFaces[j];
processed++;
if (processed % 1000 == 0) {
int perc = 100 * processed / (entries * entries);
if (perc != last) {
fprintf(stderr, "\rComputed %d%% complete.", perc);
last = perc;
}
}
if (i == j) {
pLink->distances[i] = 0.0;
pTarget->distances[j] = 0.0;
continue;
}
if (pLink->distances[j] != 0.0) {
continue;
}
pLink->distances[j] =
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
sampleSize++;
total += pLink->distances[j];
}
}
fprintf(stderr, "\nAverage distance: %f\n", 1. * total / sampleSize);
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
int recalcNeeded = clusters > 0 ? 1 : 0;
float reducedDistance = maxDistance;
while (recalcNeeded) {
int *stats = malloc(sizeof(int) * clusters), delta = 0;
memset(stats, 0, sizeof(int) * clusters);
getClusterCounts(stats, ppFaces, entries);
recalcNeeded = 0;
reducedDistance -= 0.05L;
if (reducedDistance < 0.1) {
break;
}
for (int i = 0; i < clusters; i++) {
if (stats[i] < maxClusterSize) {
continue;
}
for (int j = 0; j < entries; j++) {
Face *pFace = ppFaces[j];
if (pFace->clusterId == i + 1) {
pFace->clusterType = UNDEFINED;
}
}
int split = DBSCAN(ppFaces, entries, reducedDistance, minPts, i + 1);
if (split) {
recalcNeeded |= 1;
} else {
continue;
}
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters (max: %f).\n",
i + 1, stats[i], split, reducedDistance);
for (int c = 0; c < split; c++) {
fprintf(stderr, "%ld. %ld\n", delta + c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters));
}
for (int j = 0; j < entries; j++) {
Face *pFace = ppFaces[j];
if (pFace->clusterId == i + 1) {
pFace->clusterType = CORE;
}
}
delta += split;
}
clusters += delta;
free(stats);
}
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
for (i = 0; i < entries; i++) {
switch (ppFaces[i]->clusterType) {
case NOISE:
ppFaces[i]->clusterId = 0;
outlier++;
break;
case UNDEFINED:
ppFaces[i]->clusterId = 0;
undefined++;
break;
case CORE:
core++;
break;
case EDGE:
reachable++;
break;
}
}
fprintf(stderr, "\n%ld clusters being written:\n", clusters);
fprintf(stderr, "%ld NOISE\n", outlier);
fprintf(stderr, "%ld UNDEFINED\n", undefined);
fprintf(stderr, "%ld CORE\n", core);
fprintf(stderr, "%ld EDGE\n", reachable);
fprintf(stdout, "<script>\nvar clusters = [\n");
for (long i = 1; i <= clusters; i++) {
long nodes = 0;
for (long int j = 0; j < entries; j++) {
if (ppFaces[j]->clusterId == i && ppFaces[j]->clusterId != UNDEFINED) {
if (nodes == 0) {
fprintf(stdout, "/* %ld. */ [", i);
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
} else {
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
}
nodes++;
}
}
if (nodes) {
if (i < clusters) {
fprintf(stdout, "],\n");
} else {
fprintf(stdout, "]\n");
}
}
}
fprintf(stdout, "];\n</script>\n");
fprintf(stderr, "Skipping face writing!\n");
return 0;
char *sql =
"DELETE FROM facedistances;"
"BEGIN TRANSACTION;";
rc = sqlite3_exec(db, sql, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
fprintf(stderr, "facedistances deleted and transaction started.\n");
for (long i = 0; i < entries; i++) {
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
}
char sqlBuf[1024];
processed = 0;
last = 0;
for (long i = 0; i < entries; i++) {
Face *pLink = ppFaces[i];
for (long j = 0; j < entries; j++) {
Face *pTarget = ppFaces[j];
processed++;
if (processed % 1000 == 0) {
int perc = 100 * processed / (entries * entries);
if (perc != last) {
fprintf(stderr, "\rComputed %d%% complete.", perc);
last = perc;
}
}
if (i == j) {
pLink->distances[i] = 0.0;
pTarget->distances[j] = 0.0;
continue;
}
if (pLink->distances[j] != 0.0) {
continue;
}
pLink->distances[j] =
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
if (pLink->distances[j] < 0.5) {
sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%f);",
((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId),
((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId),
pLink->distances[j]);
rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
}
}
}
fprintf(stderr, "\n");
sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId);
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
if (rc != SQLITE_OK ) {
fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
sqlite3_close(db);
return 1;
}
sqlite3_close(db);
return 0;
}