672 lines
33 KiB
C
672 lines
33 KiB
C
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <dirent.h>
|
||
#include <sys/types.h>
|
||
#include <string.h>
|
||
#include <math.h>
|
||
#include <sqlite3.h>
|
||
|
||
#ifndef MIN_PTS
|
||
#define MIN_PTS 3
|
||
#endif
|
||
|
||
#ifndef MAX_DISTANCE
|
||
#define MAX_DISTANCE 0.47L
|
||
#endif
|
||
|
||
#ifndef MAX_CLUSTER_SIZE
|
||
#define MAX_CLUSTER_SIZE 150
|
||
#endif
|
||
|
||
/* descriptors extracted from a profile face photo, used to filter out faces near a profile image
|
||
* (which all identify as the same person) */
|
||
float profileDescriptors[][128] = {
|
||
{ -0.061601437628269196,0.11936908215284348,0.05469074845314026,-0.01107284426689148,-0.11588133871555328,-0.002019548788666725,-0.11300364136695862,-0.07167613506317139,0.1764453798532486,-0.05901609733700752,0.2385266125202179,0.012328468263149261,-0.2573902904987335,0.04025992751121521,-0.11443999409675598,0.11594478785991669,-0.17500847578048706,-0.07691430300474167,-0.12378361076116562,-0.11740189790725708,0.03630289435386658,0.06033151596784592,0.04939301311969757,0.004410859197378159,-0.20615431666374207,-0.25997376441955566,-0.08603082597255707,-0.09167610853910446,0.03498300909996033,-0.18279242515563965,0.041221488267183304,0.09842021763324738,-0.10988932847976685,0.031966306269168854,0.013916028663516045,0.008073337376117706,0.013303801417350769,-0.06880992650985718,0.17893345654010773,0.027153145521879196,-0.1894378960132599,-0.020184200257062912,0.05442515388131142,0.2977772355079651,0.20385639369487762,0.010473722591996193,0.029712140560150146,-0.10179305076599121,0.07872007042169571,-0.2609504461288452,0.004072457551956177,0.20807254314422607,0.039381805807352066,0.10894455015659332,0.0656222552061081,-0.21101640164852142,0.0015651732683181763,0.09899670630693436,-0.1148049384355545,0.03364836052060127,0.04903073608875275,-0.03714796528220177,-0.06581848114728928,-0.08302658796310425,0.1889929473400116,0.08697575330734253,-0.14220063388347626,-0.18263882398605347,0.17710572481155396,-0.1408098042011261,-0.04034702479839325,0.14808061718940735,-0.14523223042488098,-0.22445881366729736,-0.2085285633802414,0.04144661873579025,0.3701931834220886,0.140765979886055,-0.1524423211812973,0.05934419482946396,-0.09778376668691635,-0.05247225984930992,-0.07087128609418869,0.08293693512678146,-0.07502841204404831,0.01957462728023529,-0.07184667885303497,0.05862969905138016,0.19565513730049133,-0.07941281795501709,0.04070422798395157,0.26956090331077576,0.01857529580593109,-0.03987279161810875,-0.0008930861949920654,0.08536889404058456,-0.16341862082481384,0.03542519360780716,-0.10296104848384857,-0.022122247144579887,0.08253192901611328,-0.07827107608318329,0.01481425017118454,0.08741594851016998,-0.17036983370780945,0.20205597579479218,-0.010307766497135162,-0.04007430374622345,-0.0006583929061889648,-0.016108371317386627,-0.04460605978965759,-0.034171007573604584,0.24336735904216766,-0.2778766453266144,0.18527397513389587,0.1877088099718094,0.098460853099823,0.148922860622406,0.06344866007566452,0.13334715366363525,0.0003817584365606308,0.004202648997306824,-0.11708918958902359,-0.06176237016916275,-0.027644027024507523,-0.10988462716341019,-0.10410083830356598,0.0056285373866558075 },
|
||
{ -0.05286264419555664,0.1027536615729332,0.0177864171564579,-0.007238239049911499,-0.15839841961860657,-0.013195162639021873,-0.055493343621492386,-0.011598028242588043,0.11436303704977036,-0.023542361333966255,0.23070596158504486,0.006959080696105957,-0.23580302298069,-0.04991580545902252,-0.048089612275362015,0.1130392998456955,-0.1211254671216011,-0.053501617163419724,-0.20760701596736908,-0.11128067970275879,0.015699222683906555,0.048812925815582275,-0.004359103739261627,-0.01246669515967369,-0.08746911585330963,-0.2714271545410156,-0.07527103275060654,-0.07152064889669418,0.13769567012786865,-0.1629178822040558,-0.030123017728328705,-0.006879039108753204,-0.15528543293476105,-0.07373329252004623,0.035297941416502,-0.0019944999366998672,0.00020968541502952576,-0.06796075403690338,0.15615630149841309,-0.02517728880047798,-0.15598921477794647,0.048431847244501114,0.030592434108257294,0.22424748539924622,0.18237638473510742,0.03839367255568504,-0.018516220152378082,-0.07987417280673981,0.10236746072769165,-0.23993468284606934,0.01739603653550148,0.20749998092651367,0.07386040687561035,0.09853726625442505,0.06335064768791199,-0.14400270581245422,-0.0020356550812721252,0.16046060621738434,-0.1502964049577713,0.03028072975575924,-0.013792440295219421,-0.04380643367767334,-0.05555684491991997,-0.13338367640972137,0.17439348995685577,0.08730429410934448,-0.11853333562612534,-0.14580222964286804,0.11578165739774704,-0.15631376206874847,0.0005779601633548737,0.057921670377254486,-0.11303769052028656,-0.14606109261512756,-0.24737423658370972,0.07011876255273819,0.3969222903251648,0.15113821625709534,-0.19518792629241943,0.029213692992925644,-0.11209011822938919,-0.058080971240997314,-0.01972014084458351,-0.010737363249063492,-0.12097814679145813,-0.017350099980831146,-0.057253990322351456,0.049719784408807755,0.19717393815517426,-0.06898155063390732,0.06022507697343826,0.2233758270740509,-0.0060714855790138245,0.03075665421783924,0.014615798369050026,0.029250048100948334,-0.11849372833967209,-0.06616988033056259,-0.042307183146476746,0.0033659040927886963,0.0857284739613533,-0.20014573633670807,0.034076105803251266,0.06595847010612488,-0.16573786735534668,0.12949475646018982,0.008994176983833313,0.004620999097824097,0.02356269210577011,0.0182054340839386,-0.0561535507440567,-0.035449616611003876,0.23626668751239777,-0.2073628008365631,0.3139644265174866,0.1955631524324417,0.029616639018058777,0.11366549134254456,0.07708290964365005,0.10866693407297134,-0.06293588131666183,-0.0077966004610061646,-0.057174161076545715,-0.08943509310483932,0.004156911745667458,-0.0984298512339592,-0.005768085829913616,0.0017160219140350819},
|
||
{0.0011762604117393494,0.15072135627269745,0.020299416035413742,-0.06642502546310425,-0.060089170932769775,0.020552314817905426,-0.043852221220731735,-0.06478262692689896,0.17363804578781128,-0.03403667360544205,0.2395429164171219,0.01041267067193985,-0.24776551127433777,-0.046057891100645065,-0.08346940577030182,0.099162757396698,-0.19483637809753418,-0.04046493023633957,-0.1129814013838768,-0.1319343000650406,0.04903741180896759,0.07213468849658966,0.051988281309604645,-0.01571550965309143,-0.1113474890589714,-0.25194793939590454,-0.06103351712226868,-0.13213123381137848,0.10333564877510071,-0.15162967145442963,0.061464592814445496,0.09279994666576385,-0.11863565444946289,-0.07045092433691025,-0.01209489069879055,-0.009468602016568184,-0.10253313928842545,-0.09072671830654144,0.22084617614746094,0.006743522360920906,-0.06707601249217987,0.004636548459529877,-0.00785135105252266,0.3281668722629547,0.11846040189266205,-0.0055367788299918175,0.04063810035586357,-0.06342281401157379,0.1398719847202301,-0.29172199964523315,0.06687593460083008,0.17154791951179504,0.08854547142982483,0.16494478285312653,0.09101200103759766,-0.18667718768119812,0.08216162025928497,0.12080536782741547,-0.1759440004825592,0.10922732204198837,0.03968639671802521,-0.06957980990409851,-0.02280031517148018,-0.0626034140586853,0.2077835500240326,0.10969341546297073,-0.08934597671031952,-0.1270003616809845,0.14771263301372528,-0.05404433608055115,-0.05664592981338501,0.07035835832357407,-0.12639567255973816,-0.1545434296131134,-0.1935984045267105,0.05806276947259903,0.3116001486778259,0.11509235948324203,-0.1787276566028595,0.020631596446037292,-0.08929038792848587,-0.03143969923257828,0.053848765790462494,0.03168227896094322,-0.07926452159881592,-0.05334986746311188,-0.0591375008225441,0.024413203820586205,0.16138945519924164,-0.10341612249612808,-0.0791877955198288,0.2381574809551239,0.012866765260696411,0.009398067370057106,0.049464400857686996,0.02796872705221176,-0.11044278740882874,0.05444034934043884,-0.09540048986673355,0.0681515783071518,0.09935745596885681,-0.1683940887451172,-0.047316670417785645,0.04480529949069023,-0.1389012485742569,0.2227819412946701,0.03692697733640671,0.01761404424905777,0.013868780806660652,-0.05882856249809265,-0.11958207935094833,0.026760146021842957,0.25092488527297974,-0.26664063334465027,0.33623644709587097,0.16868337988853455,0.05719297379255295,0.17470279335975647,0.057424478232860565,0.08439406007528305,-0.0024905502796173096,-0.037533506751060486,-0.1358765959739685,-0.09953152388334274,0.05512324720621109,-0.025295836851000786,-0.0866888239979744,0.026549236848950386},
|
||
{-0.1503148078918457,0.12206108123064041,0.02967856079339981,-0.03082158789038658,-0.1129150539636612,0.0478484109044075,-0.04801269620656967,-0.07795510441064835,0.1626167893409729,-0.041412316262722015,0.2022078037261963,0.031001679599285126,-0.20948299765586853,-0.04371509701013565,-0.0841950997710228,0.10647765547037125,-0.1393173485994339,-0.17036688327789307,-0.07359742373228073,-0.09497823566198349,-0.023887813091278076,0.02798299677670002,-0.011271866038441658,0.02640014886856079,-0.1520528793334961,-0.22326916456222534,-0.023297179490327835,-0.10385704040527344,0.02730628475546837,-0.09421993046998978,0.007815570570528507,0.03076392412185669,-0.2347397357225418,-0.07311992347240448,0.03199088200926781,0.1157669946551323,-0.0806952491402626,0.0058890413492918015,0.25238266587257385,0.0066128019243478775,-0.11484481394290924,0.058332689106464386,0.07401782274246216,0.30297812819480896,0.13892878592014313,0.015657028183341026,-0.026928886771202087,-0.041992999613285065,0.08834651112556458,-0.25814956426620483,0.025386730208992958,0.24164703488349915,0.11268384754657745,0.13700686395168304,-0.0037370696663856506,-0.1474452018737793,0.07658589631319046,0.11919880658388138,-0.16230738162994385,0.06895574927330017,0.05145460367202759,-0.09962668269872665,-0.07351469993591309,0.016625329852104187,0.24626575410366058,0.09379985928535461,-0.07967095822095871,-0.14824430644512177,0.1664988100528717,-0.07381315529346466,-0.0701378807425499,0.094453826546669,-0.1053486168384552,-0.173324853181839,-0.2059929072856903,0.11443521082401276,0.37259921431541443,0.12261700630187988,-0.17118749022483826,0.1002739816904068,-0.0962643027305603,0.0028629377484321594,0.02517216093838215,0.009352639317512512,-0.07928505539894104,-0.027248665690422058,-0.06288609653711319,0.03766700252890587,0.17690418660640717,0.00913313403725624,-0.016148168593645096,0.16757085919380188,0.081269770860672,-0.003465055488049984,0.08387735486030579,0.05247170478105545,-0.035166703164577484,-0.02116062119603157,-0.15585437417030334,-0.0015826895833015442,0.06662821024656296,-0.07819218933582306,0.03278189152479172,0.059533413499593735,-0.18928831815719604,0.14338715374469757,0.029624812304973602,-0.04126661643385887,-0.017487522214651108,0.024950236082077026,-0.06610194593667984,-0.007236182689666748,0.1634015440940857,-0.3118166923522949,0.32635706663131714,0.1551782488822937,-0.006172630935907364,0.1549994796514511,0.11846084892749786,0.05585896223783493,0.005930176004767418,0.0026269033551216125,-0.08288535475730896,-0.14617297053337097,0.04124702885746956,-0.023869477212429047,0.056380532681941986,0.059275589883327484},
|
||
{-0.11449231207370758,0.08218079805374146,0.052244849503040314,-0.05849563330411911,-0.09875518083572388,-0.06790590286254883,-0.03999913111329079,-0.07766807824373245,0.15596428513526917,-0.0483558364212513,0.21132533252239227,-0.01931747794151306,-0.24744108319282532,-0.10812359303236008,0.058341797441244125,0.11405576020479202,-0.1183864176273346,-0.0856679230928421,-0.13984550535678864,-0.11198180168867111,-0.03631503880023956,-0.019455134868621826,0.038762785494327545,0.04496624693274498,-0.10771285742521286,-0.25013211369514465,-0.06068229675292969,-0.11673891544342041,0.07542752474546432,-0.14139781892299652,0.0017466647550463676,0.043040819466114044,-0.17083533108234406,-0.07615770399570465,-0.026609988883137703,0.06443975865840912,-0.056286998093128204,-0.08603766560554504,0.17082615196704865,-0.03381050005555153,-0.14733527600765228,-0.1067851260304451,-0.04236249998211861,0.23973657190799713,0.21202436089515686,-0.008970214053988457,0.017333630472421646,-0.012529492378234863,0.05171665549278259,-0.2481795996427536,-0.0033811144530773163,0.11406000703573227,0.11761488020420074,0.06430923938751221,0.05201558396220207,-0.18238718807697296,0.03609202429652214,0.10110349208116531,-0.16749322414398193,-0.017176488414406776,0.02417522668838501,-0.09461188316345215,-0.08760598301887512,-0.09778585284948349,0.2984878420829773,0.09519178420305252,-0.11396373808383942,-0.11740261316299438,0.15713796019554138,-0.11983270198106766,0.0014888402074575424,0.11709675192832947,-0.11239394545555115,-0.1615625023841858,-0.21783563494682312,0.062243033200502396,0.3098629415035248,0.13144542276859283,-0.19728504121303558,-0.00864078477025032,-0.1421070396900177,-0.027583356946706772,-0.03719120845198631,0.004923664033412933,-0.08225893974304199,0.016801394522190094,-0.05810196325182915,0.0058958642184734344,0.15847420692443848,-0.09055405110120773,0.03644956648349762,0.2036021500825882,0.012157909572124481,-0.014717459678649902,-0.008820030838251114,-0.07779868692159653,-0.03748408332467079,0.028270550072193146,-0.05236710608005524,0.013832693919539452,0.10549424588680267,-0.1504925936460495,0.04579198732972145,0.05509837344288826,-0.1997680962085724,0.15631046891212463,0.042316727340221405,-0.017336193472146988,0.02395687624812126,-0.036151397973299026,-0.10194570571184158,-0.06460698693990707,0.24390262365341187,-0.2809107303619385,0.22127604484558105,0.2234485149383545,0.06370076537132263,0.13879287242889404,0.02190246433019638,0.10918933153152466,-0.025838086381554604,0.010740015655755997,-0.09715697914361954,-0.013207517564296722,0.0729929581284523,-0.09213361144065857,-0.011896252632141113,0.03725235536694527},
|
||
{-0.05498696118593216,0.05113350972533226,0.05531687289476395,-0.09476281702518463,-0.10580143332481384,-0.030396001413464546,-0.0010913275182247162,-0.1052420362830162,0.10643371194601059,-0.06002769619226456,0.21559466421604156,-0.0154794380068779,-0.2808150053024292,-0.0600040964782238,0.0403410941362381,0.0919484943151474,-0.1326475292444229,-0.10436968505382538,-0.19102108478546143,-0.0795152336359024,-0.04159826040267944,0.021737679839134216,0.019609834998846054,-0.04616895690560341,-0.17645984888076782,-0.1682741940021515,-0.08414799720048904,-0.12163443863391876,0.026273906230926514,-0.07314532995223999,0.051960788667201996,0.09379777312278748,-0.1891252100467682,-0.059710826724767685,0.011947771534323692,0.014459501951932907,-0.07258555293083191,-0.07127422094345093,0.16470138728618622,-0.02148628979921341,-0.12428629398345947,0.027712488546967506,0.015993379056453705,0.16111883521080017,0.24940571188926697,-0.0341871902346611,0.007754117250442505,-0.0865824818611145,0.049111928790807724,-0.2875933051109314,-0.0030787810683250427,0.1479477882385254,0.026672853156924248,0.15996402502059937,0.053178343921899796,-0.12440557777881622,0.09016239643096924,0.16699564456939697,-0.20755255222320557,0.06710638105869293,0.03819752484560013,-0.14896832406520844,-0.04662872850894928,-0.027680709958076477,0.14560657739639282,0.10368138551712036,-0.04873866215348244,-0.176229327917099,0.18367958068847656,-0.14353249967098236,-0.08559320867061615,0.12607094645500183,-0.12804611027240753,-0.1290942132472992,-0.23025573790073395,0.021010763943195343,0.3133254051208496,0.15103624761104584,-0.16475120186805725,0.017042677849531174,-0.13039177656173706,-0.04864456504583359,0.0031204214319586754,-0.053125109523534775,-0.011741500347852707,-0.1571718007326126,-0.059252332895994186,0.018309343606233597,0.24032914638519287,-0.08475983887910843,0.04019710421562195,0.2481696456670761,0.02487030252814293,-0.07094515115022659,0.02628476172685623,-0.00712304562330246,-0.04976058006286621,-0.024602070450782776,-0.0648813247680664,-0.02827349305152893,0.04442674666643143,-0.16200967133045197,0.02547246217727661,0.10203316062688828,-0.1560029238462448,0.165535107254982,-0.011267822235822678,-0.023648075759410858,0.001466844230890274,-0.07181182503700256,0.012026548385620117,0.011392228305339813,0.21011506021022797,-0.22743536531925201,0.2915312945842743,0.1937231570482254,0.011561829596757889,0.07002981752157211,0.00032756663858890533,0.09776002168655396,-0.07220730185508728,0.004206985235214233,-0.12127777934074402,-0.0900777205824852,0.012966942973434925,0.008859572932124138,-0.01780618168413639,0.052432361990213394},
|
||
};
|
||
|
||
typedef enum {
|
||
UNDEFINED = 0,
|
||
CORE = 1,
|
||
EDGE = 2,
|
||
NOISE = 3
|
||
} ClusterTypes;
|
||
|
||
typedef struct Face {
|
||
float descriptor[128];
|
||
long int clusterId;
|
||
long faceId;
|
||
long photoId;
|
||
float confidence;
|
||
float profileDistance;
|
||
ClusterTypes clusterType;
|
||
float *distances;
|
||
} Face;
|
||
|
||
typedef struct FaceLink {
|
||
struct FaceLink *pNext;
|
||
float distance;
|
||
Face *pFace;
|
||
} FaceLink;
|
||
|
||
|
||
float euclideanDistance(float *a, float *b) {
|
||
float sum = 0.0L;
|
||
for (int i = 0; i < 128; i++) {
|
||
float delta = a[i] - b[i];
|
||
sum += delta * delta;
|
||
}
|
||
return sqrtf(sum);
|
||
}
|
||
|
||
/* https://en.wikipedia.org/wiki/DBSCAN */
|
||
#if 0
|
||
DBSCAN(DB, distFunc, eps, minPts) {
|
||
C = 0 /* Cluster counter */
|
||
for each point P in database DB {
|
||
if label(P) ≠ undefined then continue /* Previously processed in inner loop */
|
||
Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */
|
||
if |N| < minPts then { /* Density check */
|
||
label(P) = Noise /* Label as Noise */
|
||
continue
|
||
}
|
||
C = C + 1 /* next cluster label */
|
||
label(P) = C /* Label initial point */
|
||
Seed set S = N \ {P} /* Neighbors to expand */
|
||
for each point Q in S { /* Process every seed point */
|
||
if label(Q) = Noise then label(Q) = C /* Change Noise to border point */
|
||
if label(Q) ≠ undefined then continue /* Previously processed */
|
||
label(Q) = C /* Label neighbor */
|
||
Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */
|
||
if |N| ≥ minPts then { /* Density check */
|
||
S = S ∪ N /* Add new neighbors to seed set */
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
RangeQuery(DB, distFunc, Q, eps) {
|
||
Neighbors = empty list
|
||
for each point P in database DB { /* Scan all points in the database */
|
||
if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */
|
||
Neighbors = Neighbors ∪ {P} /* Add to result */
|
||
}
|
||
}
|
||
return Neighbors
|
||
}
|
||
#endif
|
||
|
||
FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) {
|
||
FaceLink *pNeighbors = NULL;
|
||
for (long int i = 0; i < faceCount; i++) {
|
||
Face *pFace = ppFaces[i];
|
||
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||
continue;
|
||
}
|
||
|
||
if (pFace->faceId == pQ->faceId) {
|
||
continue;
|
||
}
|
||
|
||
if (pQ->distances[i] > 0.0 && pQ->distances[i] <= eps) {
|
||
FaceLink *pLink = malloc(sizeof(*pLink));
|
||
memset(pLink, 0, sizeof(*pLink));
|
||
pLink->distance = pQ->distances[i];
|
||
pLink->pFace = pFace;
|
||
pLink->pNext = pNeighbors;
|
||
pNeighbors = pLink;
|
||
}
|
||
}
|
||
return pNeighbors;
|
||
}
|
||
|
||
void freeChain(FaceLink *pLink) {
|
||
while (pLink) {
|
||
FaceLink *tmp = pLink->pNext;
|
||
free(pLink);
|
||
pLink = tmp;
|
||
}
|
||
}
|
||
|
||
long int chainLength(FaceLink *pLink) {
|
||
long int count = 0;
|
||
while (pLink) {
|
||
count++;
|
||
pLink = pLink->pNext;
|
||
}
|
||
return count;
|
||
}
|
||
|
||
long int C = 0;
|
||
|
||
long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) {
|
||
int count = 0;
|
||
|
||
for (long int i = 0; i < faceCount; i++) {
|
||
Face *pFace = ppFaces[i];
|
||
if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) {
|
||
continue;
|
||
}
|
||
|
||
if (pFace->clusterType != UNDEFINED) {
|
||
continue;
|
||
}
|
||
|
||
float threshold = eps;
|
||
FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak);
|
||
long neighborCount = chainLength(pNeighbors);
|
||
if (neighborCount < minPts) {
|
||
pFace->clusterType = NOISE;
|
||
freeChain(pNeighbors);
|
||
continue;
|
||
}
|
||
|
||
C++;
|
||
count++;
|
||
|
||
pFace->clusterId = C;
|
||
pFace->clusterType = CORE;
|
||
|
||
FaceLink *pLink = pNeighbors;
|
||
while (pLink) {
|
||
Face *pQ = pLink->pFace;
|
||
if (pQ->faceId == pFace->faceId) {
|
||
pLink = pLink->pNext;
|
||
continue;
|
||
}
|
||
if (pQ->clusterType == NOISE) {
|
||
pQ->clusterId = C;
|
||
pQ->clusterType = EDGE;
|
||
}
|
||
if (pQ->clusterType != UNDEFINED) {
|
||
pLink = pLink->pNext;
|
||
continue;
|
||
}
|
||
|
||
pQ->clusterId = C;
|
||
pQ->clusterType = EDGE;
|
||
|
||
FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak);
|
||
neighborCount = chainLength(pSubNeighbors);
|
||
|
||
if (neighborCount >= minPts) {
|
||
pQ->clusterType = CORE;
|
||
/* Append these neighbors to the end of the chain */
|
||
FaceLink *pTmp = pLink;
|
||
while (pTmp->pNext) {
|
||
pTmp = pTmp->pNext;
|
||
}
|
||
pTmp->pNext = pSubNeighbors;
|
||
} else {
|
||
freeChain(pSubNeighbors);
|
||
}
|
||
|
||
pLink = pLink->pNext;
|
||
}
|
||
freeChain(pNeighbors);
|
||
}
|
||
|
||
return count;
|
||
}
|
||
|
||
typedef struct {
|
||
sqlite3 *db;
|
||
Face **ppFaces;
|
||
long int count;
|
||
long int initialized;
|
||
} FaceCallbackData;
|
||
|
||
int parseFaceIdCount(void *data, int argc, char **argv, char **column) {
|
||
long int *pCount = data;
|
||
*pCount = strtol(argv[0] ? argv[0] : "0", NULL, 10);
|
||
return 0;
|
||
}
|
||
|
||
int parseFaceDescriptor(void *data, int argc, char **argv, char **column) {
|
||
FaceCallbackData *map = data;
|
||
|
||
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
|
||
|
||
char sql_buf[1024];
|
||
int rc;
|
||
|
||
Face *pFace = NULL;
|
||
for (long int i = 0; i < map->count; i++) {
|
||
pFace = map->ppFaces[i];
|
||
if (pFace->faceId == faceId) {
|
||
break;
|
||
}
|
||
pFace = NULL;
|
||
}
|
||
|
||
if (!pFace) {
|
||
return SQLITE_OK;
|
||
}
|
||
|
||
// Getting here means we have a valid file handle, f, and a valid db handle, db
|
||
// Also, a blank row has been inserted with key rowid
|
||
sqlite3_blob *blob;
|
||
rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 1, &blob);
|
||
if (SQLITE_OK != rc) {
|
||
fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db));
|
||
return rc;
|
||
}
|
||
|
||
if (SQLITE_OK != (rc = sqlite3_blob_read(blob, pFace->descriptor, sizeof(pFace->descriptor), 0))) {
|
||
fprintf(stderr, "Error reading from blob handle.\n");
|
||
return rc;
|
||
}
|
||
|
||
sqlite3_blob_close(blob);
|
||
|
||
return SQLITE_OK;
|
||
}
|
||
|
||
int parseFaceIdRow(void *data, int argc, char **argv, char **column) {
|
||
FaceCallbackData *map = data;
|
||
|
||
long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10);
|
||
long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10);
|
||
float confidence = strtof(argv[2] ? argv[2] : "0.0", NULL);
|
||
|
||
if (confidence < 0.9) {
|
||
return 0;
|
||
}
|
||
|
||
Face *pFace = map->ppFaces[map->initialized++];
|
||
pFace->faceId = faceId;
|
||
pFace->photoId = photoId;
|
||
pFace->confidence = confidence;
|
||
|
||
return 0;
|
||
}
|
||
|
||
void getClusterCounts(int *stats, Face **ppFaces, long int entries) {
|
||
for (int i = 0; i < entries; i++) {
|
||
if (ppFaces[i]->clusterType != CORE && ppFaces[i]->clusterType != EDGE) {
|
||
continue;
|
||
}
|
||
stats[ppFaces[i]->clusterId - 1]++;
|
||
}
|
||
}
|
||
|
||
long int getClusterCount(Face **ppFaces, long int entries, int clusterId) {
|
||
long int count = 0;
|
||
for (long int i = 0; i < entries; i++) {
|
||
if (ppFaces[i]->clusterId == clusterId &&
|
||
ppFaces[i]->clusterType != NOISE &&
|
||
ppFaces[i]->clusterType != UNDEFINED) {
|
||
count++;
|
||
}
|
||
}
|
||
return count;
|
||
}
|
||
|
||
/*
|
||
* 1. Count how many entries there are
|
||
* 2. Allocate storage to hold all entries
|
||
* 3. Read all entries into flat array
|
||
* 4. Allocate MxM matrix and pre-calculate distances
|
||
* 5. Perform DBSCAN across MxM matrix to cluster
|
||
*/
|
||
int main(int argc, char *argv[]) {
|
||
long maxId = 0;
|
||
long i;
|
||
long int entries = 0;
|
||
long int minPts = MIN_PTS;
|
||
float maxDistance = MAX_DISTANCE;
|
||
long int maxClusterSize = MAX_CLUSTER_SIZE;
|
||
|
||
if (argc == 1) {
|
||
fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n");
|
||
return -1;
|
||
}
|
||
|
||
if (argc > 2) {
|
||
sscanf(argv[2], "%f", &maxDistance);
|
||
}
|
||
|
||
if (argc > 3) {
|
||
sscanf(argv[3], "%ld", &minPts);
|
||
}
|
||
|
||
if (argc > 4) {
|
||
sscanf(argv[4], "%ld", &maxClusterSize);
|
||
}
|
||
|
||
fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts);
|
||
|
||
/* Allocate storage for all distances */
|
||
sqlite3 *db;
|
||
|
||
int rc = sqlite3_open("db/photos.db", &db);
|
||
if (rc != SQLITE_OK) {
|
||
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
|
||
fprintf(stderr, "DB opened.\n");
|
||
char *err_msg = NULL;
|
||
|
||
entries = 0;
|
||
rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg);
|
||
if (rc != SQLITE_OK) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
|
||
fprintf(stderr, "%ld faces in DB.\n", entries);
|
||
|
||
Face **ppFaces = malloc(sizeof(Face *) * entries);
|
||
if (!ppFaces) {
|
||
fprintf(stderr, "Unable to allocate storage face descriptors.");
|
||
return -1;
|
||
}
|
||
|
||
for (i = 0; i < entries; i++) {
|
||
ppFaces[i] = malloc(sizeof(Face));
|
||
memset(ppFaces[i], 0, sizeof(Face));
|
||
}
|
||
|
||
for (i = 0; i < entries; i++) {
|
||
ppFaces[i]->distances = malloc(sizeof(*ppFaces[i]->distances) * entries);
|
||
if (!ppFaces[i]->distances) {
|
||
fprintf(stderr, "Unable to allocate storage for distance dictionary.");
|
||
return -1;
|
||
}
|
||
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
|
||
}
|
||
|
||
fprintf(stderr, "Storage allocated for %ld faces.\n", entries);
|
||
|
||
FaceCallbackData data = {
|
||
db: db,
|
||
ppFaces: ppFaces,
|
||
count: entries,
|
||
initialized: 0
|
||
};
|
||
|
||
rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg);
|
||
if (rc != SQLITE_OK) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
entries = data.initialized;
|
||
data.count = data.initialized;
|
||
|
||
fprintf(stderr, "Face data loaded from DB.\n");
|
||
|
||
rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg);
|
||
if (rc != SQLITE_OK) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
|
||
fprintf(stderr, "Descriptor data loaded from DB\n");
|
||
|
||
float profileDistance = 1.0;
|
||
long int dst, src;
|
||
for (src = 0, dst = 0; src < entries; src++) {
|
||
Face *pFace = ppFaces[src];
|
||
profileDistance = 1.0;
|
||
for (int j = 0; j < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); j++) {
|
||
profileDistance = euclideanDistance(pFace->descriptor, profileDescriptors[j]);
|
||
if (profileDistance > 0.5) {
|
||
profileDistance = 1.0;
|
||
} else {
|
||
break;
|
||
}
|
||
}
|
||
if (profileDistance <= 0.5) {
|
||
free(pFace->distances);
|
||
} else {
|
||
pFace->profileDistance = profileDistance;
|
||
ppFaces[dst++] = pFace;
|
||
}
|
||
}
|
||
|
||
fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst),
|
||
(sizeof(profileDescriptors) / sizeof(profileDescriptors[0])));
|
||
entries = dst;
|
||
|
||
long int processed = 0;
|
||
long int last = 0;
|
||
float total = 0.0;
|
||
long int sampleSize = 0;
|
||
|
||
fprintf(stderr, "Calculating distances O(N^2) times for %ld faces.\n", entries);
|
||
for (long i = 0; i < entries; i++) {
|
||
Face *pLink = ppFaces[i];
|
||
for (long j = 0; j < entries; j++) {
|
||
Face *pTarget = ppFaces[j];
|
||
processed++;
|
||
if (processed % 1000 == 0) {
|
||
int perc = 100 * processed / (entries * entries);
|
||
if (perc != last) {
|
||
fprintf(stderr, "\rComputed %d%% complete.", perc);
|
||
last = perc;
|
||
}
|
||
}
|
||
|
||
if (i == j) {
|
||
pLink->distances[i] = 0.0;
|
||
pTarget->distances[j] = 0.0;
|
||
continue;
|
||
}
|
||
|
||
if (pLink->distances[j] != 0.0) {
|
||
continue;
|
||
}
|
||
|
||
pLink->distances[j] =
|
||
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
|
||
|
||
sampleSize++;
|
||
total += pLink->distances[j];
|
||
}
|
||
}
|
||
|
||
fprintf(stderr, "\nAverage distance: %f\n", 1. * total / sampleSize);
|
||
|
||
fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts);
|
||
|
||
long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1);
|
||
fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters);
|
||
|
||
int recalcNeeded = clusters > 0 ? 1 : 0;
|
||
float reducedDistance = maxDistance;
|
||
|
||
while (recalcNeeded) {
|
||
int *stats = malloc(sizeof(int) * clusters), delta = 0;
|
||
memset(stats, 0, sizeof(int) * clusters);
|
||
getClusterCounts(stats, ppFaces, entries);
|
||
recalcNeeded = 0;
|
||
|
||
reducedDistance -= 0.05L;
|
||
if (reducedDistance < 0.1) {
|
||
break;
|
||
}
|
||
|
||
for (int i = 0; i < clusters; i++) {
|
||
if (stats[i] < maxClusterSize) {
|
||
continue;
|
||
}
|
||
|
||
for (int j = 0; j < entries; j++) {
|
||
Face *pFace = ppFaces[j];
|
||
if (pFace->clusterId == i + 1) {
|
||
pFace->clusterType = UNDEFINED;
|
||
}
|
||
}
|
||
|
||
int split = DBSCAN(ppFaces, entries, reducedDistance, minPts, i + 1);
|
||
if (split) {
|
||
recalcNeeded |= 1;
|
||
} else {
|
||
continue;
|
||
}
|
||
fprintf(stderr, "Cluster %d had %d units. Split into %d clusters (max: %f).\n",
|
||
i + 1, stats[i], split, reducedDistance);
|
||
for (int c = 0; c < split; c++) {
|
||
fprintf(stderr, "%ld. %ld\n", delta + c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters));
|
||
}
|
||
|
||
for (int j = 0; j < entries; j++) {
|
||
Face *pFace = ppFaces[j];
|
||
if (pFace->clusterId == i + 1) {
|
||
pFace->clusterType = CORE;
|
||
}
|
||
}
|
||
|
||
delta += split;
|
||
}
|
||
|
||
clusters += delta;
|
||
free(stats);
|
||
}
|
||
|
||
long int undefined = 0, outlier = 0, core = 0, reachable = 0;
|
||
for (i = 0; i < entries; i++) {
|
||
switch (ppFaces[i]->clusterType) {
|
||
case NOISE:
|
||
ppFaces[i]->clusterId = 0;
|
||
outlier++;
|
||
break;
|
||
case UNDEFINED:
|
||
ppFaces[i]->clusterId = 0;
|
||
undefined++;
|
||
break;
|
||
case CORE:
|
||
core++;
|
||
break;
|
||
case EDGE:
|
||
reachable++;
|
||
break;
|
||
}
|
||
}
|
||
|
||
fprintf(stderr, "\n%ld clusters being written:\n", clusters);
|
||
fprintf(stderr, "%ld NOISE\n", outlier);
|
||
fprintf(stderr, "%ld UNDEFINED\n", undefined);
|
||
fprintf(stderr, "%ld CORE\n", core);
|
||
fprintf(stderr, "%ld EDGE\n", reachable);
|
||
|
||
fprintf(stdout, "<script>\nvar clusters = [\n");
|
||
for (long i = 1; i <= clusters; i++) {
|
||
long nodes = 0;
|
||
for (long int j = 0; j < entries; j++) {
|
||
if (ppFaces[j]->clusterId == i && ppFaces[j]->clusterId != UNDEFINED) {
|
||
if (nodes == 0) {
|
||
fprintf(stdout, "/* %ld. */ [", i);
|
||
fprintf(stdout, "[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||
} else {
|
||
fprintf(stdout, ",[%ld,%ld]", ppFaces[j]->faceId, ppFaces[j]->photoId);
|
||
}
|
||
nodes++;
|
||
}
|
||
}
|
||
if (nodes) {
|
||
if (i < clusters) {
|
||
fprintf(stdout, "],\n");
|
||
} else {
|
||
fprintf(stdout, "]\n");
|
||
}
|
||
}
|
||
}
|
||
fprintf(stdout, "];\n</script>\n");
|
||
|
||
fprintf(stderr, "Skipping face writing!\n");
|
||
return 0;
|
||
|
||
char *sql =
|
||
"DELETE FROM facedistances;"
|
||
"BEGIN TRANSACTION;";
|
||
|
||
rc = sqlite3_exec(db, sql, 0, 0, &err_msg);
|
||
if (rc != SQLITE_OK ) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
|
||
fprintf(stderr, "facedistances deleted and transaction started.\n");
|
||
for (long i = 0; i < entries; i++) {
|
||
memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries);
|
||
}
|
||
|
||
char sqlBuf[1024];
|
||
|
||
processed = 0;
|
||
last = 0;
|
||
for (long i = 0; i < entries; i++) {
|
||
Face *pLink = ppFaces[i];
|
||
for (long j = 0; j < entries; j++) {
|
||
Face *pTarget = ppFaces[j];
|
||
|
||
processed++;
|
||
if (processed % 1000 == 0) {
|
||
int perc = 100 * processed / (entries * entries);
|
||
if (perc != last) {
|
||
fprintf(stderr, "\rComputed %d%% complete.", perc);
|
||
last = perc;
|
||
}
|
||
}
|
||
|
||
if (i == j) {
|
||
pLink->distances[i] = 0.0;
|
||
pTarget->distances[j] = 0.0;
|
||
continue;
|
||
}
|
||
|
||
if (pLink->distances[j] != 0.0) {
|
||
continue;
|
||
}
|
||
|
||
pLink->distances[j] =
|
||
pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor);
|
||
|
||
if (pLink->distances[j] < 0.5) {
|
||
sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%f);",
|
||
((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId),
|
||
((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId),
|
||
pLink->distances[j]);
|
||
rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg);
|
||
if (rc != SQLITE_OK ) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
fprintf(stderr, "\n");
|
||
|
||
sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId);
|
||
|
||
rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg);
|
||
if (rc != SQLITE_OK ) {
|
||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||
sqlite3_free(err_msg);
|
||
sqlite3_close(db);
|
||
return 1;
|
||
}
|
||
|
||
sqlite3_close(db);
|
||
|
||
return 0;
|
||
}
|