#include #include #include #include #include #include #include #ifndef MIN_PTS #define MIN_PTS 3 #endif #ifndef MAX_DISTANCE #define MAX_DISTANCE 0.47L #endif #ifndef MAX_CLUSTER_SIZE #define MAX_CLUSTER_SIZE 150 #endif /* descriptors extracted from a profile face photo, used to filter out faces near a profile image * (which all identify as the same person) */ float profileDescriptors[][128] = { { -0.061601437628269196,0.11936908215284348,0.05469074845314026,-0.01107284426689148,-0.11588133871555328,-0.002019548788666725,-0.11300364136695862,-0.07167613506317139,0.1764453798532486,-0.05901609733700752,0.2385266125202179,0.012328468263149261,-0.2573902904987335,0.04025992751121521,-0.11443999409675598,0.11594478785991669,-0.17500847578048706,-0.07691430300474167,-0.12378361076116562,-0.11740189790725708,0.03630289435386658,0.06033151596784592,0.04939301311969757,0.004410859197378159,-0.20615431666374207,-0.25997376441955566,-0.08603082597255707,-0.09167610853910446,0.03498300909996033,-0.18279242515563965,0.041221488267183304,0.09842021763324738,-0.10988932847976685,0.031966306269168854,0.013916028663516045,0.008073337376117706,0.013303801417350769,-0.06880992650985718,0.17893345654010773,0.027153145521879196,-0.1894378960132599,-0.020184200257062912,0.05442515388131142,0.2977772355079651,0.20385639369487762,0.010473722591996193,0.029712140560150146,-0.10179305076599121,0.07872007042169571,-0.2609504461288452,0.004072457551956177,0.20807254314422607,0.039381805807352066,0.10894455015659332,0.0656222552061081,-0.21101640164852142,0.0015651732683181763,0.09899670630693436,-0.1148049384355545,0.03364836052060127,0.04903073608875275,-0.03714796528220177,-0.06581848114728928,-0.08302658796310425,0.1889929473400116,0.08697575330734253,-0.14220063388347626,-0.18263882398605347,0.17710572481155396,-0.1408098042011261,-0.04034702479839325,0.14808061718940735,-0.14523223042488098,-0.22445881366729736,-0.2085285633802414,0.04144661873579025,0.3701931834220886,0.140765979886055,-0.1524423211812973,0.05934419482946396,-0.09778376668691635,-0.05247225984930992,-0.07087128609418869,0.08293693512678146,-0.07502841204404831,0.01957462728023529,-0.07184667885303497,0.05862969905138016,0.19565513730049133,-0.07941281795501709,0.04070422798395157,0.26956090331077576,0.01857529580593109,-0.03987279161810875,-0.0008930861949920654,0.08536889404058456,-0.16341862082481384,0.03542519360780716,-0.10296104848384857,-0.022122247144579887,0.08253192901611328,-0.07827107608318329,0.01481425017118454,0.08741594851016998,-0.17036983370780945,0.20205597579479218,-0.010307766497135162,-0.04007430374622345,-0.0006583929061889648,-0.016108371317386627,-0.04460605978965759,-0.034171007573604584,0.24336735904216766,-0.2778766453266144,0.18527397513389587,0.1877088099718094,0.098460853099823,0.148922860622406,0.06344866007566452,0.13334715366363525,0.0003817584365606308,0.004202648997306824,-0.11708918958902359,-0.06176237016916275,-0.027644027024507523,-0.10988462716341019,-0.10410083830356598,0.0056285373866558075 }, { -0.05286264419555664,0.1027536615729332,0.0177864171564579,-0.007238239049911499,-0.15839841961860657,-0.013195162639021873,-0.055493343621492386,-0.011598028242588043,0.11436303704977036,-0.023542361333966255,0.23070596158504486,0.006959080696105957,-0.23580302298069,-0.04991580545902252,-0.048089612275362015,0.1130392998456955,-0.1211254671216011,-0.053501617163419724,-0.20760701596736908,-0.11128067970275879,0.015699222683906555,0.048812925815582275,-0.004359103739261627,-0.01246669515967369,-0.08746911585330963,-0.2714271545410156,-0.07527103275060654,-0.07152064889669418,0.13769567012786865,-0.1629178822040558,-0.030123017728328705,-0.006879039108753204,-0.15528543293476105,-0.07373329252004623,0.035297941416502,-0.0019944999366998672,0.00020968541502952576,-0.06796075403690338,0.15615630149841309,-0.02517728880047798,-0.15598921477794647,0.048431847244501114,0.030592434108257294,0.22424748539924622,0.18237638473510742,0.03839367255568504,-0.018516220152378082,-0.07987417280673981,0.10236746072769165,-0.23993468284606934,0.01739603653550148,0.20749998092651367,0.07386040687561035,0.09853726625442505,0.06335064768791199,-0.14400270581245422,-0.0020356550812721252,0.16046060621738434,-0.1502964049577713,0.03028072975575924,-0.013792440295219421,-0.04380643367767334,-0.05555684491991997,-0.13338367640972137,0.17439348995685577,0.08730429410934448,-0.11853333562612534,-0.14580222964286804,0.11578165739774704,-0.15631376206874847,0.0005779601633548737,0.057921670377254486,-0.11303769052028656,-0.14606109261512756,-0.24737423658370972,0.07011876255273819,0.3969222903251648,0.15113821625709534,-0.19518792629241943,0.029213692992925644,-0.11209011822938919,-0.058080971240997314,-0.01972014084458351,-0.010737363249063492,-0.12097814679145813,-0.017350099980831146,-0.057253990322351456,0.049719784408807755,0.19717393815517426,-0.06898155063390732,0.06022507697343826,0.2233758270740509,-0.0060714855790138245,0.03075665421783924,0.014615798369050026,0.029250048100948334,-0.11849372833967209,-0.06616988033056259,-0.042307183146476746,0.0033659040927886963,0.0857284739613533,-0.20014573633670807,0.034076105803251266,0.06595847010612488,-0.16573786735534668,0.12949475646018982,0.008994176983833313,0.004620999097824097,0.02356269210577011,0.0182054340839386,-0.0561535507440567,-0.035449616611003876,0.23626668751239777,-0.2073628008365631,0.3139644265174866,0.1955631524324417,0.029616639018058777,0.11366549134254456,0.07708290964365005,0.10866693407297134,-0.06293588131666183,-0.0077966004610061646,-0.057174161076545715,-0.08943509310483932,0.004156911745667458,-0.0984298512339592,-0.005768085829913616,0.0017160219140350819}, {0.0011762604117393494,0.15072135627269745,0.020299416035413742,-0.06642502546310425,-0.060089170932769775,0.020552314817905426,-0.043852221220731735,-0.06478262692689896,0.17363804578781128,-0.03403667360544205,0.2395429164171219,0.01041267067193985,-0.24776551127433777,-0.046057891100645065,-0.08346940577030182,0.099162757396698,-0.19483637809753418,-0.04046493023633957,-0.1129814013838768,-0.1319343000650406,0.04903741180896759,0.07213468849658966,0.051988281309604645,-0.01571550965309143,-0.1113474890589714,-0.25194793939590454,-0.06103351712226868,-0.13213123381137848,0.10333564877510071,-0.15162967145442963,0.061464592814445496,0.09279994666576385,-0.11863565444946289,-0.07045092433691025,-0.01209489069879055,-0.009468602016568184,-0.10253313928842545,-0.09072671830654144,0.22084617614746094,0.006743522360920906,-0.06707601249217987,0.004636548459529877,-0.00785135105252266,0.3281668722629547,0.11846040189266205,-0.0055367788299918175,0.04063810035586357,-0.06342281401157379,0.1398719847202301,-0.29172199964523315,0.06687593460083008,0.17154791951179504,0.08854547142982483,0.16494478285312653,0.09101200103759766,-0.18667718768119812,0.08216162025928497,0.12080536782741547,-0.1759440004825592,0.10922732204198837,0.03968639671802521,-0.06957980990409851,-0.02280031517148018,-0.0626034140586853,0.2077835500240326,0.10969341546297073,-0.08934597671031952,-0.1270003616809845,0.14771263301372528,-0.05404433608055115,-0.05664592981338501,0.07035835832357407,-0.12639567255973816,-0.1545434296131134,-0.1935984045267105,0.05806276947259903,0.3116001486778259,0.11509235948324203,-0.1787276566028595,0.020631596446037292,-0.08929038792848587,-0.03143969923257828,0.053848765790462494,0.03168227896094322,-0.07926452159881592,-0.05334986746311188,-0.0591375008225441,0.024413203820586205,0.16138945519924164,-0.10341612249612808,-0.0791877955198288,0.2381574809551239,0.012866765260696411,0.009398067370057106,0.049464400857686996,0.02796872705221176,-0.11044278740882874,0.05444034934043884,-0.09540048986673355,0.0681515783071518,0.09935745596885681,-0.1683940887451172,-0.047316670417785645,0.04480529949069023,-0.1389012485742569,0.2227819412946701,0.03692697733640671,0.01761404424905777,0.013868780806660652,-0.05882856249809265,-0.11958207935094833,0.026760146021842957,0.25092488527297974,-0.26664063334465027,0.33623644709587097,0.16868337988853455,0.05719297379255295,0.17470279335975647,0.057424478232860565,0.08439406007528305,-0.0024905502796173096,-0.037533506751060486,-0.1358765959739685,-0.09953152388334274,0.05512324720621109,-0.025295836851000786,-0.0866888239979744,0.026549236848950386}, {-0.1503148078918457,0.12206108123064041,0.02967856079339981,-0.03082158789038658,-0.1129150539636612,0.0478484109044075,-0.04801269620656967,-0.07795510441064835,0.1626167893409729,-0.041412316262722015,0.2022078037261963,0.031001679599285126,-0.20948299765586853,-0.04371509701013565,-0.0841950997710228,0.10647765547037125,-0.1393173485994339,-0.17036688327789307,-0.07359742373228073,-0.09497823566198349,-0.023887813091278076,0.02798299677670002,-0.011271866038441658,0.02640014886856079,-0.1520528793334961,-0.22326916456222534,-0.023297179490327835,-0.10385704040527344,0.02730628475546837,-0.09421993046998978,0.007815570570528507,0.03076392412185669,-0.2347397357225418,-0.07311992347240448,0.03199088200926781,0.1157669946551323,-0.0806952491402626,0.0058890413492918015,0.25238266587257385,0.0066128019243478775,-0.11484481394290924,0.058332689106464386,0.07401782274246216,0.30297812819480896,0.13892878592014313,0.015657028183341026,-0.026928886771202087,-0.041992999613285065,0.08834651112556458,-0.25814956426620483,0.025386730208992958,0.24164703488349915,0.11268384754657745,0.13700686395168304,-0.0037370696663856506,-0.1474452018737793,0.07658589631319046,0.11919880658388138,-0.16230738162994385,0.06895574927330017,0.05145460367202759,-0.09962668269872665,-0.07351469993591309,0.016625329852104187,0.24626575410366058,0.09379985928535461,-0.07967095822095871,-0.14824430644512177,0.1664988100528717,-0.07381315529346466,-0.0701378807425499,0.094453826546669,-0.1053486168384552,-0.173324853181839,-0.2059929072856903,0.11443521082401276,0.37259921431541443,0.12261700630187988,-0.17118749022483826,0.1002739816904068,-0.0962643027305603,0.0028629377484321594,0.02517216093838215,0.009352639317512512,-0.07928505539894104,-0.027248665690422058,-0.06288609653711319,0.03766700252890587,0.17690418660640717,0.00913313403725624,-0.016148168593645096,0.16757085919380188,0.081269770860672,-0.003465055488049984,0.08387735486030579,0.05247170478105545,-0.035166703164577484,-0.02116062119603157,-0.15585437417030334,-0.0015826895833015442,0.06662821024656296,-0.07819218933582306,0.03278189152479172,0.059533413499593735,-0.18928831815719604,0.14338715374469757,0.029624812304973602,-0.04126661643385887,-0.017487522214651108,0.024950236082077026,-0.06610194593667984,-0.007236182689666748,0.1634015440940857,-0.3118166923522949,0.32635706663131714,0.1551782488822937,-0.006172630935907364,0.1549994796514511,0.11846084892749786,0.05585896223783493,0.005930176004767418,0.0026269033551216125,-0.08288535475730896,-0.14617297053337097,0.04124702885746956,-0.023869477212429047,0.056380532681941986,0.059275589883327484}, {-0.11449231207370758,0.08218079805374146,0.052244849503040314,-0.05849563330411911,-0.09875518083572388,-0.06790590286254883,-0.03999913111329079,-0.07766807824373245,0.15596428513526917,-0.0483558364212513,0.21132533252239227,-0.01931747794151306,-0.24744108319282532,-0.10812359303236008,0.058341797441244125,0.11405576020479202,-0.1183864176273346,-0.0856679230928421,-0.13984550535678864,-0.11198180168867111,-0.03631503880023956,-0.019455134868621826,0.038762785494327545,0.04496624693274498,-0.10771285742521286,-0.25013211369514465,-0.06068229675292969,-0.11673891544342041,0.07542752474546432,-0.14139781892299652,0.0017466647550463676,0.043040819466114044,-0.17083533108234406,-0.07615770399570465,-0.026609988883137703,0.06443975865840912,-0.056286998093128204,-0.08603766560554504,0.17082615196704865,-0.03381050005555153,-0.14733527600765228,-0.1067851260304451,-0.04236249998211861,0.23973657190799713,0.21202436089515686,-0.008970214053988457,0.017333630472421646,-0.012529492378234863,0.05171665549278259,-0.2481795996427536,-0.0033811144530773163,0.11406000703573227,0.11761488020420074,0.06430923938751221,0.05201558396220207,-0.18238718807697296,0.03609202429652214,0.10110349208116531,-0.16749322414398193,-0.017176488414406776,0.02417522668838501,-0.09461188316345215,-0.08760598301887512,-0.09778585284948349,0.2984878420829773,0.09519178420305252,-0.11396373808383942,-0.11740261316299438,0.15713796019554138,-0.11983270198106766,0.0014888402074575424,0.11709675192832947,-0.11239394545555115,-0.1615625023841858,-0.21783563494682312,0.062243033200502396,0.3098629415035248,0.13144542276859283,-0.19728504121303558,-0.00864078477025032,-0.1421070396900177,-0.027583356946706772,-0.03719120845198631,0.004923664033412933,-0.08225893974304199,0.016801394522190094,-0.05810196325182915,0.0058958642184734344,0.15847420692443848,-0.09055405110120773,0.03644956648349762,0.2036021500825882,0.012157909572124481,-0.014717459678649902,-0.008820030838251114,-0.07779868692159653,-0.03748408332467079,0.028270550072193146,-0.05236710608005524,0.013832693919539452,0.10549424588680267,-0.1504925936460495,0.04579198732972145,0.05509837344288826,-0.1997680962085724,0.15631046891212463,0.042316727340221405,-0.017336193472146988,0.02395687624812126,-0.036151397973299026,-0.10194570571184158,-0.06460698693990707,0.24390262365341187,-0.2809107303619385,0.22127604484558105,0.2234485149383545,0.06370076537132263,0.13879287242889404,0.02190246433019638,0.10918933153152466,-0.025838086381554604,0.010740015655755997,-0.09715697914361954,-0.013207517564296722,0.0729929581284523,-0.09213361144065857,-0.011896252632141113,0.03725235536694527}, {-0.05498696118593216,0.05113350972533226,0.05531687289476395,-0.09476281702518463,-0.10580143332481384,-0.030396001413464546,-0.0010913275182247162,-0.1052420362830162,0.10643371194601059,-0.06002769619226456,0.21559466421604156,-0.0154794380068779,-0.2808150053024292,-0.0600040964782238,0.0403410941362381,0.0919484943151474,-0.1326475292444229,-0.10436968505382538,-0.19102108478546143,-0.0795152336359024,-0.04159826040267944,0.021737679839134216,0.019609834998846054,-0.04616895690560341,-0.17645984888076782,-0.1682741940021515,-0.08414799720048904,-0.12163443863391876,0.026273906230926514,-0.07314532995223999,0.051960788667201996,0.09379777312278748,-0.1891252100467682,-0.059710826724767685,0.011947771534323692,0.014459501951932907,-0.07258555293083191,-0.07127422094345093,0.16470138728618622,-0.02148628979921341,-0.12428629398345947,0.027712488546967506,0.015993379056453705,0.16111883521080017,0.24940571188926697,-0.0341871902346611,0.007754117250442505,-0.0865824818611145,0.049111928790807724,-0.2875933051109314,-0.0030787810683250427,0.1479477882385254,0.026672853156924248,0.15996402502059937,0.053178343921899796,-0.12440557777881622,0.09016239643096924,0.16699564456939697,-0.20755255222320557,0.06710638105869293,0.03819752484560013,-0.14896832406520844,-0.04662872850894928,-0.027680709958076477,0.14560657739639282,0.10368138551712036,-0.04873866215348244,-0.176229327917099,0.18367958068847656,-0.14353249967098236,-0.08559320867061615,0.12607094645500183,-0.12804611027240753,-0.1290942132472992,-0.23025573790073395,0.021010763943195343,0.3133254051208496,0.15103624761104584,-0.16475120186805725,0.017042677849531174,-0.13039177656173706,-0.04864456504583359,0.0031204214319586754,-0.053125109523534775,-0.011741500347852707,-0.1571718007326126,-0.059252332895994186,0.018309343606233597,0.24032914638519287,-0.08475983887910843,0.04019710421562195,0.2481696456670761,0.02487030252814293,-0.07094515115022659,0.02628476172685623,-0.00712304562330246,-0.04976058006286621,-0.024602070450782776,-0.0648813247680664,-0.02827349305152893,0.04442674666643143,-0.16200967133045197,0.02547246217727661,0.10203316062688828,-0.1560029238462448,0.165535107254982,-0.011267822235822678,-0.023648075759410858,0.001466844230890274,-0.07181182503700256,0.012026548385620117,0.011392228305339813,0.21011506021022797,-0.22743536531925201,0.2915312945842743,0.1937231570482254,0.011561829596757889,0.07002981752157211,0.00032756663858890533,0.09776002168655396,-0.07220730185508728,0.004206985235214233,-0.12127777934074402,-0.0900777205824852,0.012966942973434925,0.008859572932124138,-0.01780618168413639,0.052432361990213394}, }; typedef enum { UNDEFINED = 0, CORE = 1, EDGE = 2, NOISE = 3 } ClusterTypes; typedef struct Face { float descriptor[128]; long int clusterId; long faceId; long photoId; float confidence; float profileDistance; ClusterTypes clusterType; float *distances; } Face; typedef struct FaceLink { struct FaceLink *pNext; float distance; Face *pFace; } FaceLink; float euclideanDistance(float *a, float *b) { float sum = 0.0L; for (int i = 0; i < 128; i++) { float delta = a[i] - b[i]; sum += delta * delta; } return sqrtf(sum); } /* https://en.wikipedia.org/wiki/DBSCAN */ #if 0 DBSCAN(DB, distFunc, eps, minPts) { C = 0 /* Cluster counter */ for each point P in database DB { if label(P) ≠ undefined then continue /* Previously processed in inner loop */ Neighbors N = RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ if |N| < minPts then { /* Density check */ label(P) = Noise /* Label as Noise */ continue } C = C + 1 /* next cluster label */ label(P) = C /* Label initial point */ Seed set S = N \ {P} /* Neighbors to expand */ for each point Q in S { /* Process every seed point */ if label(Q) = Noise then label(Q) = C /* Change Noise to border point */ if label(Q) ≠ undefined then continue /* Previously processed */ label(Q) = C /* Label neighbor */ Neighbors N = RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ if |N| ≥ minPts then { /* Density check */ S = S ∪ N /* Add new neighbors to seed set */ } } } } RangeQuery(DB, distFunc, Q, eps) { Neighbors = empty list for each point P in database DB { /* Scan all points in the database */ if distFunc(Q, P) ≤ eps then { /* Compute distance and check epsilon */ Neighbors = Neighbors ∪ {P} /* Add to result */ } } return Neighbors } #endif FaceLink *RangeQuery(Face **ppFaces, long int faceCount, Face *pQ, float eps, long int clusterToBreak) { FaceLink *pNeighbors = NULL; for (long int i = 0; i < faceCount; i++) { Face *pFace = ppFaces[i]; if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) { continue; } if (pFace->faceId == pQ->faceId) { continue; } if (pQ->distances[i] > 0.0 && pQ->distances[i] <= eps) { FaceLink *pLink = malloc(sizeof(*pLink)); memset(pLink, 0, sizeof(*pLink)); pLink->distance = pQ->distances[i]; pLink->pFace = pFace; pLink->pNext = pNeighbors; pNeighbors = pLink; } } return pNeighbors; } void freeChain(FaceLink *pLink) { while (pLink) { FaceLink *tmp = pLink->pNext; free(pLink); pLink = tmp; } } long int chainLength(FaceLink *pLink) { long int count = 0; while (pLink) { count++; pLink = pLink->pNext; } return count; } long int C = 0; long int DBSCAN(Face **ppFaces, long int faceCount, float eps, int minPts, long int clusterToBreak) { int count = 0; for (long int i = 0; i < faceCount; i++) { Face *pFace = ppFaces[i]; if (clusterToBreak > 0 && pFace->clusterId != clusterToBreak) { continue; } if (pFace->clusterType != UNDEFINED) { continue; } float threshold = eps; FaceLink *pNeighbors = RangeQuery(ppFaces, faceCount, pFace, eps, clusterToBreak); long neighborCount = chainLength(pNeighbors); if (neighborCount < minPts) { pFace->clusterType = NOISE; freeChain(pNeighbors); continue; } C++; count++; pFace->clusterId = C; pFace->clusterType = CORE; FaceLink *pLink = pNeighbors; while (pLink) { Face *pQ = pLink->pFace; if (pQ->faceId == pFace->faceId) { pLink = pLink->pNext; continue; } if (pQ->clusterType == NOISE) { pQ->clusterId = C; pQ->clusterType = EDGE; } if (pQ->clusterType != UNDEFINED) { pLink = pLink->pNext; continue; } pQ->clusterId = C; pQ->clusterType = EDGE; FaceLink *pSubNeighbors = RangeQuery(ppFaces, faceCount, pQ, eps, clusterToBreak); neighborCount = chainLength(pSubNeighbors); if (neighborCount >= minPts) { pQ->clusterType = CORE; /* Append these neighbors to the end of the chain */ FaceLink *pTmp = pLink; while (pTmp->pNext) { pTmp = pTmp->pNext; } pTmp->pNext = pSubNeighbors; } else { freeChain(pSubNeighbors); } pLink = pLink->pNext; } freeChain(pNeighbors); } return count; } typedef struct { sqlite3 *db; Face **ppFaces; long int count; long int initialized; } FaceCallbackData; int parseFaceIdCount(void *data, int argc, char **argv, char **column) { long int *pCount = data; *pCount = strtol(argv[0] ? argv[0] : "0", NULL, 10); return 0; } int parseFaceDescriptor(void *data, int argc, char **argv, char **column) { FaceCallbackData *map = data; long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10); char sql_buf[1024]; int rc; Face *pFace = NULL; for (long int i = 0; i < map->count; i++) { pFace = map->ppFaces[i]; if (pFace->faceId == faceId) { break; } pFace = NULL; } if (!pFace) { return SQLITE_OK; } // Getting here means we have a valid file handle, f, and a valid db handle, db // Also, a blank row has been inserted with key rowid sqlite3_blob *blob; rc = sqlite3_blob_open(map->db, "main", "facedescriptors", "descriptors", faceId, 1, &blob); if (SQLITE_OK != rc) { fprintf(stderr, "Couldn't get blob handle (%i): %s\n", rc, sqlite3_errmsg(map->db)); return rc; } if (SQLITE_OK != (rc = sqlite3_blob_read(blob, pFace->descriptor, sizeof(pFace->descriptor), 0))) { fprintf(stderr, "Error reading from blob handle.\n"); return rc; } sqlite3_blob_close(blob); return SQLITE_OK; } int parseFaceIdRow(void *data, int argc, char **argv, char **column) { FaceCallbackData *map = data; long int faceId = strtol(argv[0] ? argv[0] : "0", NULL, 10); long int photoId = strtol(argv[1] ? argv[1] : "0", NULL, 10); float confidence = strtof(argv[2] ? argv[2] : "0.0", NULL); if (confidence < 0.9) { return 0; } Face *pFace = map->ppFaces[map->initialized++]; pFace->faceId = faceId; pFace->photoId = photoId; pFace->confidence = confidence; return 0; } void getClusterCounts(int *stats, Face **ppFaces, long int entries) { for (int i = 0; i < entries; i++) { if (ppFaces[i]->clusterType != CORE && ppFaces[i]->clusterType != EDGE) { continue; } stats[ppFaces[i]->clusterId - 1]++; } } long int getClusterCount(Face **ppFaces, long int entries, int clusterId) { long int count = 0; for (long int i = 0; i < entries; i++) { if (ppFaces[i]->clusterId == clusterId && ppFaces[i]->clusterType != NOISE && ppFaces[i]->clusterType != UNDEFINED) { count++; } } return count; } /* * 1. Count how many entries there are * 2. Allocate storage to hold all entries * 3. Read all entries into flat array * 4. Allocate MxM matrix and pre-calculate distances * 5. Perform DBSCAN across MxM matrix to cluster */ int main(int argc, char *argv[]) { long maxId = 0; long i; long int entries = 0; long int minPts = MIN_PTS; float maxDistance = MAX_DISTANCE; long int maxClusterSize = MAX_CLUSTER_SIZE; if (argc == 1) { fprintf(stderr, "usage: scanner PATH MAX_DISTANCE MIN_PTS\n"); return -1; } if (argc > 2) { sscanf(argv[2], "%f", &maxDistance); } if (argc > 3) { sscanf(argv[3], "%ld", &minPts); } if (argc > 4) { sscanf(argv[4], "%ld", &maxClusterSize); } fprintf(stderr, "\nmaxDistance : %f\nminPts : %ld\n", maxDistance, minPts); /* Allocate storage for all distances */ sqlite3 *db; int rc = sqlite3_open("db/photos.db", &db); if (rc != SQLITE_OK) { fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db)); sqlite3_close(db); return 1; } fprintf(stderr, "DB opened.\n"); char *err_msg = NULL; entries = 0; rc = sqlite3_exec(db, "SELECT COUNT(id) FROM faces", parseFaceIdCount, &entries, &err_msg); if (rc != SQLITE_OK) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } fprintf(stderr, "%ld faces in DB.\n", entries); Face **ppFaces = malloc(sizeof(Face *) * entries); if (!ppFaces) { fprintf(stderr, "Unable to allocate storage face descriptors."); return -1; } for (i = 0; i < entries; i++) { ppFaces[i] = malloc(sizeof(Face)); memset(ppFaces[i], 0, sizeof(Face)); } for (i = 0; i < entries; i++) { ppFaces[i]->distances = malloc(sizeof(*ppFaces[i]->distances) * entries); if (!ppFaces[i]->distances) { fprintf(stderr, "Unable to allocate storage for distance dictionary."); return -1; } memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries); } fprintf(stderr, "Storage allocated for %ld faces.\n", entries); FaceCallbackData data = { db: db, ppFaces: ppFaces, count: entries, initialized: 0 }; rc = sqlite3_exec(db, "SELECT id,photoId,faceConfidence FROM faces", parseFaceIdRow, &data, &err_msg); if (rc != SQLITE_OK) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } entries = data.initialized; data.count = data.initialized; fprintf(stderr, "Face data loaded from DB.\n"); rc = sqlite3_exec(db, "SELECT * FROM facedescriptors", parseFaceDescriptor, &data, &err_msg); if (rc != SQLITE_OK) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } fprintf(stderr, "Descriptor data loaded from DB\n"); float profileDistance = 1.0; long int dst, src; for (src = 0, dst = 0; src < entries; src++) { Face *pFace = ppFaces[src]; profileDistance = 1.0; for (int j = 0; j < (sizeof(profileDescriptors) / sizeof(profileDescriptors[0])); j++) { profileDistance = euclideanDistance(pFace->descriptor, profileDescriptors[j]); if (profileDistance > 0.5) { profileDistance = 1.0; } else { break; } } if (profileDistance <= 0.5) { free(pFace->distances); } else { pFace->profileDistance = profileDistance; ppFaces[dst++] = pFace; } } fprintf(stderr, "Dropped %ld faces as too close to profile photos (set of %ld).\n", (entries - dst), (sizeof(profileDescriptors) / sizeof(profileDescriptors[0]))); entries = dst; long int processed = 0; long int last = 0; float total = 0.0; long int sampleSize = 0; fprintf(stderr, "Calculating distances O(N^2) times for %ld faces.\n", entries); for (long i = 0; i < entries; i++) { Face *pLink = ppFaces[i]; for (long j = 0; j < entries; j++) { Face *pTarget = ppFaces[j]; processed++; if (processed % 1000 == 0) { int perc = 100 * processed / (entries * entries); if (perc != last) { fprintf(stderr, "\rComputed %d%% complete.", perc); last = perc; } } if (i == j) { pLink->distances[i] = 0.0; pTarget->distances[j] = 0.0; continue; } if (pLink->distances[j] != 0.0) { continue; } pLink->distances[j] = pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor); sampleSize++; total += pLink->distances[j]; } } fprintf(stderr, "\nAverage distance: %f\n", 1. * total / sampleSize); fprintf(stderr, "Calculating clusters: MAX_DISTANCE(%f) MIN_PTS(%ld)\n", maxDistance, minPts); long int clusters = DBSCAN(ppFaces, entries, maxDistance, minPts, -1); fprintf(stderr, "\n%ld clusters identified before size-split.\n", clusters); int recalcNeeded = clusters > 0 ? 1 : 0; float reducedDistance = maxDistance; while (recalcNeeded) { int *stats = malloc(sizeof(int) * clusters), delta = 0; memset(stats, 0, sizeof(int) * clusters); getClusterCounts(stats, ppFaces, entries); recalcNeeded = 0; reducedDistance -= 0.05L; if (reducedDistance < 0.1) { break; } for (int i = 0; i < clusters; i++) { if (stats[i] < maxClusterSize) { continue; } for (int j = 0; j < entries; j++) { Face *pFace = ppFaces[j]; if (pFace->clusterId == i + 1) { pFace->clusterType = UNDEFINED; } } int split = DBSCAN(ppFaces, entries, reducedDistance, minPts, i + 1); if (split) { recalcNeeded |= 1; } else { continue; } fprintf(stderr, "Cluster %d had %d units. Split into %d clusters (max: %f).\n", i + 1, stats[i], split, reducedDistance); for (int c = 0; c < split; c++) { fprintf(stderr, "%ld. %ld\n", delta + c + clusters, getClusterCount(ppFaces, entries, delta + c + clusters)); } for (int j = 0; j < entries; j++) { Face *pFace = ppFaces[j]; if (pFace->clusterId == i + 1) { pFace->clusterType = CORE; } } delta += split; } clusters += delta; free(stats); } long int undefined = 0, outlier = 0, core = 0, reachable = 0; for (i = 0; i < entries; i++) { switch (ppFaces[i]->clusterType) { case NOISE: ppFaces[i]->clusterId = 0; outlier++; break; case UNDEFINED: ppFaces[i]->clusterId = 0; undefined++; break; case CORE: core++; break; case EDGE: reachable++; break; } } fprintf(stderr, "\n%ld clusters being written:\n", clusters); fprintf(stderr, "%ld NOISE\n", outlier); fprintf(stderr, "%ld UNDEFINED\n", undefined); fprintf(stderr, "%ld CORE\n", core); fprintf(stderr, "%ld EDGE\n", reachable); fprintf(stdout, "\n"); fprintf(stderr, "Skipping face writing!\n"); return 0; char *sql = "DELETE FROM facedistances;" "BEGIN TRANSACTION;"; rc = sqlite3_exec(db, sql, 0, 0, &err_msg); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } fprintf(stderr, "facedistances deleted and transaction started.\n"); for (long i = 0; i < entries; i++) { memset(ppFaces[i]->distances, 0, sizeof(*ppFaces[i]->distances) * entries); } char sqlBuf[1024]; processed = 0; last = 0; for (long i = 0; i < entries; i++) { Face *pLink = ppFaces[i]; for (long j = 0; j < entries; j++) { Face *pTarget = ppFaces[j]; processed++; if (processed % 1000 == 0) { int perc = 100 * processed / (entries * entries); if (perc != last) { fprintf(stderr, "\rComputed %d%% complete.", perc); last = perc; } } if (i == j) { pLink->distances[i] = 0.0; pTarget->distances[j] = 0.0; continue; } if (pLink->distances[j] != 0.0) { continue; } pLink->distances[j] = pTarget->distances[i] = euclideanDistance(pLink->descriptor, pTarget->descriptor); if (pLink->distances[j] < 0.5) { sprintf(sqlBuf, "INSERT INTO facedistances (face1Id,face2Id,distance) VALUES (%ld,%ld,%f);", ((pLink->faceId < pTarget->faceId) ? pLink->faceId : pTarget->faceId), ((pLink->faceId < pTarget->faceId) ? pTarget->faceId : pLink->faceId), pLink->distances[j]); rc = sqlite3_exec(db, sqlBuf, 0, 0, &err_msg); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } } } } fprintf(stderr, "\n"); sprintf(sqlBuf, "UPDATE faces SET lastComparedId=%ld;", maxId); rc = sqlite3_exec(db, "COMMIT;", 0, 0, &err_msg); if (rc != SQLITE_OK ) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); sqlite3_close(db); return 1; } sqlite3_close(db); return 0; }