diff --git a/server/scanner.js b/server/scanner.js index b74944c..058dc2e 100755 --- a/server/scanner.js +++ b/server/scanner.js @@ -281,19 +281,30 @@ const processBlock = async (items) => { return b.stats.mtime - a.stats.mtime; }); - let toProcess = processing.length, lastMessage = moment(); + let toProcess = processing.length; setStatus(`Items to be processed: ${toProcess}`); const updateHash = [], insertHash = [], updateAsset = []; + + let last = 0, + lastTime = Date.now(), + start = Date.now(); await Promise.map(processing, async (asset) => { toProcess--; - if (moment().add(-5, 'seconds') > lastMessage) { - setStatus("Items to be processed: " + toProcess); - lastMessage = moment(); + const elapsed = Date.now() - lastTime; + if (elapsed > 5000) { + const processed = processing.length - toProcess, + rate = Math.ceil(100 * (processed - last) / elapsed) / 100, + eta = Math.ceil(toProcess * + (((Date.now() - start) / 1000) / processed)); + setStatus(`Processed ${processed}/${processing.length} ` + + `(${rate} assets/second). ETA ${eta}s`); + last = processed; + lastTime = Date.now(); } /* Create JPG from RAW if there is a RAW file and no JPG */ @@ -311,38 +322,55 @@ const processBlock = async (items) => { asset.hash = await computeHash( picturesPath + asset.album.path + asset.filename) + /* Scan for existing DB entries -- this is a promise, which + * means the closure for the *Hash lists could introduce a race + * condition where the array didn't contain a match prior to calling + * the DB, but during the DB transaction, a match came into being + */ + const results = await photoDB.sequelize.query( + "SELECT photohashes.*,photos.filename,albums.path FROM photohashes " + + "LEFT JOIN photos ON (photos.id=photohashes.photoId) " + + "LEFT JOIN albums ON (albums.id=photos.albumId) " + + "WHERE hash=:hash OR photoId=:id", { + replacements: asset, + type: photoDB.sequelize.QueryTypes.SELECT + }); + /* Writes to DB for new assets hasn't happened yet, so the DB - * won't have new duplicates */ - let duplicate = insertHash.find(entry => entry.hash === asset.hash) - || updateHash.find(entry => entry.hash === asset.hash); + * won't have new duplicates */ + let duplicate; - let results = []; - - /* In addition to checking new entries, check the DB for old - * duplicates */ - if (!duplicate) { - results = await photoDB.sequelize.query( - "SELECT photohashes.*,photos.filename,albums.path FROM photohashes " + - "LEFT JOIN photos ON (photos.id=photohashes.photoId) " + - "LEFT JOIN albums ON (albums.id=photos.albumId) " + - "WHERE hash=:hash OR photoId=:id", { - replacements: asset, - type: photoDB.sequelize.QueryTypes.SELECT - }); - - if (results.length == 0) { + if (results.length == 0) { + duplicate = insertHash.find(entry => entry.hash === asset.hash); + if (duplicate) { + console.log(`DUPLICATE in INSERT hash list (${insertHash.length}) ` + + `for ${duplicate.filename}`); + } else { insertHash.push(asset); - } else if (results[0].hash != asset.hash) { - updateHash.push(asset); - } else if (results[0].photoId != asset.id) { - duplicate = results[0]; } + } else if (results[0].hash != asset.hash) { + duplicate = updateHash.find(entry => entry.hash === asset.hash); + if (duplicate) { + console.log(`DUPLICATE in UPDATE hash list (${updateHash.length}) ` + + `for ${duplicate.filename}`); + } else { + updateHash.push(asset); + } + } else if (results[0].photoId != asset.id) { + duplicate = results[0]; + console.log(`DUPLICATE in HASH db for ${duplicate.filename}`); } + /* Output log information about duplicates */ if (duplicate) { - setStatus("Duplicate asset: " + - "'" + asset.album.path + asset.filename + "' is a copy of " + - "'" + duplicate.album.path + duplicate.filename + "'"); + /* Fixup structure based on whether obtained from *Hash array or + * from DB */ + if (!duplicate.path && duplicate.album) { + duplicate.path = duplicate.album.path; + } + setStatus(`Duplicate asset: ` + + `'${asset.album.path}${asset.filename}' is a copy of ` + + `'${duplicate.path}${duplicate.filename}'`); if (asset.duplicate != duplicate.photoId) { asset.duplicate = duplicate.photoId; duplicates.push(asset); @@ -374,6 +402,33 @@ const processBlock = async (items) => { }); try { + console.log(`Updating ${updateAsset.length} asset entries.`); + await photoDB.sequelize.transaction(async (t) => { + /* Update the DB with the image information */ + await Promise.mapSeries(updateAsset, async (item) => { + await photoDB.sequelize.query("UPDATE photos SET " + + "added=:added,modified=:modified,taken=:taken,width=:width," + + "height=:height,size=:size,scanned=CURRENT_TIMESTAMP " + + "WHERE id=:id", { + replacements: item, + transaction: t + }); + }); + }); + } catch (error) { + console.error(error); + process.exit(-1); + } + + /* Run UPDATE for hashes first in case a new photo is being added + * that has the same hash as an OLD entry. This could happen + * if an original file is COPIED and the original DB entry is + * edited (UPDATE), and the original is moved elsewhere (INSERT) + * + * If the UPDATE does not occur first, the INSERT will fail. + */ + try { + console.log(`Updating ${updateHash.length} asset HASH entries.`); await photoDB.sequelize.transaction(async (t) => { await Promise.mapSeries(updateHash, async (item) => { await photoDB.sequelize.query( @@ -383,17 +438,8 @@ const processBlock = async (items) => { }); }); }); - await photoDB.sequelize.transaction(async (t) => { - /* Update the DB with the image information */ - await Promise.mapSeries(updateAsset, async (item) => { - await photoDB.sequelize.query("UPDATE photos SET " + - "added=:added,modified=:modified,taken=:taken,width=:width,height=:height,size=:size,scanned=CURRENT_TIMESTAMP " + - "WHERE id=:id", { - replacements: item, - transaction: t - }); - }); - }); + + console.log(`Inserting ${insertHash.length} HASH entries.`); await photoDB.sequelize.transaction(async (t) => { await Promise.mapSeries(insertHash, async (item) => { await photoDB.sequelize.query(