From 4f1d72147ae3f1101ecd5b7aa588b44960750da5 Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Fri, 28 Sep 2018 16:29:29 -0700
Subject: [PATCH] Restructure when the photohashes is updated to be with the hash generation instead of bulked

---
Review notes (this section, between the "---" marker and the diffstat, is
commentary only and is not part of the commit message):

* The line structure of this patch was rebuilt from a copy in which all
  newlines and indentation had been collapsed. Line counts match the hunk
  headers and the diffstat, but indentation and the position of blank
  context lines are a best guess; apply with `git apply --ignore-whitespace`
  if the context does not match. The post-image blob id on the "index" line
  predates the fixes listed below.
* Fixed in the added code: the UPDATE statement ended with a stray ")"
  ("... WHERE photoId=:id)"), which is a SQL syntax error. The removed
  (old) line keeps the stray ")" so that it still matches the target file.
* Fixed in the added code: the `.then(function(asset) { ... })` callback is
  now closed before `}, {`, so `{ concurrency: 5 }` is the third argument of
  Promise.map (as in the findOrUpdateDBAsset hunk below) instead of an
  ignored second argument of `.then()`. The `.catch` now covers the whole
  map and chains into the following `.then`.
* Fixed in the added code: the "assets remaining" message printed
  albums.length; it now prints assets.length, the collection that is
  actually iterated.
* Open question (not changed here): the removed code serialized the hash
  lookups and checked the current batch in memory for duplicates. The new
  code performs SELECT then INSERT/UPDATE for several assets at once and
  outside a transaction, so two new files with the same hash handled
  concurrently may both see no existing row. Confirm whether photohashes
  has a uniqueness constraint or whether this needs to stay sequential.

 server/scanner.js | 119 ++++++++++++++++++----------------------------
 1 file changed, 46 insertions(+), 73 deletions(-)

diff --git a/server/scanner.js b/server/scanner.js
index 030f9db..1ec5590 100755
--- a/server/scanner.js
+++ b/server/scanner.js
@@ -202,9 +202,45 @@ function processBlock(items) {
   });
 
   let toProcess = processing.length, lastMessage = moment();
+  console.log("Hash items to be processed: " + toProcess);
   return Promise.map(processing, function(asset) {
     return computeHash(picturesPath + asset.album.path + asset.filename).then(function(hash) {
       asset.hash = hash;
+      return asset;
+    }).then(function(asset) {
+      return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
+        "LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
+        "LEFT JOIN albums ON (albums.id=photos.albumId) " +
+        "WHERE hash=:hash OR photoId=:id", {
+        replacements: asset,
+        type: photoDB.sequelize.QueryTypes.SELECT
+      }).then(function(results) {
+        let query;
+
+        if (results.length == 0) {
+          query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
+        } else if (results[0].hash != asset.hash) {
+          query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id";
+        } else if (results[0].photoId != asset.id) {
+          console.log("Duplicate asset: " +
+            "'" + asset.album.path + asset.filename + "' is a copy of " +
+            "'" + results[0].path + results[0].filename + "'");
+          duplicates.push(asset);
+          return;
+        }
+
+        /* Even if the hash doesn't need to be updated, the entry needs to be scanned */
+        needsProcessing.push(asset);
+
+        if (!query) {
+          return asset;
+        }
+
+        return photoDB.sequelize.query(query, {
+          replacements: asset,
+        }).then(function() {
+          return asset;
+        });
     }).then(function() {
       toProcess--;
       if (moment().add(-5, 'seconds') > lastMessage) {
@@ -213,76 +249,11 @@ function processBlock(items) {
       }
     });
+    });
   }, {
-    concurrency: 10
-  }).then(function() {
-    let toProcess = processing.length, lastMessage = moment();
-    /* Needs to be one at a time in case there are multiple HASH collisions. To speed
-     * up commits to the DB we will batch these into 100 record transactions where HASH
-     * collions are done via DB query *AND* in-memory table lookup in the current batch */
-    let batchSize = 100, batches = [];
-    while (processing.length) {
-      batches.push(processing.splice(0, batchSize));
-    }
-    return Promise.mapSeries(batches, function(batch) {
-      return photoDB.sequelize.transaction(function(transaction) {
-        return Promise.mapSeries(batch, function(asset, index) {
-          return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
-            "LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
-            "LEFT JOIN albums ON (albums.id=photos.albumId) " +
-            "WHERE hash=:hash OR photoId=:id", {
-            replacements: asset,
-            type: photoDB.sequelize.QueryTypes.SELECT
-          }).then(function(results) {
-            let query;
-
-            /* If this asset exists in this transaction block, push it into the results */
-            for (let i = 0; i < index; i++) {
-              if (batch[i].hash == asset.hash) {
-                results.push({
-                  id: batch[i].id,
-                  hash: batch[i].hash,
-                  path: batch[i].album.path,
-                  filename: batch[i].filename
-                });
-              }
-            }
-
-            if (results.length == 0) {
-              query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
-            } else if (results[0].hash != asset.hash) {
-              query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
-            } else if (results[0].photoId != asset.id) {
-              console.log("Duplicate asset: " +
-                "'" + asset.album.path + asset.filename + "' is a copy of " +
-                "'" + results[0].path + results[0].filename + "'");
-              duplicates.push(asset);
-              return;
-            }
-
-            /* Even if the hash doesn't need to be updated, the entry needs to be scanned */
-            needsProcessing.push(asset);
-
-            if (!query) {
-              return;
-            }
-
-            return photoDB.sequelize.query(query, {
-              replacements: asset,
-              transaction: transaction
-            });
-          }).then(function() {
-            toProcess--;
-            if (moment().add(-5, 'seconds') > lastMessage) {
-              console.log("Hash items to be checked: " + toProcess);
-              lastMessage = moment();
-            }
-          });
-        });
-      });
-    }).catch(function(error) {
-      console.log("Error commiting HASH transactions");
-      throw error;
-    });
+    concurrency: 5
+  }).catch(function(error) {
+    console.log("Error commiting HASH transactions");
+    throw error;
   }).then(function() {
     let toProcess = needsProcessing.length, lastMessage = moment();
     console.log(needsProcessing.length + " assets need to have metadata extracted");
@@ -711,16 +682,20 @@ module.exports = {
         ((Date.now() - now) / 1000) + "s");
       now = Date.now();
 
+      console.log(assets.length + " assets remaining to have DB entries updated. ETA N/A");
+
       let processed = 0, start = Date.now(), last = 0, updateScanned = [];
       return photoDB.sequelize.transaction(function(transaction) {
         return Promise.map(assets, function(asset) {
           return findOrUpdateDBAsset(transaction, asset).then(function(asset) {
-            if (asset.scanned < asset.stats.mtime) {
+            if (asset.scanned < asset.stats.mtime || !asset.added) {
               needsProcessing.push(asset);
             } else {
               updateScanned.push(asset.id);
             }
           });
+        }, {
+          concurrency: 10
         }).then(function(asset) {
           processed++;
 
@@ -735,8 +710,6 @@ module.exports = {
           last = processed;
           start = Date.now();
         });
-      }, {
-        concurrency: 5
       }).then(function() {
         if (updateScanned.length) {
           return photoDB.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:ids)", {