diff --git a/server/scanner.js b/server/scanner.js
index 2729941..b4b8d02 100755
--- a/server/scanner.js
+++ b/server/scanner.js
@@ -3,13 +3,15 @@
 const Promise = require("bluebird"),
   fs = require("fs"),
   config = require("config"),
-  moment = require("moment");
+  moment = require("moment"),
+  crypto = require("crypto");
+
 
 let scanning = 0;
 
 let photoDB = null;
 
-const picturesPath = config.get("picturesPath").replace(/\/$/, "");
+const picturesPath = config.get("picturesPath").replace(/\/$/, "") + "/";
 
 const processQueue = [], triedClean = [];
 
@@ -63,8 +65,8 @@ function _scanDir(parent, path) {
         // console.log("Adding " + path + " under " + parent, replacements);
         return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parent,:name)", {
           replacements: replacements
-        }).then(function(results) {
-          return results[1].lastID;
+        }).spread(function(results, metadata) {
+          return metadata.lastID;
         });
       } else {
         return results[0].id;
@@ -465,7 +467,7 @@ function triggerWatcher() {
 function scanDir(parent, path) {
   let re = new RegExp("\.((" + extensions.join(")|(") + "))$", "i"),
     album = {
-      path: path.slice(picturesPath.length) || "/",
+      path: path.slice(picturesPath.length) + "/",
       name: path.replace(/.*\//, "").replace(/_/g, " "),
       parent: parent,
       allAssetCount: 0,
@@ -523,11 +525,10 @@ function scanDir(parent, path) {
           }
 
           assets.push({
-            path: path.slice(picturesPath.length),
             filename: file.replace(rawExtension, ".jpg"), /* We will be converting from NEF/ORF => JPG */
             name: file.replace(/.[^.]*$/, ""),
             stats: stats,
-            parent: album
+            album: album
           });
         });
       });
@@ -561,8 +562,8 @@ function findOrCreateDBAlbum(album) {
       }
       return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parentId,:name)", {
         replacements: album
-      }).then(function(results) {
-        return results[1].lastID;
+      }).spread(function(results, metadata) {
+        return metadata.lastID;
       });
     } else {
       return results[0].id;
@@ -574,13 +575,13 @@ function findOrCreateDBAlbum(album) {
 }
 
 function findOrUpdateDBAsset(asset) {
-  let query = "SELECT id FROM photos WHERE albumId=:albumId AND filename=:filename";
-  if (!asset.parent || !asset.parent.id) {
-    let error = "Asset being processed without a parent";
+  let query = "SELECT id,scanned FROM photos WHERE albumId=:albumId AND filename=:filename";
+  if (!asset.album || !asset.album.id) {
+    let error = "Asset being processed without an album";
     console.error(error);
     throw error;
   }
-  asset.albumId = asset.parent.id;
+  asset.albumId = asset.album.id;
   return photoDB.sequelize.query(query, {
     replacements: asset,
     type: photoDB.sequelize.QueryTypes.SELECT
@@ -591,17 +592,48 @@ function findOrUpdateDBAsset(asset) {
        * leverage the OS' ability to cache read-only file assets in the cache and to pass
        * those to the sharp layer later :) Accordingly, we will load the file here, create a SHA
        * and create that HASH entry (unless it already exists, in which case DUPLICATE!!!! Gah.) */
-      return photoDB.sequelize.query("INSERT INTO photos (albumId,filename,name) VALUES(:albumId,:filename,:name)", {
+      return photoDB.sequelize.query("INSERT INTO photos " +
+        "(albumId,filename,name) " +
+        "VALUES(:albumId,:filename,:name)", {
         replacements: asset
-      }).then(function(results) {
-        return results[1].lastID;
+      }).spread(function(results, metadata) {
+        return [ metadata.lastID, null ];
       });
     } else {
-      return results[0].id;
+      return [ results[0].id, results[0].scanned ];
     }
-  }).then(function(id) {
+  }).spread(function(id, scanned) {
     asset.id = id;
-    return id;
+    asset.scanned = scanned;
+    return asset;
+  });
+}
+
+/* Compute the SHA-256 digest of the file at 'filepath'.
+ *
+ * Resolves with the digest as a hex string. Rejects if the read stream can
+ * not be created or if it emits an error while the file is being read. */
+function computeHash(filepath) {
+  return new Promise(function(resolve, reject) {
+    try {
+      const input = fs.createReadStream(filepath),
+        hash = crypto.createHash("sha256");
+      /* Open/read failures are emitted asynchronously as 'error' events, so
+       * the surrounding try/catch never sees them. */
+      input.on("error", reject);
+
+      input.on("readable", function() {
+        const data = input.read();
+        if (data) {
+          hash.update(data);
+        } else {
+          resolve(hash.digest("hex"));
+        }
+      });
+    } catch(error) {
+      console.error("Unable to create hash: " + error);
+      return reject(error);
+    }
   });
 }
 
@@ -620,6 +652,7 @@ module.exports = {
      * 8. Look up all DB entries with last-scanned date < NOW -- purge from DB (they were
      *    removed on disk)? Also purge from the HASH table.
      */
+    let initialized = Date.now();
     let now = Date.now();
     return scanDir(null, picturesPath).spread(function(albums, assets) {
       console.log("Found " + assets.length + " assets in " + albums.length + " albums after " +
@@ -635,26 +668,87 @@ module.exports = {
           ((Date.now() - now) / 1000) + "s");
         now = Date.now();
 
-        let processed = 0, start = Date.now(), last = 0;
+        let processed = 0, start = Date.now(), last = 0, hashNeeded = [];
         return Promise.map(assets, function(asset) {
-          return findOrUpdateDBAsset(asset).then(function() {
-            let elapsed = Date.now() - start;
-            processed++;
-            if (elapsed > 5000) {
-              let remaining = assets.length - processed;
-              console.log(remaining + " assets remaining. ETA " + Math.ceil((elapsed / 1000) * remaining / (processed - last)) + "s");
-              last = processed;
-              start = Date.now();
+          return findOrUpdateDBAsset(asset).then(function(asset) {
+            if (asset.scanned < asset.stats.mtime) {
+              hashNeeded.push(asset);
             }
+            /* Hand the asset on so the progress stage below receives it. */
+            return asset;
+          }).then(function(asset) {
+            processed++;
+
+            let elapsed = Date.now() - start;
+            if (elapsed < 5000) {
+              return asset;
+            }
+
+            let remaining = assets.length - processed;
+            console.log(remaining + " assets remaining. ETA " +
+              Math.ceil((elapsed / 1000) * remaining / (processed - last)) + "s");
+            last = processed;
+            start = Date.now();
           });
         }, {
           concurrency: 5
+        }).then(function() {
+          console.log(hashNeeded.length + " assets need HASH computed");
+          return Promise.map(hashNeeded, function(asset) {
+            return computeHash(picturesPath + asset.album.path + asset.filename).then(function(hash) {
+              asset.hash = hash;
+            });
+          }, {
+            /* Bound the number of files that are open for hashing at once. */
+            concurrency: 5
+          }).then(function() {
+            let needsProcessing = [];
+            return Promise.map(hashNeeded, function(asset) {
+              return photoDB.sequelize.query("SELECT hash FROM photohashes WHERE photoId=:id", {
+                replacements: asset,
+                type: photoDB.sequelize.QueryTypes.SELECT
+              }).then(function(results) {
+                let query;
+                if (results.length == 0) {
+                  query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
+                } else if (results[0].hash != asset.hash) {
+                  query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id";
+                } else {
+                  return;
+                }
+
+                return photoDB.sequelize.query(query, {
+                  replacements: asset
+                }).then(function() {
+                  /* HASH has been updated; add to the needsProcessing array */
+                  needsProcessing.push(asset.id);
+                });
+              });
+            }, {
+              concurrency: 5
+            }).then(function() {
+              if (!needsProcessing.length) {
+                return;
+              }
+
+              /* 'needsProcessing' should only have their scanned stamp updated once thumbnails
+               * have been created. */
+              console.log(needsProcessing.length + " had their HASH updated. Updating scanned stamp.");
+              return photoDB.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:scanned)", {
+                replacements: {
+                  scanned: needsProcessing
+                }
+              });
+            });
+          });
         }).then(function() {
           console.log("Processed " + assets.length + " asset DB entries in " +
             ((Date.now() - now) / 1000) + "s");
         });
       });
       /*triggerWatcher();*/
+    }).then(function() {
+      console.log("Total time to initialize DB and all scans: " + ((Date.now() - initialized) / 1000) + "s");
     });
   }
 };