Track duplicates, removed, and added items

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
This commit is contained in:
James Ketr 2018-09-28 00:42:51 -07:00
parent 383b5fae61
commit 8f16c0337c
3 changed files with 83 additions and 22 deletions

View File

@ -64,6 +64,10 @@ function init() {
type: Sequelize.BOOLEAN, type: Sequelize.BOOLEAN,
defaultValue: 0 defaultValue: 0
}, },
deleted: {
type: Sequelize.BOOLEAN,
defaultValue: 0
},
albumId: { albumId: {
type: Sequelize.INTEGER, type: Sequelize.INTEGER,
allowNull: true, allowNull: true,

View File

@ -46,7 +46,7 @@ router.get("/memories/*", function(req, res/*, next*/) {
let date = new Date(decodeURI(req.url).replace(/\?.*$/, "")); let date = new Date(decodeURI(req.url).replace(/\?.*$/, ""));
let query = "SELECT photos.*,albums.path AS path FROM photos " + let query = "SELECT photos.*,albums.path AS path FROM photos " +
"INNER JOIN albums ON (albums.id=photos.albumId) " + "INNER JOIN albums ON (albums.id=photos.albumId) " +
"WHERE (photos.duplicate=0 AND photos.scanned NOT NULL AND " + index + ") ORDER BY taken DESC,id DESC LIMIT " + (limit * 2 + 1); "WHERE (photos.duplicate=0 AND photos.deleted=0 AND photos.scanned NOT NULL AND " + index + ") ORDER BY taken DESC,id DESC LIMIT " + (limit * 2 + 1);
// console.log("Memories for " + date.toISOString().replace(/T.*/, "")); // console.log("Memories for " + date.toISOString().replace(/T.*/, ""));
// console.log(query); // console.log(query);
@ -117,7 +117,7 @@ router.get("/*", function(req, res/*, next*/) {
let path = decodeURI(req.url).replace(/\?.*$/, "").replace(/^\//, ""), let path = decodeURI(req.url).replace(/\?.*$/, "").replace(/^\//, ""),
query = "SELECT photos.*,albums.path AS path FROM photos " + query = "SELECT photos.*,albums.path AS path FROM photos " +
"INNER JOIN albums ON (albums.id=photos.albumId AND albums.path LIKE :path) " + "INNER JOIN albums ON (albums.id=photos.albumId AND albums.path LIKE :path) " +
"WHERE (photos.duplicate=0 AND photos.scanned NOT NULL " + index + ") ORDER BY taken DESC,id DESC LIMIT " + (limit * 2 + 1), "WHERE (photos.duplicate=0 AND photos.scanned AND photos.deleted=0 NOT NULL " + index + ") ORDER BY taken DESC,id DESC LIMIT " + (limit * 2 + 1),
replacements = { replacements = {
cursor: cursor, cursor: cursor,
path: path + "%" path: path + "%"

View File

@ -13,7 +13,7 @@ let photoDB = null;
const picturesPath = config.get("picturesPath").replace(/\/$/, "") + "/"; const picturesPath = config.get("picturesPath").replace(/\/$/, "") + "/";
let processQueue = [], triedClean = []; let processQueue = [], triedClean = [], lastScan;
//const rawExtension = /\.(nef|orf)$/i, extensions = [ "jpg", "jpeg", "png", "gif", "nef", "orf" ]; //const rawExtension = /\.(nef|orf)$/i, extensions = [ "jpg", "jpeg", "png", "gif", "nef", "orf" ];
@ -70,7 +70,6 @@ const mkdir = function (_path) {
} }
let parts = _path.split("/"), path; let parts = _path.split("/"), path;
parts.unshift(picturesPath); parts.unshift(picturesPath);
return Promise.mapSeries(parts, function (part) { return Promise.mapSeries(parts, function (part) {
@ -188,20 +187,18 @@ function moveCorrupt(path, file) {
/*******************************************************************************************************/ /*******************************************************************************************************/
function processBlock(items) { function processBlock(items) {
/* Invoke once per second to check if there are items to process */
setTimeout(processBlock, 1000);
if (items) { if (items) {
console.log("Adding " + items.length + " on top of " + processQueue.length + " assets.");
processQueue = processQueue.concat(items); processQueue = processQueue.concat(items);
} }
if (processRunning || processQueue.length == 0) { if (processRunning) {
/* Invoke once per second to check if there are items to process */
setTimeout(processBlock, 1000);
return; return;
} }
let lastMessage = moment(), toProcess = processQueue.length, processing = processQueue.splice(0), let processing = processQueue.splice(0), needsProcessing = [], duplicates = [];
needsProcessing = [], duplicates = [];
processRunning = true; processRunning = true;
@ -215,9 +212,13 @@ function processBlock(items) {
asset.hash = hash; asset.hash = hash;
}); });
}).then(function() { }).then(function() {
let toProcess = processing.length, lastMessage = moment();
/* Needs to be one at a time in case there are multiple HASH collisions */ /* Needs to be one at a time in case there are multiple HASH collisions */
return Promise.mapSeries(processing, function(asset) { return Promise.mapSeries(processing, function(asset) {
return photoDB.sequelize.query("SELECT * FROM photohashes WHERE hash=:hash OR photoId=:id", { return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
"WHERE hash=:hash OR photoId=:id", {
replacements: asset, replacements: asset,
type: photoDB.sequelize.QueryTypes.SELECT type: photoDB.sequelize.QueryTypes.SELECT
}).then(function(results) { }).then(function(results) {
@ -227,7 +228,9 @@ function processBlock(items) {
} else if (results[0].hash != asset.hash) { } else if (results[0].hash != asset.hash) {
query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)"; query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
} else if (results[0].photoId != asset.id) { } else if (results[0].photoId != asset.id) {
console.log("Duplicate asset: " + asset.id + " vs " + results[0].photoId + ". Skipping " + asset.album.path + asset.filename); console.log("Duplicate asset: " +
"'" + asset.album.path + asset.filename + "' is a copy of " +
"'" + results[0].path + results[0].filename + "'");
duplicates.push(asset); duplicates.push(asset);
return; return;
} }
@ -242,9 +245,16 @@ function processBlock(items) {
return photoDB.sequelize.query(query, { return photoDB.sequelize.query(query, {
replacements: asset replacements: asset
}); });
}).then(function() {
toProcess--;
if (moment().add(-5, 'seconds') > lastMessage) {
console.log("Hash items to be processed: " + toProcess);
lastMessage = moment();
}
}); });
}); });
}).then(function() { }).then(function() {
let toProcess = processing.length, lastMessage = moment();
console.log(needsProcessing.length + " assets need to have metadata extracted"); console.log(needsProcessing.length + " assets need to have metadata extracted");
return Promise.map(needsProcessing, function(asset) { return Promise.map(needsProcessing, function(asset) {
var path = asset.album.path, var path = asset.album.path,
@ -382,7 +392,7 @@ function processBlock(items) {
console.warn("Adding " + path + file + " back onto processing queue."); console.warn("Adding " + path + file + " back onto processing queue.");
triedClean.push(path + file); triedClean.push(path + file);
processQueue.push([ path, file, created, albumId ]); processBlock([ path, file, created, albumId ]);
}); });
}); });
}).catch(function() { }).catch(function() {
@ -401,11 +411,50 @@ function processBlock(items) {
} }
}); });
return photoDB.sequelize.query("UPDATE photos SET duplicate=1,scanned=CURRENT_TIME WHERE id IN (:dups)", { if (dups.length == 0) {
return;
}
return photoDB.sequelize.query("UPDATE photos SET duplicate=1,scanned=CURRENT_TIMESTAMP WHERE id IN (:dups)", {
replacements: { replacements: {
dups: dups dups: dups
} }
}); });
}).then(function() {
console.log("Looking for removed assets");
return photoDB.sequelize.query("SELECT photos.scanned,photos.id,photos.filename,albums.path FROM photos " +
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
"WHERE photos.deleted=0 AND DATETIME(photos.scanned)<DATETIME(:lastScan)", {
replacements: {
lastScan: lastScan
},
type: photoDB.sequelize.QueryTypes.SELECT
}).then(function(results) {
let deleted = [];
console.log("Checking " + results.length + " assets to see if they are on disk.");
return Promise.map(results, function(asset) {
return exists(asset.path + asset.filename).then(function(exist) {
if (!exist) {
console.log(asset.path + asset.filename + " no longer exists on disk. Marking as deleted.");
deleted.push(asset.id);
}
});
}).then(function() {
return photoDB.sequelize.query("UPDATE photos SET deleted=1,scanned=CURRENT_TIMESTAMP WHERE id IN (:deleted)", {
replacements: {
deleted: deleted
}
}).then(function() {
return photoDB.sequelize.query("DELETE FROM photohashes WHERE photoId IN (:deleted)", {
replacements: {
deleted: deleted
}
});
}).then(function() {
console.log(deleted.length + " assets deleted.");
});
});
});
}).then(function() { }).then(function() {
processRunning = false; processRunning = false;
}); });
@ -516,7 +565,6 @@ function findOrCreateDBAlbum(album) {
if (!album.parent) { if (!album.parent) {
console.warn("Creating top level album: " + picturesPath); console.warn("Creating top level album: " + picturesPath);
} }
console.log("album: " + album.path);
return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parentId,:name)", { return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parentId,:name)", {
replacements: album replacements: album
}).spread(function(results, metadata) { }).spread(function(results, metadata) {
@ -544,11 +592,6 @@ function findOrUpdateDBAsset(asset) {
type: photoDB.sequelize.QueryTypes.SELECT type: photoDB.sequelize.QueryTypes.SELECT
}).then(function(results) { }).then(function(results) {
if (results.length == 0) { if (results.length == 0) {
/* Not in DB at all; HASH needs to be created. We could use the sharp loader to create
* a buffer, then pass that around. It would complicate our code, so instead we will
* leverage the OS' ability to cache read-only file assets in the cache and to pass
* those to the sharp layer later :) Accordingly, we will load the file here, create a SHA
* and create that HASH entry (unless it already exists, in which case DUPLICATE!!!! Gah.) */
return photoDB.sequelize.query("INSERT INTO photos " + return photoDB.sequelize.query("INSERT INTO photos " +
"(albumId,filename,name) " + "(albumId,filename,name) " +
"VALUES(:albumId,:filename,:name)", { "VALUES(:albumId,:filename,:name)", {
@ -611,6 +654,8 @@ module.exports = {
let initialized = Date.now(); let initialized = Date.now();
let now = Date.now(); let now = Date.now();
const needsProcessing = []; const needsProcessing = [];
lastScan = new Date();
return scanDir(null, picturesPath).spread(function(albums, assets) { return scanDir(null, picturesPath).spread(function(albums, assets) {
console.log("Found " + assets.length + " assets in " + albums.length + " albums after " + console.log("Found " + assets.length + " assets in " + albums.length + " albums after " +
((Date.now() - now) / 1000) + "s"); ((Date.now() - now) / 1000) + "s");
@ -625,11 +670,13 @@ module.exports = {
((Date.now() - now) / 1000) + "s"); ((Date.now() - now) / 1000) + "s");
now = Date.now(); now = Date.now();
let processed = 0, start = Date.now(), last = 0; let processed = 0, start = Date.now(), last = 0, updateScanned = [];
return Promise.map(assets, function(asset) { return Promise.map(assets, function(asset) {
return findOrUpdateDBAsset(asset).then(function(asset) { return findOrUpdateDBAsset(asset).then(function(asset) {
if (asset.scanned < asset.stats.mtime) { if (asset.scanned < asset.stats.mtime) {
needsProcessing.push(asset); needsProcessing.push(asset);
} else {
updateScanned.push(asset.id);
} }
}).then(function(asset) { }).then(function(asset) {
processed++; processed++;
@ -647,6 +694,16 @@ module.exports = {
}); });
}, { }, {
concurrency: 5 concurrency: 5
}).then(function() {
if (updateScanned.length) {
return photoDB.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:ids)", {
replacements: {
ids: updateScanned
}
}).then(function() {
console.log("Updated scan date of " + updateScanned.length + " assets");
});
}
}).then(function() { }).then(function() {
console.log(needsProcessing.length + " assets need HASH computed"); console.log(needsProcessing.length + " assets need HASH computed");
processBlock(needsProcessing); processBlock(needsProcessing);