Restructure when photohashes is updated: do it together with hash generation instead of in bulk afterwards
This commit is contained in:
parent
71008fe43d
commit
4f1d72147a
@ -202,9 +202,45 @@ function processBlock(items) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
let toProcess = processing.length, lastMessage = moment();
|
let toProcess = processing.length, lastMessage = moment();
|
||||||
|
console.log("Hash items to be processed: " + toProcess);
|
||||||
return Promise.map(processing, function(asset) {
|
return Promise.map(processing, function(asset) {
|
||||||
return computeHash(picturesPath + asset.album.path + asset.filename).then(function(hash) {
|
return computeHash(picturesPath + asset.album.path + asset.filename).then(function(hash) {
|
||||||
asset.hash = hash;
|
asset.hash = hash;
|
||||||
|
return asset;
|
||||||
|
}).then(function(asset) {
|
||||||
|
return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
|
||||||
|
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
|
||||||
|
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
|
||||||
|
"WHERE hash=:hash OR photoId=:id", {
|
||||||
|
replacements: asset,
|
||||||
|
type: photoDB.sequelize.QueryTypes.SELECT
|
||||||
|
}).then(function(results) {
|
||||||
|
let query;
|
||||||
|
|
||||||
|
if (results.length == 0) {
|
||||||
|
query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
|
||||||
|
} else if (results[0].hash != asset.hash) {
|
||||||
|
query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
|
||||||
|
} else if (results[0].photoId != asset.id) {
|
||||||
|
console.log("Duplicate asset: " +
|
||||||
|
"'" + asset.album.path + asset.filename + "' is a copy of " +
|
||||||
|
"'" + results[0].path + results[0].filename + "'");
|
||||||
|
duplicates.push(asset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Even if the hash doesn't need to be updated, the entry needs to be scanned */
|
||||||
|
needsProcessing.push(asset);
|
||||||
|
|
||||||
|
if (!query) {
|
||||||
|
return asset;
|
||||||
|
}
|
||||||
|
|
||||||
|
return photoDB.sequelize.query(query, {
|
||||||
|
replacements: asset,
|
||||||
|
}).then(function() {
|
||||||
|
return asset;
|
||||||
|
});
|
||||||
}).then(function() {
|
}).then(function() {
|
||||||
toProcess--;
|
toProcess--;
|
||||||
if (moment().add(-5, 'seconds') > lastMessage) {
|
if (moment().add(-5, 'seconds') > lastMessage) {
|
||||||
@ -213,76 +249,11 @@ function processBlock(items) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}, {
|
}, {
|
||||||
concurrency: 10
|
concurrency: 5
|
||||||
}).then(function() {
|
}).catch(function(error) {
|
||||||
let toProcess = processing.length, lastMessage = moment();
|
console.log("Error commiting HASH transactions");
|
||||||
/* Needs to be one at a time in case there are multiple HASH collisions. To speed
|
throw error;
|
||||||
* up commits to the DB we will batch these into 100 record transactions where HASH
|
});
|
||||||
* collisions are done via DB query *AND* in-memory table lookup in the current batch */
|
|
||||||
let batchSize = 100, batches = [];
|
|
||||||
while (processing.length) {
|
|
||||||
batches.push(processing.splice(0, batchSize));
|
|
||||||
}
|
|
||||||
return Promise.mapSeries(batches, function(batch) {
|
|
||||||
return photoDB.sequelize.transaction(function(transaction) {
|
|
||||||
return Promise.mapSeries(batch, function(asset, index) {
|
|
||||||
return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
|
|
||||||
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
|
|
||||||
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
|
|
||||||
"WHERE hash=:hash OR photoId=:id", {
|
|
||||||
replacements: asset,
|
|
||||||
type: photoDB.sequelize.QueryTypes.SELECT
|
|
||||||
}).then(function(results) {
|
|
||||||
let query;
|
|
||||||
|
|
||||||
/* If this asset exists in this transaction block, push it into the results */
|
|
||||||
for (let i = 0; i < index; i++) {
|
|
||||||
if (batch[i].hash == asset.hash) {
|
|
||||||
results.push({
|
|
||||||
id: batch[i].id,
|
|
||||||
hash: batch[i].hash,
|
|
||||||
path: batch[i].album.path,
|
|
||||||
filename: batch[i].filename
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (results.length == 0) {
|
|
||||||
query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
|
|
||||||
} else if (results[0].hash != asset.hash) {
|
|
||||||
query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
|
|
||||||
} else if (results[0].photoId != asset.id) {
|
|
||||||
console.log("Duplicate asset: " +
|
|
||||||
"'" + asset.album.path + asset.filename + "' is a copy of " +
|
|
||||||
"'" + results[0].path + results[0].filename + "'");
|
|
||||||
duplicates.push(asset);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Even if the hash doesn't need to be updated, the entry needs to be scanned */
|
|
||||||
needsProcessing.push(asset);
|
|
||||||
|
|
||||||
if (!query) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
return photoDB.sequelize.query(query, {
|
|
||||||
replacements: asset,
|
|
||||||
transaction: transaction
|
|
||||||
});
|
|
||||||
}).then(function() {
|
|
||||||
toProcess--;
|
|
||||||
if (moment().add(-5, 'seconds') > lastMessage) {
|
|
||||||
console.log("Hash items to be checked: " + toProcess);
|
|
||||||
lastMessage = moment();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}).catch(function(error) {
|
|
||||||
console.log("Error commiting HASH transactions");
|
|
||||||
throw error;
|
|
||||||
});
|
|
||||||
}).then(function() {
|
}).then(function() {
|
||||||
let toProcess = needsProcessing.length, lastMessage = moment();
|
let toProcess = needsProcessing.length, lastMessage = moment();
|
||||||
console.log(needsProcessing.length + " assets need to have metadata extracted");
|
console.log(needsProcessing.length + " assets need to have metadata extracted");
|
||||||
@ -711,16 +682,20 @@ module.exports = {
|
|||||||
((Date.now() - now) / 1000) + "s");
|
((Date.now() - now) / 1000) + "s");
|
||||||
now = Date.now();
|
now = Date.now();
|
||||||
|
|
||||||
|
console.log(albums.length + " assets remaining to have DB entries updated. ETA N/A");
|
||||||
|
|
||||||
let processed = 0, start = Date.now(), last = 0, updateScanned = [];
|
let processed = 0, start = Date.now(), last = 0, updateScanned = [];
|
||||||
return photoDB.sequelize.transaction(function(transaction) {
|
return photoDB.sequelize.transaction(function(transaction) {
|
||||||
return Promise.map(assets, function(asset) {
|
return Promise.map(assets, function(asset) {
|
||||||
return findOrUpdateDBAsset(transaction, asset).then(function(asset) {
|
return findOrUpdateDBAsset(transaction, asset).then(function(asset) {
|
||||||
if (asset.scanned < asset.stats.mtime) {
|
if (asset.scanned < asset.stats.mtime || !asset.added) {
|
||||||
needsProcessing.push(asset);
|
needsProcessing.push(asset);
|
||||||
} else {
|
} else {
|
||||||
updateScanned.push(asset.id);
|
updateScanned.push(asset.id);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
}, {
|
||||||
|
concurrency: 10
|
||||||
}).then(function(asset) {
|
}).then(function(asset) {
|
||||||
processed++;
|
processed++;
|
||||||
|
|
||||||
@ -735,8 +710,6 @@ module.exports = {
|
|||||||
last = processed;
|
last = processed;
|
||||||
start = Date.now();
|
start = Date.now();
|
||||||
});
|
});
|
||||||
}, {
|
|
||||||
concurrency: 5
|
|
||||||
}).then(function() {
|
}).then(function() {
|
||||||
if (updateScanned.length) {
|
if (updateScanned.length) {
|
||||||
return photoDB.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:ids)", {
|
return photoDB.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:ids)", {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user