Prevent duplicate hash entries in hash transaction
This commit is contained in:
parent
8b89ce7507
commit
7077862382
@ -212,55 +212,74 @@ function processBlock(items) {
|
||||
lastMessage = moment();
|
||||
}
|
||||
});
|
||||
}, {
|
||||
concurrency: 5
|
||||
}, {
|
||||
concurrency: 10
|
||||
}).then(function() {
|
||||
let toProcess = processing.length, lastMessage = moment();
|
||||
/* Needs to be one at a time in case there are multiple HASH collisions */
|
||||
return photoDB.sequelize.transaction(function(transaction) {
|
||||
return Promise.mapSeries(processing, function(asset) {
|
||||
return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
|
||||
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
|
||||
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
|
||||
"WHERE hash=:hash OR photoId=:id", {
|
||||
replacements: asset,
|
||||
type: photoDB.sequelize.QueryTypes.SELECT
|
||||
}).then(function(results) {
|
||||
let query;
|
||||
if (results.length == 0) {
|
||||
query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
|
||||
} else if (results[0].hash != asset.hash) {
|
||||
query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
|
||||
} else if (results[0].photoId != asset.id) {
|
||||
console.log("Duplicate asset: " +
|
||||
"'" + asset.album.path + asset.filename + "' is a copy of " +
|
||||
"'" + results[0].path + results[0].filename + "'");
|
||||
duplicates.push(asset);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Even if the hash doesn't need to be updated, the entry needs to be scanned */
|
||||
needsProcessing.push(asset);
|
||||
|
||||
if (!query) {
|
||||
return;
|
||||
}
|
||||
|
||||
return photoDB.sequelize.query(query, {
|
||||
/* Needs to be one at a time in case there are multiple HASH collisions. To speed
|
||||
* up commits to the DB we will batch these into 100 record transactions where HASH
|
||||
* collisions are done via DB query *AND* in-memory table lookup in the current batch */
|
||||
let batchSize = 100, batches = [];
|
||||
while (processing.length) {
|
||||
batches.push(processing.splice(0, batchSize));
|
||||
}
|
||||
return Promise.mapSeries(batches, function(batch) {
|
||||
return photoDB.sequelize.transaction(function(transaction) {
|
||||
return Promise.mapSeries(batch, function(asset, index) {
|
||||
return photoDB.sequelize.query("SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
|
||||
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
|
||||
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
|
||||
"WHERE hash=:hash OR photoId=:id", {
|
||||
replacements: asset,
|
||||
transaction: transaction
|
||||
type: photoDB.sequelize.QueryTypes.SELECT
|
||||
}).then(function(results) {
|
||||
let query;
|
||||
|
||||
/* If this asset exists in this transaction block, push it into the results */
|
||||
for (let i = 0; i < index; i++) {
|
||||
if (batch[i].hash == asset.hash) {
|
||||
results.push(batch[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (results.length == 0) {
|
||||
query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)";
|
||||
} else if (results[0].hash != asset.hash) {
|
||||
query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)";
|
||||
} else if (results[0].photoId != asset.id) {
|
||||
console.log("Duplicate asset: " +
|
||||
"'" + asset.album.path + asset.filename + "' is a copy of " +
|
||||
"'" + results[0].path + results[0].filename + "'");
|
||||
duplicates.push(asset);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Even if the hash doesn't need to be updated, the entry needs to be scanned */
|
||||
needsProcessing.push(asset);
|
||||
|
||||
if (!query) {
|
||||
return;
|
||||
}
|
||||
|
||||
return photoDB.sequelize.query(query, {
|
||||
replacements: asset,
|
||||
transaction: transaction
|
||||
});
|
||||
}).then(function() {
|
||||
toProcess--;
|
||||
if (moment().add(-5, 'seconds') > lastMessage) {
|
||||
console.log("Hash items to be checked: " + toProcess);
|
||||
lastMessage = moment();
|
||||
}
|
||||
});
|
||||
}).then(function() {
|
||||
toProcess--;
|
||||
if (moment().add(-5, 'seconds') > lastMessage) {
|
||||
console.log("Hash items to be checked: " + toProcess);
|
||||
lastMessage = moment();
|
||||
}
|
||||
});
|
||||
});
|
||||
}).catch(function(error) {
|
||||
console.log("Error commiting HASH transactions");
|
||||
throw error;
|
||||
});
|
||||
}).then(function() {
|
||||
let toProcess = processing.length, lastMessage = moment();
|
||||
let toProcess = needsProcessing.length, lastMessage = moment();
|
||||
console.log(needsProcessing.length + " assets need to have metadata extracted");
|
||||
return Promise.map(needsProcessing, function(asset) {
|
||||
var path = asset.album.path,
|
||||
|
Loading…
x
Reference in New Issue
Block a user