Processed all 30,000 photos without issue

Signed-off-by: James Ketrenos <james_git@ketrenos.com>
This commit is contained in:
James Ketr 2023-01-13 11:37:29 -08:00
parent fba22f9909
commit 72acc401de

View File

@ -281,19 +281,30 @@ const processBlock = async (items) => {
return b.stats.mtime - a.stats.mtime;
});
let toProcess = processing.length, lastMessage = moment();
let toProcess = processing.length;
setStatus(`Items to be processed: ${toProcess}`);
const updateHash = [],
insertHash = [],
updateAsset = [];
let last = 0,
lastTime = Date.now(),
start = Date.now();
await Promise.map(processing, async (asset) => {
toProcess--;
if (moment().add(-5, 'seconds') > lastMessage) {
setStatus("Items to be processed: " + toProcess);
lastMessage = moment();
const elapsed = Date.now() - lastTime;
if (elapsed > 5000) {
const processed = processing.length - toProcess,
rate = Math.ceil(100 * (processed - last) / elapsed) / 100,
eta = Math.ceil(toProcess *
(((Date.now() - start) / 1000) / processed));
setStatus(`Processed ${processed}/${processing.length} ` +
`(${rate} assets/second). ETA ${eta}s`);
last = processed;
lastTime = Date.now();
}
/* Create JPG from RAW if there is a RAW file and no JPG */
@ -311,38 +322,55 @@ const processBlock = async (items) => {
asset.hash = await computeHash(
picturesPath + asset.album.path + asset.filename)
/* Scan for existing DB entries -- this is a promise, which
* means the closure for the *Hash lists could introduce a race
* condition where the array didn't contain a match prior to calling
* the DB, but during the DB transaction, a match came into being
*/
const results = await photoDB.sequelize.query(
"SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
"WHERE hash=:hash OR photoId=:id", {
replacements: asset,
type: photoDB.sequelize.QueryTypes.SELECT
});
/* Writes to DB for new assets haven't happened yet, so the DB
* won't have new duplicates */
let duplicate = insertHash.find(entry => entry.hash === asset.hash)
|| updateHash.find(entry => entry.hash === asset.hash);
* won't have new duplicates */
let duplicate;
let results = [];
/* In addition to checking new entries, check the DB for old
* duplicates */
if (!duplicate) {
results = await photoDB.sequelize.query(
"SELECT photohashes.*,photos.filename,albums.path FROM photohashes " +
"LEFT JOIN photos ON (photos.id=photohashes.photoId) " +
"LEFT JOIN albums ON (albums.id=photos.albumId) " +
"WHERE hash=:hash OR photoId=:id", {
replacements: asset,
type: photoDB.sequelize.QueryTypes.SELECT
});
if (results.length == 0) {
if (results.length == 0) {
duplicate = insertHash.find(entry => entry.hash === asset.hash);
if (duplicate) {
console.log(`DUPLICATE in INSERT hash list (${insertHash.length}) ` +
`for ${duplicate.filename}`);
} else {
insertHash.push(asset);
} else if (results[0].hash != asset.hash) {
updateHash.push(asset);
} else if (results[0].photoId != asset.id) {
duplicate = results[0];
}
} else if (results[0].hash != asset.hash) {
duplicate = updateHash.find(entry => entry.hash === asset.hash);
if (duplicate) {
console.log(`DUPLICATE in UPDATE hash list (${updateHash.length}) ` +
`for ${duplicate.filename}`);
} else {
updateHash.push(asset);
}
} else if (results[0].photoId != asset.id) {
duplicate = results[0];
console.log(`DUPLICATE in HASH db for ${duplicate.filename}`);
}
/* Output log information about duplicates */
if (duplicate) {
setStatus("Duplicate asset: " +
"'" + asset.album.path + asset.filename + "' is a copy of " +
"'" + duplicate.album.path + duplicate.filename + "'");
/* Fixup structure based on whether obtained from *Hash array or
* from DB */
if (!duplicate.path && duplicate.album) {
duplicate.path = duplicate.album.path;
}
setStatus(`Duplicate asset: ` +
`'${asset.album.path}${asset.filename}' is a copy of ` +
`'${duplicate.path}${duplicate.filename}'`);
if (asset.duplicate != duplicate.photoId) {
asset.duplicate = duplicate.photoId;
duplicates.push(asset);
@ -374,6 +402,33 @@ const processBlock = async (items) => {
});
try {
console.log(`Updating ${updateAsset.length} asset entries.`);
await photoDB.sequelize.transaction(async (t) => {
/* Update the DB with the image information */
await Promise.mapSeries(updateAsset, async (item) => {
await photoDB.sequelize.query("UPDATE photos SET " +
"added=:added,modified=:modified,taken=:taken,width=:width," +
"height=:height,size=:size,scanned=CURRENT_TIMESTAMP " +
"WHERE id=:id", {
replacements: item,
transaction: t
});
});
});
} catch (error) {
console.error(error);
process.exit(-1);
}
/* Run UPDATE for hashes first in case a new photo is being added
* that has the same hash as an OLD entry. This could happen
* if an original file is COPIED and the original DB entry is
* edited (UPDATE), and the original is moved elsewhere (INSERT)
*
* If the UPDATE does not occur first, the INSERT will fail.
*/
try {
console.log(`Updating ${updateHash.length} asset HASH entries.`);
await photoDB.sequelize.transaction(async (t) => {
await Promise.mapSeries(updateHash, async (item) => {
await photoDB.sequelize.query(
@ -383,17 +438,8 @@ const processBlock = async (items) => {
});
});
});
await photoDB.sequelize.transaction(async (t) => {
/* Update the DB with the image information */
await Promise.mapSeries(updateAsset, async (item) => {
await photoDB.sequelize.query("UPDATE photos SET " +
"added=:added,modified=:modified,taken=:taken,width=:width,height=:height,size=:size,scanned=CURRENT_TIMESTAMP " +
"WHERE id=:id", {
replacements: item,
transaction: t
});
});
});
console.log(`Inserting ${insertHash.length} HASH entries.`);
await photoDB.sequelize.transaction(async (t) => {
await Promise.mapSeries(insertHash, async (item) => {
await photoDB.sequelize.query(