"use strict"; const Promise = require("bluebird"), fs = require("fs"), config = require("config"), moment = require("moment"), crypto = require("crypto"); let scanning = 0; let photoDB = null; const picturesPath = config.get("picturesPath").replace(/\/$/, "") + "/"; const processQueue = [], triedClean = []; //const rawExtension = /\.(nef|orf)$/i, extensions = [ "jpg", "jpeg", "png", "gif", "nef", "orf" ]; const rawExtension = /\.nef$/i, extensions = [ "jpg", "jpeg", "png", "gif", "nef" ]; function removeNewerFile(path, fileA, fileB) { fs.stat(path + "/" + fileA, function(err, statsA) { if (err) { return; } fs.stat(path + "/" + fileB, function(err, statsB) { if (err) { return; } if (statsA.mtime > statsB.mtime) { console.log("Removing file by moving to 'corrupt':" + fileA); moveCorrupt(path, fileA); } else { console.log("Removing file by moving to 'corrupt':" + fileB); moveCorrupt(path, fileB); } }); }); } function _scanDir(parent, path) { let re = new RegExp("\.((" + extensions.join(")|(") + "))$", "i"), replacements = { path: path.slice(picturesPath.length), name: path.replace(/.*\//, "").replace(/_/g, " "), parent: parent || null }; /* Ensure that top level images are placed into an album with a root path */ replacements.path = replacements.path || "/"; let query = "SELECT id FROM albums WHERE path=:path AND "; if (!parent) { query += "parentId IS NULL"; } else { query += "parentId=:parent"; } return photoDB.sequelize.query(query, { replacements: replacements, type: photoDB.sequelize.QueryTypes.SELECT }).then(function(results) { if (results.length == 0) { // console.log("Adding " + path + " under " + parent, replacements); return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parent,:name)", { replacements: replacements }).spread(function(results, metadata) { return metadata.lastID; }); } else { return results[0].id; } }).then(function(parent) { return new Promise(function(resolve, reject) { console.log("Scanning ..." + replacements.path); fs.readdir(path, function(err, files) { if (err) { console.warn("Could not readdir " + path); return resolve([]); } return resolve(files); }); }).then(function(files) { scanning++; /* Remove 'thumbs' and 'raw' directories from being processed */ files = files.filter(function(file) { for (var i = 0; i < files.length; i++) { /* If this file has an original NEF/ORF on the system, don't add the JPG to the DB */ if (rawExtension.exec(files[i]) && file == files[i].replace(rawExtension, ".jpg")) { return false; } /* If there is a different CASE (eg. JPG vs jpg) don't add it, and remove the 'lower case' * version from disk. */ if (file != files[i] && file.toUpperCase() == files[i]) { removeNewerFile(path, file, files[i]); console.log("Duplicate file in " + path + ": ", file, files[i]); return false; } } return file != "raw" && file != "thumbs" && file != ".git" && file != "corrupt"; }); return mkdir(path + "/thumbs/scaled").then(function() { return Promise.map(files, function(file) { let filepath = path + "/" + file; return stat(filepath).then(function(stats) { if (stats.isDirectory()) { return scanDir(parent, filepath).catch(function(error) { console.warn("Could not scanDir " + filepath + ": " + error); }); } /* Check file extensions */ if (!re.exec(file)) { return; } const replacements = { path: path.slice(picturesPath.length), filename: file.replace(rawExtension, ".jpg") /* We will be converting from NEF/ORF => JPG */ }; replacements.path = replacements.path || "/"; return photoDB.sequelize.query("SELECT id FROM photos WHERE path=:path AND filename=:filename", { replacements: replacements, type: photoDB.sequelize.QueryTypes.SELECT }).then(function(photo) { if (photo.length == 0) { processQueue.push([ replacements.path, file, stats.mtime, parent ]); } }).catch(function(error) { console.error("Sequelize.query failed: ", error); }); }).catch(function(error) { console.warn("Could not stat " + filepath + ": " + error); }); }, { concurrency: 1 }).then(function() { scanning--; if (scanning == 0) { const endStamp = Date.now(); console.log("Scanning completed in " + Math.round(((endStamp - startStamp))) + "ms. " + processQueue.length + " items to process."); } }); }); }); }); } const startStamp = Date.now(); let processRunning = false; const { spawn } = require('child_process'); const sharp = require("sharp"), exif = require("exif-reader"); const stat = function (path) { if (path.indexOf(picturesPath) != 0) { path = picturesPath + path; } return new Promise(function (resolve, reject) { fs.stat(path, function (error, stats) { if (error) { return reject(error); } return resolve(stats); }); }); } const mkdir = function (_path) { if (_path.indexOf(picturesPath) == 0) { _path = _path.substring(picturesPath.length + 1); } let parts = _path.split("/"), path; parts.unshift(picturesPath); return Promise.mapSeries(parts, function (part) { if (!path) { path = picturesPath; } else { path += "/" + part; } return stat(path).catch(function (error) { if (error.code != "ENOENT") { throw error; } return new Promise(function (resolve, reject) { fs.mkdir(path, function (error) { if (error) { return reject(error); } return resolve(); }); }); }); }); } const exists = function(path) { return stat(path).then(function() { return true; }).catch(function() { return false; }); } function convertRawToJpg(path, file) { console.log("Converting " + path + "/" + file); path = picturesPath + path; return new Promise(function(resolve, reject) { return exists(path + "/" + file.replace(rawExtension, ".jpg")).then(function(exist) { if (exist) { console.log("Skipping already converted file: " + file); return; } const ufraw = spawn("ufraw-batch", [ "--silent", "--wb=camera", "--rotate=camera", "--out-type=jpg", "--compression=90", "--exif", "--overwrite", "--output", path + "/" + file.replace(rawExtension, ".jpg"), path + "/" + file ]); const stderr = []; ufraw.stderr.on('data', function(data) { stderr.push(data); }); return new Promise(function(ufraw, resolve, reject) { ufraw.on('exit', function(stderr, code, signal) { if (signal || code != 0) { let error = "UFRAW for " + path + "/" + file + " returned an error: " + code + "\n" + signal + "\n" + stderr.join("\n") + "\n"; console.error(error); return moveCorrupt(path, file).then(function() { console.warn("ufraw failed"); return reject(error); }).catch(function(error) { console.warn("moveCorrupt failed"); return reject(error); }); } return mkdir(path + "/raw").then(function() { fs.rename(path + "/" + file, path + "/raw/" + file, function(err) { if (err) { console.error("Unable to move RAW file: " + path + "/" + file); return reject(err); } return resolve(); }); }).catch(function(error) { console.warn("mkdir failed"); return reject(error); }); }.bind(this, ufraw)); }.bind(this, stderr)); }); }); } function moveCorrupt(path, file) { if (path.indexOf(picturesPath) != 0) { path = picturesPath + path; } console.warn("Moving corrupt file '" + file + "' to " + path + "/corrupt"); return mkdir(path + "/corrupt").then(function() { return new Promise(function(resolve, reject) { fs.rename(path + "/" + file, path + "/corrupt/" + file, function(err) { if (err) { console.error("Unable to move corrupt file: " + path + "/" + file); return reject(err); } return resolve(); }); }); }); } function triggerWatcher() { setTimeout(triggerWatcher, 1000); if (!processRunning && processQueue.length) { let lastMessage = moment(), toProcess = processQueue.length, processing = processQueue.splice(0); processRunning = true; /* Sort to newest files to be processed first */ processing.sort(function(a, b) { return b[2] - a[2]; }); return Promise.map(processing, function(entry) { var path = entry[0], file = entry[1], created = entry[2], albumId = entry[3]; let tmp = Promise.resolve(file); /* If this is a Nikon RAW file, convert it to JPG and move to /raw dir */ if (rawExtension.exec(file)) { tmp = exists(picturesPath + path + "/" + file.replace(rawExtension, ".jpg")).then(function(exist) { if (exist) { return file.replace(rawExtension, ".jpg"); /* We converted from NEF/ORF => JPG */ } return mkdir(picturesPath + path + "/raw").then(function() { return convertRawToJpg(path, file); }).then(function() { return file.replace(rawExtension, ".jpg"); /* We converted from NEF/ORF => JPG */ }); }); } return tmp.then(function(file) { var src = picturesPath + path + "/" + file, image = sharp(src); return image.limitInputPixels(1073741824).metadata().then(function(metadata) { if (metadata.exif) { metadata.exif = exif(metadata.exif); delete metadata.exif.thumbnail; delete metadata.exif.image; for (var key in metadata.exif.exif) { if (Buffer.isBuffer(metadata.exif.exif[key])) { metadata.exif.exif[key] = "Buffer[" + metadata.exif.exif[key].length + "]"; } } } let replacements = { albumId: albumId, name: file.replace(/.[^.]*$/, ""), path: path, filename: file, width: metadata.width, height: metadata.height, added: moment().format() }; /* Ensure that top level images are placed into an album with a root path */ replacements.path = replacements.path || "/"; if (metadata.exif && metadata.exif.exif && metadata.exif.exif.DateTimeOriginal && !isNaN(metadata.exif.exif.DateTimeOriginal.valueOf())) { replacements.taken = moment(metadata.exif.exif.DateTimeOriginal).format(); replacements.modified = moment(metadata.exif.exif.DateTimeOriginal).format(); if (replacements.taken == "Invalid date" || replacements.taken.replace(/T.*/, "") == "1899-11-30") { console.log("Invalid EXIF date information: ", JSON.stringify(metadata.exif.exif)); replacements.taken = replacements.modified = moment(created).format(); } } else { /* Attempt to infer the datestamp from the filename */ let date = moment(created).format(); let match = file.match(/WhatsApp Image (20[0-9][0-9]-[0-9][0-9]-[0-9][0-9]) at (.*).(jpeg|jpg)/); if (match) { date = moment((match[1]+" "+match[2]), "YYYY-MM-DD h.mm.ss a").format(); if (date == "Invalid date") { date = moment(created).format(); } } else { match = file.match(/(20[0-9][0-9]-?[0-9][0-9]-?[0-9][0-9])[_\-]?([0-9]{6})?/); if (match) { if (match[2]) { /* Stamp had time in it */ date = moment((match[1]+""+match[2]).replace(/-/g, ""), "YYYYMMDDHHmmss").format(); } else { date = moment(match[1].replace(/-/g, ""), "YYYYMMDD").format(); } if (date == "Invalid date") { date = moment(created).format(); } } else { date = moment(created).format(); } } replacements.taken = replacements.modified = date; } let dst = picturesPath + path + "/thumbs/" + file; return exists(dst).then(function(exist) { if (exist) { return; } return image.resize(256, 256).toFile(dst).catch(function(error) { console.error("Error resizing image: " + dst, error); throw error; }); }).then(function() { let dst = picturesPath + path + "/thumbs/scaled/" + file; return exists(dst).then(function(exist) { if (exist) { return; } return image.resize(Math.min(1024, metadata.width)).toFile(dst).catch(function(error) { console.error("Error resizing image: " + dst, error); throw error; }); }); }).then(function() { return photoDB.sequelize.query("INSERT INTO photos " + "(albumId,path,filename,added,modified,taken,width,height,name)" + "VALUES(:albumId,:path,:filename,DATETIME(:added),DATETIME(:modified),DATETIME(:taken),:width,:height,:name)", { replacements: replacements }); }).then(function() { toProcess--; if (moment().add(-5, 'seconds') > lastMessage) { console.log("Items to be processed: " + toProcess); lastMessage = moment(); } }); }).catch(function(error) { console.error("Error reading image " + src + ": ", error); return moveCorrupt(path, file).then(function() { /* If the original file was not a NEF/ORF, we are done... */ if (!rawExtension.exec(entry[1])) { return; } /* ... otherwise, attempt to re-convert the NEF/ORF->JPG and then resize again */ for (var i = 0; i < triedClean.length; i++) { if (triedClean[i] == path + "/" + file) { /* Move the NEF/ORF to /corrupt as well so it isn't re-checked again and again... */ // return moveCorrupt(path, entry[1]); console.error("Already attempted to convert NEF/ORF to JPG: " + path + "/" + file); return; } } console.warn("Adding " + path + "/" + file + " back onto processing queue."); triedClean.push(path + "/" + file); processQueue.push([ path, file, created, albumId ]); }); }); }).catch(function() { console.warn("Continuing file processing."); }); }, { concurrency: 1 }).then(function() { console.log("Completed processing queue."); }); } } /*******************************************************************************************************/ function scanDir(parent, path) { let re = new RegExp("\.((" + extensions.join(")|(") + "))$", "i"), album = { path: path.slice(picturesPath.length) + "/", name: path.replace(/.*\//, "").replace(/_/g, " "), parent: parent, allAssetCount: 0, allAlbumCount: 0 }, albums = [ album ], assets = []; return new Promise(function(resolve, reject) { fs.readdir(path, function(error, files) { if (error) { console.warn("Could not readdir: " + path); return resolve([]); } /* Remove 'thumbs' and 'raw' directories from being processed */ files = files.filter(function(file) { for (var i = 0; i < files.length; i++) { /* If this file has an original NEF/ORF on the system, don't add the JPG to the DB */ if (rawExtension.exec(files[i]) && file == files[i].replace(rawExtension, ".jpg")) { return false; } /* If there is a different CASE (eg. JPG vs jpg) don't add it, and remove the 'lower case' * version from disk. */ if (file != files[i] && file.toUpperCase() == files[i]) { removeNewerFile(path, file, files[i]); console.log("Duplicate file in " + path + ": ", file, files[i]); return false; } } return file != "raw" && file != "thumbs" && file != ".git" && file != "corrupt"; }); return resolve(files); }); }).then(function(files) { return Promise.map(files, function(file) { let filepath = path + "/" + file; return stat(filepath).then(function(stats) { if (stats.isDirectory()) { return scanDir(album, filepath).spread(function(_albums, _assets) { album.allAssetCount += _assets.length; album.allAlbumCount += _albums.length + 1; albums = albums.concat(_albums); assets = assets.concat(_assets); }).catch(function(error) { console.warn("Could not scanDir " + filepath + ": " + error); }); } /* Check file extensions */ if (!re.exec(file)) { return; } assets.push({ filename: file.replace(rawExtension, ".jpg"), /* We will be converting from NEF/ORF => JPG */ name: file.replace(/.[^.]*$/, ""), stats: stats, album: album }); }); }); }).then(function() { return [ albums, assets ]; }); } function findOrCreateDBAlbum(album) { let query = "SELECT id FROM albums WHERE path=:path AND "; if (!album.parent) { query += "parentId IS NULL"; album.parentId = null; } else { if (!album.parent.id) { let error = "Albums in array in non ancestral order!"; console.error(error); throw error; } album.parentId = album.parent.id; query += "parentId=:parentId"; } return photoDB.sequelize.query(query, { replacements: album, type: photoDB.sequelize.QueryTypes.SELECT }).then(function(results) { if (results.length == 0) { if (!album.parent) { console.warn("Creating top level album: " + album.path); } return photoDB.sequelize.query("INSERT INTO albums (path,parentId,name) VALUES(:path,:parentId,:name)", { replacements: album }).spread(function(results, metadata) { return metadata.lastID; }); } else { return results[0].id; } }).then(function(id) { album.id = id; return id; }); } function findOrUpdateDBAsset(asset) { let query = "SELECT id,scanned FROM photos WHERE albumId=:albumId AND filename=:filename"; if (!asset.album || !asset.album.id) { let error = "Asset being processed without an album"; console.error(error); throw error; } asset.albumId = asset.album.id; return photoDB.sequelize.query(query, { replacements: asset, type: photoDB.sequelize.QueryTypes.SELECT }).then(function(results) { if (results.length == 0) { /* Not in DB at all; HASH needs to be created. We could use the sharp loader to create * a buffer, then pass that around. It would complicate our code, so instead we will * leverage the OS' ability to cache read-only file assets in the cache and to pass * those to the sharp layer later :) Accordingly, we will load the file here, create a SHA * and create that HASH entry (unless it already exists, in which case DUPLICATE!!!! Gah.) */ return photoDB.sequelize.query("INSERT INTO photos " + "(albumId,filename,name) " + "VALUES(:albumId,:filename,:name)", { replacements: asset }).spread(function(results, metadata) { return [ metadata.lastID, null ]; }); } else { return [ results[0].id, results[0].scanned ]; } }).spread(function(id, scanned) { asset.id = id; asset.scanned = scanned; return asset; }); } function computeHash(filepath) { return new Promise(function(resolve, reject) { try { const input = fs.createReadStream(filepath), hash = crypto.createHash("sha256"); if (!input) { return reject() } input.on("readable", function() { const data = input.read(); if (data) { hash.update(data); } else { resolve(hash.digest("hex")); } }); } catch(error) { console.error("Unable to create hash: " + error); return reject(error); } }); } module.exports = { scan: function (db) { photoDB = db; /* 1. Scan for all assets which will be managed by the system. readdir * 2. Check if entry in DB. Check mod-time in DB vs. stats from #1 * - For albums * - For assets * 3. If not in DB, or mod-time changed, compute HASH of the file * 4. Check for HASH in photohash -- skip? * 5. Check for and create thumbs/FILE thumbs/scaled/FILE * 6. If necessary, create JPG from RAW * 7. Update last-scanned date in DB for entry * 8. Look up all DB entries with last-scanned date < NOW -- purge from DB (they were * removed on disk)? Also purge from the HASH table. */ let initialized = Date.now(); let now = Date.now(); return scanDir(null, picturesPath).spread(function(albums, assets) { console.log("Found " + assets.length + " assets in " + albums.length + " albums after " + ((Date.now() - now) / 1000) + "s"); /* One at a time, in series, as the album[] array has parents first, then descendants. * Operating in parallel could result in a child being searched for prior to the parent */ now = Date.now(); return Promise.mapSeries(albums, function(album) { return findOrCreateDBAlbum(album); }).then(function() { console.log("Processed " + albums.length + " album DB entries in " + ((Date.now() - now) / 1000) + "s"); now = Date.now(); let processed = 0, start = Date.now(), last = 0, hashNeeded = []; return Promise.map(assets, function(asset) { return findOrUpdateDBAsset(asset).then(function(asset) { if (asset.scanned < asset.stats.mtime) { hashNeeded.push(asset); } }).then(function(asset) { processed++; let elapsed = Date.now() - start; if (elapsed < 5000) { return asset; } let remaining = assets.length - processed; console.log(remaining + " assets remaining. ETA " + Math.ceil((elapsed / 1000) * remaining / (processed - last)) + "s"); last = processed; start = Date.now(); }); }, { concurrency: 5 }).then(function() { console.log(hashNeeded.length + " assets need HASH computed"); return Promise.map(hashNeeded, function(asset) { return computeHash(picturesPath + asset.album.path + asset.filename).then(function(hash) { asset.hash = hash; }); }).then(function() { let needsProcessing = []; return Promise.map(hashNeeded, function(asset) { return db.sequelize.query("SELECT hash FROM photohashes WHERE photoId=:id", { replacements: asset, type: photoDB.sequelize.QueryTypes.SELECT }).then(function(results) { let query; if (results.length == 0) { query = "INSERT INTO photohashes (hash,photoId) VALUES(:hash,:id)"; } else if (results[0].hash != asset.hash) { query = "UPDATE photohashes SET hash=:hash WHERE photoId=:id)"; } else { return; } return db.sequelize.query(query, { replacements: asset }).then(function() { /* HASH has been updated; add to the needsProcessing array */ needsProcessing.push(asset.id); }); }); }, { concurrency: 5 }).then(function() { if (!needsProcessing.length) { return; } /* 'needsProcessing' should only have their scanned stamp updated once thumbnails * have been created. */ console.log(needsProcessing.length + " had their HASH updated. Updating scanned stamp."); return db.sequelize.query("UPDATE photos SET scanned=CURRENT_TIMESTAMP WHERE id IN (:scanned)", { replacements: { scanned: needsProcessing } }); }); }); }).then(function() { console.log("Processed " + assets.length + " asset DB entries in " + ((Date.now() - now) / 1000) + "s"); }); }); /*triggerWatcher();*/ }).then(function() { console.log("Total time to initialize DB and all scans: " + ((Date.now() - initialized) / 1000) + "s"); }); } };