From e3cdad1f2aaa3a1da1523dc8b48796c0bca071f8 Mon Sep 17 00:00:00 2001 From: Wyatt Johnson Date: Tue, 16 Jan 2018 16:47:05 -0700 Subject: [PATCH 1/2] added asset re-write --- bin/cli-assets | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ models/asset.js | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/bin/cli-assets b/bin/cli-assets index d14df320b..e13d7db28 100755 --- a/bin/cli-assets +++ b/bin/cli-assets @@ -14,6 +14,8 @@ const AssetsService = require('../services/assets'); const mongoose = require('../services/mongoose'); const scraper = require('../services/scraper'); const inquirer = require('inquirer'); +const { URL } = require('url'); +const errors = require('../errors'); // Register the shutdown criteria. util.onshutdown([() => mongoose.disconnect()]); @@ -125,6 +127,51 @@ async function merge(srcID, dstID) { } } +async function rewrite(search, replace) { + try { + search = new RegExp(search); + + const assets = await AssetModel.find({ + url: { $regex: search }, + }); + if (assets.length === 0) { + console.log(`No assets found with the pattern: ${search}`); + return util.shutdown(0); + } + + const bulk = AssetModel.collection.initializeUnorderedBulkOp(); + + let ops = 0; + assets.forEach(({ id, url: oldURL }) => { + // Replace the url. + const newURL = oldURL.replace(search, replace); + + // Try to validate that the new url is valid. + try { + new URL(newURL); + } catch (err) { + throw errors.ErrInvalidAssetURL; + } + + // If the url was updated with the operation, then queue up the update op. + if (newURL !== oldURL) { + ops++; + bulk.find({ id }).updateOne({ $set: { url: newURL } }); + } + }); + + if (ops > 0) { + await bulk.execute(); + } + console.log(`${ops} assets had their url's updated`); + + util.shutdown(0); + } catch (err) { + console.error(err); + util.shutdown(1); + } +} + //============================================================================== // Setting up the program command line arguments. 
//============================================================================== @@ -151,6 +198,13 @@ program ) .action(merge); +program + .command('rewrite <search> <replace>') + .description( + "rewrites asset url's using the provided regex replacement pattern" + ) + .action(rewrite); + program.parse(process.argv); // If there is no command listed, output help. diff --git a/models/asset.js b/models/asset.js index 1f565a110..68ca08bbf 100644 --- a/models/asset.js +++ b/models/asset.js @@ -41,7 +41,7 @@ const AssetSchema = new Schema( publication_date: Date, modified_date: Date, - // This object is used exclusivly for storing settings that are to override + // This object is used exclusively for storing settings that are to override // the base settings from the base Settings object. This is to be accessed // always after running `rectifySettings` against it. settings: { From 45d6800143517003ed0b127935b15857678a8a9b Mon Sep 17 00:00:00 2001 From: Wyatt Johnson Date: Tue, 16 Jan 2018 17:10:59 -0700 Subject: [PATCH 2/2] added dry mode --- bin/cli-assets | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/bin/cli-assets b/bin/cli-assets index e13d7db28..78229d91b 100755 --- a/bin/cli-assets +++ b/bin/cli-assets @@ -15,7 +15,6 @@ const mongoose = require('../services/mongoose'); const scraper = require('../services/scraper'); const inquirer = require('inquirer'); const { URL } = require('url'); -const errors = require('../errors'); // Register the shutdown criteria. 
util.onshutdown([() => mongoose.disconnect()]); @@ -127,7 +126,7 @@ async function merge(srcID, dstID) { } } -async function rewrite(search, replace) { +async function rewrite(search, replace, options) { try { search = new RegExp(search); @@ -139,9 +138,7 @@ async function rewrite(search, replace) { return util.shutdown(0); } - const bulk = AssetModel.collection.initializeUnorderedBulkOp(); - - let ops = 0; + let opts = []; assets.forEach(({ id, url: oldURL }) => { // Replace the url. const newURL = oldURL.replace(search, replace); @@ -150,20 +147,41 @@ async function rewrite(search, replace) { try { new URL(newURL); } catch (err) { - throw errors.ErrInvalidAssetURL; + throw new Error( + `Rewrite would have replaced the valid URL ${oldURL} with an invalid one ${newURL}` + ); } - // If the url was updated with the operation, then queue up the update op. - if (newURL !== oldURL) { - ops++; - bulk.find({ id }).updateOne({ $set: { url: newURL } }); - } + opts.push({ + find: { id }, + updateOne: { $set: { url: newURL } }, + id, + oldURL, + newURL, + }); }); - if (ops > 0) { - await bulk.execute(); + if (opts.length > 0) { + if (options.dryRun) { + const table = new Table({ head: ['ID', 'Old URL', 'New URL'] }); + + opts.forEach(({ id, oldURL, newURL }) => { + table.push([id, oldURL, newURL]); + }); + + console.log(table.toString()); + } else { + const bulk = AssetModel.collection.initializeUnorderedBulkOp(); + opts.forEach(({ find, updateOne, oldURL, newURL }) => { + // If the url was updated with the operation, then queue up the update op. 
+ if (newURL !== oldURL) { + bulk.find(find).updateOne(updateOne); + } + }); + await bulk.execute(); + console.log(`${opts.length} assets had their url's updated`); + } } - console.log(`${ops} assets had their url's updated`); util.shutdown(0); } catch (err) { @@ -200,6 +218,7 @@ program program .command('rewrite <search> <replace>') + .option('-d, --dry-run', 'enables dry run of the replacement') .description( "rewrites asset url's using the provided regex replacement pattern" )