Files
talk/services/wordlist.js
T
2017-10-11 16:41:09 -06:00

211 lines
5.1 KiB
JavaScript

const debug = require('debug')('talk:services:wordlist');
const _ = require('lodash');
const SettingsService = require('./settings');
const Errors = require('../errors');
const memoize = require('lodash/memoize');
const {escapeRegExp} = require('./regex');
/**
 * Generate a regular expression that catches the `phrases`.
 *
 * Each phrase is split on whitespace into words; the words are escaped with
 * `escapeRegExp` and re-joined so that any run of whitespace or of the
 * punctuation characters `"`, `?`, `!` and `.` may separate them. The phrase
 * alternatives are only matched when they are delimited on both sides by a
 * non-word character or by the start/end of the input. Matching is case
 * insensitive.
 *
 * Phrases are trimmed first and blank phrases are ignored: an empty
 * alternative would otherwise match nearly every input, and leading/trailing
 * whitespace would force extra separators around the phrase.
 *
 * @param {Array<String>} phrases the words/phrases to match
 * @return {RegExp} pattern matching any phrase; never matches when no usable
 *                  phrase was supplied
 */
function generateRegExp(phrases) {
  const alternatives = phrases
    .map((phrase) => phrase.trim())
    .filter((phrase) => phrase.length > 0)
    .map((phrase) =>
      phrase
        .split(/\s+/)
        .map((word) => escapeRegExp(word))
        .join('[\\s"?!.]+')
    );

  if (alternatives.length === 0) {
    // Nothing usable to match on: an empty negative lookahead always fails.
    return new RegExp('(?!)', 'iu');
  }

  const inner = alternatives.join('|');
  return new RegExp(`(^|[^\\w])(${inner})(?=[^\\w]|$)`, 'iu');
}
/**
 * Memoized version of generateRegExp.
 *
 * The cache key is the JSON serialization of the phrase list rather than a
 * plain comma join, so lists such as `['a,b']` and `['a', 'b']` do not share
 * a cache entry (and therefore a compiled pattern).
 */
const generateRegExpMemoized = memoize(generateRegExp, (phrases) => JSON.stringify(phrases));
/**
 * A pattern that can never match: it consists solely of an empty negative
 * lookahead, which fails at every position of any input.
 */
const neverMatch = new RegExp('(?!)');
/**
 * The root wordlist object. Holds one compiled pattern per list (`banned` and
 * `suspect`) and scans text against them.
 * @type {Object}
 */
class Wordlist {

  /**
   * Creates a wordlist whose `banned` and `suspect` patterns match nothing
   * until lists are supplied through `load` or `upsert`.
   */
  constructor() {
    this.regexp = {
      banned: neverMatch,
      suspect: neverMatch,
    };
  }

  /**
   * Loads the wordlists from the settings returned by
   * `SettingsService.retrieve()`.
   * @return {Promise} settles once `settings.wordlist` has been applied
   */
  load() {
    return SettingsService
      .retrieve()
      .then((settings) => {

        // Insert the settings wordlist.
        this.upsert(settings.wordlist);
      });
  }

  /**
   * Replaces the compiled patterns with ones built from the given lists.
   *
   * Only the `banned` and `suspect` keys are read. A key that is absent from
   * `lists` leaves the current pattern untouched; a key whose value is empty
   * or falsy resets that pattern so it matches nothing.
   *
   * @param {Object} lists object optionally carrying `banned` and/or
   *                       `suspect` arrays of words/phrases
   * @return {Promise<Wordlist>} resolves with this instance
   */
  upsert(lists) {

    // `in` throws on anything that is not an object (e.g. when the settings
    // carry no wordlist at all), so treat those as "nothing to update".
    if (Object(lists) !== lists) {
      debug('No wordlists were provided, keeping the current wordlists.');
      return Promise.resolve(this);
    }

    for (const k of ['banned', 'suspect']) {
      if (!(k in lists)) {
        continue;
      }

      const words = lists[k];

      // A present-but-null list counts as empty instead of crashing on
      // `.length`.
      const count = words && words.length > 0 ? words.length : 0;

      this.regexp[k] = count > 0 ? generateRegExpMemoized(words) : neverMatch;

      debug(`Added ${count} words to the ${k} wordlist.`);
    }

    return Promise.resolve(this);
  }

  /**
   * Scans a specific field for wordlist violations.
   *
   * @param {String} fieldName name of the field, used for debug output only
   * @param {String} phrase    the text to test
   * @return {Object} empty when nothing matched, otherwise an object with
   *                  either `banned` or `suspect` set to
   *                  `Errors.ErrContainsProfanity`; the banned list is
   *                  checked first and wins
   */
  scan(fieldName, phrase) {
    const errors = {};

    // If there is no text, then it can't be profane!
    if (!phrase) {
      return errors;
    }

    // Check if the field contains a banned word. This is the worst possible
    // outcome, so the suspect list is not consulted afterwards.
    if (this.regexp.banned.test(phrase)) {
      debug(`the field "${fieldName}" contained a phrase "${phrase}" which contained a banned word/phrase`);
      errors.banned = Errors.ErrContainsProfanity;
      return errors;
    }

    // Check if the field contains a suspected word.
    if (this.regexp.suspect.test(phrase)) {
      debug(`the field "${fieldName}" contained a phrase "${phrase}" which contained a suspected word/phrase`);
      errors.suspect = Errors.ErrContainsProfanity;
    }

    return errors;
  }

  /**
   * Perform the filtering based on the loaded wordlists.
   *
   * @param {Object}    body   object to read the fields from
   * @param {...String} fields selectors (resolved with `_.get`) of the fields
   *                           to scan, in order
   * @return {Object} merged `scan` results of the scanned fields; scanning
   *                  stops at the first field containing a banned phrase
   */
  filter(body, ...fields) {

    // Start with the sensible default that the content does not contain
    // profanity.
    const errors = {};

    // Loop over all the fields from the body that we want to check.
    for (const fieldName of fields) {
      const phrase = _.get(body, fieldName, false);

      // If the field doesn't exist in the body, then it can't be profane!
      if (!phrase) {
        continue;
      }

      Object.assign(errors, this.scan(fieldName, phrase));

      // Stop looping through the fields now, we discovered the worst possible
      // situation (a banned word). Suspect hits keep the loop going.
      if (errors.banned) {
        break;
      }
    }

    return errors;
  }

  /**
   * Check a potential username for banned words, using a freshly loaded
   * wordlist.
   *
   * @param {String} username the username to test
   * @return {Promise} resolves with `Errors.ErrContainsProfanity` when the
   *                   username matches the banned list, otherwise with
   *                   `undefined`
   */
  static usernameCheck(username) {
    const wl = new Wordlist();

    return wl
      .load()
      .then(() => {
        if (wl.regexp.banned.test(username)) {
          return Errors.ErrContainsProfanity;
        }
      });
  }

  /**
   * Connect middleware for scanning request bodies for wordlisted words. The
   * result of the instance `filter` (an object that may carry `banned` and/or
   * `suspect` errors, or is empty) is attached to `req.wordlist`. Failures
   * while loading or filtering are forwarded to `next`.
   * @param  {Array}    fields selectors for the body to extract the fields to
   *                           be tested
   * @return {Function}        the Connect middleware
   */
  static filter(...fields) {
    return async (req, res, next) => {

      // Create a new instance of the Wordlist.
      const wl = new Wordlist();

      try {
        await wl.load();

        // Perform a filtering operation using the new instance of the
        // Wordlist.
        req.wordlist = wl.filter(req.body, ...fields);
      } catch (err) {
        return next(err);
      }

      // Call the next piece of middleware.
      return next();
    };
  }
}
// Expose the Wordlist class as this module's export.
module.exports = Wordlist;