const debug = require('debug')('talk:services:wordlist');
const _ = require('lodash');
const SettingsService = require('./settings');
const Errors = require('../errors');
const memoize = require('lodash/memoize');
const {escapeRegExp} = require('./regex');

/**
 * Never matching regexp that exits immediately.
 */
const neverMatch = /(?!)/;

/**
 * Generate a regular expression that catches the `phrases`.
 *
 * Each phrase is trimmed and split on whitespace; its words are escaped and
 * re-joined so that any run of whitespace or `"?!.` characters may separate
 * them. A phrase only matches when it is preceded by the start of the input or
 * a non-word character, and followed by a non-word character or the end of the
 * input. Matching is case insensitive.
 *
 * Blank phrases are dropped: an empty alternative would match the empty string
 * and flag ordinary text (for example any two adjacent punctuation/space
 * characters) as a violation.
 *
 * @param {Array<String>} phrases the words/phrases to match
 * @return {RegExp} the matching expression, or the never-matching expression
 *                  when no non-blank phrase was provided
 */
function generateRegExp(phrases) {
  const inner = phrases
    .map((phrase) => phrase.trim())
    .filter((phrase) => phrase.length > 0)
    .map((phrase) => phrase
      .split(/\s+/)
      .map((word) => escapeRegExp(word))
      .join('[\\s"?!.]+')
    )
    .join('|');

  if (inner.length === 0) {
    return neverMatch;
  }

  return new RegExp(`(^|[^\\w])(${inner})(?=[^\\w]|$)`, 'iu');
}

/**
 * Memoized version of generateRegExp.
 *
 * The cache key is the JSON encoding of the list so that phrases which
 * themselves contain a comma cannot collide with a differently split list
 * (`['a,b']` vs `['a', 'b']`).
 */
const generateRegExpMemoized = memoize(generateRegExp, (phrases) => JSON.stringify(phrases));

/**
 * The root wordlist object.
 * @type {Object}
 */
class Wordlist {

  constructor() {

    // Until a list is loaded, nothing is considered banned or suspect.
    this.regexp = {
      banned: neverMatch,
      suspect: neverMatch,
    };
  }

  /**
   * Loads the wordlists from the settings returned by the SettingsService.
   * @return {Promise} settles once the retrieved wordlist has been applied
   */
  load() {
    return SettingsService
      .retrieve()
      .then((settings) => {

        // Insert the settings wordlist.
        this.upsert(settings.wordlist);
      });
  }

  /**
   * Inserts the wordlist data.
   * @param  {Object} lists object that may carry a `banned` and/or a `suspect`
   *                        array of phrases; keys that are absent leave the
   *                        current expression for that list untouched
   * @return {Promise}      resolves with this Wordlist instance
   */
  upsert(lists) {
    ['banned', 'suspect'].forEach((k) => {

      // Nothing was provided for this list, keep whatever is loaded already.
      if (!lists || !(k in lists)) {
        return;
      }

      // The key may be present but null/undefined, treat that as empty rather
      // than failing on the length lookup.
      const list = lists[k];
      const count = list ? list.length : 0;

      this.regexp[k] = count > 0 ? generateRegExpMemoized(list) : neverMatch;

      debug(`Added ${count} words to the ${k} wordlist.`);
    });

    return Promise.resolve(this);
  }

  /**
   * Scans a specific field for wordlist violations.
   * @param  {String} fieldName name of the field, only used for debug output
   * @param  {String} phrase    the text to test
   * @return {Object}           empty when nothing matched, otherwise carries
   *                            either a `banned` or a `suspect` key set to
   *                            ErrContainsProfanity
   */
  scan(fieldName, phrase) {
    const errors = {};

    // If the field doesn't exist in the body, then it can't be profane!
    if (!phrase) {
      return errors;
    }

    // Banned is the worst possible outcome, so it is checked first and the
    // suspect list is not consulted when it matches.
    if (this.regexp.banned.test(phrase)) {
      debug(`the field "${fieldName}" contained a phrase "${phrase}" which contained a banned word/phrase`);

      errors.banned = Errors.ErrContainsProfanity;

      return errors;
    }

    // Check if the field contains a suspected word.
    if (this.regexp.suspect.test(phrase)) {
      debug(`the field "${fieldName}" contained a phrase "${phrase}" which contained a suspected word/phrase`);

      errors.suspect = Errors.ErrContainsProfanity;
    }

    return errors;
  }

  /**
   * Perform the filtering based on the loaded wordlists.
   * @param  {Object}    body   the object to read the fields from
   * @param  {...String} fields selectors (as understood by lodash `get`) of the
   *                            fields to test
   * @return {Object}           the merged results of scanning each field
   */
  filter(body, ...fields) {

    // Start with the sensible default that the content does not contain
    // profanity.
    const errors = {};

    // Loop over all the fields from the body that we want to check.
    for (const fieldName of fields) {
      const phrase = _.get(body, fieldName, false);

      // If the field doesn't exist in the body, then it can't be profane!
      if (!phrase) {
        continue;
      }

      Object.assign(errors, this.scan(fieldName, phrase));

      // Stop looping through the fields now, we discovered the worst possible
      // situation (a banned word). A suspect match does not stop the loop as a
      // later field may still contain a banned word.
      if (errors.banned) {
        break;
      }
    }

    return errors;
  }

  /**
   * Check a potential username for banned words.
   * @param  {String} username the username to test
   * @return {Promise}         resolves with ErrContainsProfanity when the
   *                           username matches the banned list, otherwise
   *                           resolves with undefined
   */
  static usernameCheck(username) {
    const wl = new Wordlist();

    return wl
      .load()
      .then(() => {
        if (wl.regexp.banned.test(username)) {
          return Errors.ErrContainsProfanity;
        }
      });
  }

  /**
   * Connect middleware for scanning request bodies for wordlisted words. The
   * result of the scan is attached to `req.wordlist`: an object that is empty
   * when nothing matched, or that carries a `banned` or `suspect` key set to
   * ErrContainsProfanity. A failure while loading the wordlist is passed to
   * `next`.
   * @param  {Array} fields selectors for the body to extract the fields to be
   *                        tested
   * @return {Function} the Connect middleware
   */
  static filter(...fields) {
    return async (req, res, next) => {

      // Create a new instance of the Wordlist.
      const wl = new Wordlist();

      try {
        await wl.load();

        // Perform a filtering operation using the new instance of the
        // Wordlist.
        req.wordlist = wl.filter(req.body, ...fields);
      } catch (err) {
        return next(err);
      }

      // Call the next piece of middleware.
      return next();
    };
  }
}

module.exports = Wordlist;