diff --git a/package.json b/package.json
index 19db0b7e7..6fdf4560a 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "talk",
-  "version": "1.1.0",
+  "version": "1.4.0",
   "description": "A commenting platform from The Coral Project. https://coralproject.net",
   "main": "app.js",
   "scripts": {
diff --git a/services/wordlist.js b/services/wordlist.js
index 8dc16fba2..2ae8ed90c 100644
--- a/services/wordlist.js
+++ b/services/wordlist.js
@@ -2,9 +2,13 @@ const debug = require('debug')('talk:services:wordlist');
 const _ = require('lodash');
 const natural = require('natural');
 const tokenizer = new natural.WordTokenizer();
+const nameTokenizer = new natural.RegexpTokenizer({pattern: /_/});
 const SettingsService = require('./settings');
 const Errors = require('../errors');
 
+// Regex used to keep entries containing emojis out of the wordlist.
+const EMOJI_REGEX = /(?:[\u2700-\u27bf]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/;
+
 /**
  * The root wordlist object.
  * @type {Object}
@@ -58,7 +62,27 @@ class Wordlist {
    * @return {Array} the parsed list
    */
   static parseList(list) {
-    return _.uniq(list.map((word) => tokenizer.tokenize(word.toLowerCase())));
+    return _.uniq(list.filter((word) => {
+      if (EMOJI_REGEX.test(word)) {
+        return false;
+      }
+
+      return true;
+    })
+    .map((word) => {
+      if (word.length === 1) {
+        return [word.toLowerCase()];
+      }
+
+      return tokenizer.tokenize(word.toLowerCase());
+    })
+    .filter((tokens) => {
+      if (tokens.length === 0) {
+        return false;
+      }
+
+      return true;
+    }));
   }
 
   /**
@@ -66,11 +90,12 @@ class Wordlist {
    * @param {String} phrase value to check for blockwords.
+   * @param {Object} [tk] tokenizer used to split the phrase, defaults to the word tokenizer.
    * @return {Boolean} true if a blockword is found, false otherwise.
    */
-  match(list, phrase) {
+  match(list, phrase, tk = tokenizer) {
 
     // Lowercase the word to ensure that we don't miss a match due to
     // capitalization.
-    let lowerPhraseWords = tokenizer.tokenize(phrase.toLowerCase());
+    let lowerPhraseWords = tk.tokenize(phrase.toLowerCase());
 
     // This will return true in the event that at least one blockword is found
     // in the phrase.
@@ -199,28 +224,32 @@ class Wordlist {
   }
 
   /**
-   * check potential username for banned words, special characters
+   * check potential username for banned words
+   * @param  {String} username the username to check
+   * @return {Promise} rejects with ErrContainsProfanity if a banned word is found
    */
   static usernameCheck(username) {
     const wl = new Wordlist();
 
-    return wl.load()
+    return wl
+      .load()
       .then(() => {
-        username = username.replace(/_/g, '');
-
-        // test each word, and fail if we find a match
-        const hasBadWords = wl.lists.banned.some(phrase => {
-          return username.indexOf(phrase.join('')) !== -1;
-        });
-
-        if (hasBadWords) {
-          throw Errors.ErrContainsProfanity;
-        } else {
-          return Promise.resolve(username);
+        if (!wl.checkName(wl.lists.banned, username)) {
+          throw Errors.ErrContainsProfanity;
         }
       });
   }
 
+  /**
+   * Check a name against a wordlist, tokenizing the name with nameTokenizer.
+   * @param  {Array}   list the parsed wordlist to check against
+   * @param  {String}  name the name to check
+   * @return {Boolean} true if no listed word is found in the name, false otherwise
+   */
+  checkName(list, name) {
+    return !this.match(list, name, nameTokenizer);
+  }
+
   /**
    * Connect middleware for scanning request bodies for wordlisted words and
    * attaching a ErrContainsProfanity to the req.wordlisted parameter, otherwise
diff --git a/test/services/users.js b/test/services/users.js
index 94aa92432..744906c5b 100644
--- a/test/services/users.js
+++ b/test/services/users.js
@@ -249,6 +249,17 @@ describe('services.UsersService', () => {
           done();
         });
     });
+
+    it('should not allow non-alphanumeric characters in usernames', () => {
+      return UsersService
+        .isValidUsername('hi🖕')
+        .then(() => {
+          throw new Error('expected the username to be rejected');
+        }, (err) => {
+          // Two-argument then: the throw above cannot be swallowed here.
+          expect(err).to.be.ok;
+        });
+    });
   });
 
 });
diff --git a/test/services/wordlist.js b/test/services/wordlist.js
index 6bd05e640..26baf6405 100644
--- a/test/services/wordlist.js
+++ b/test/services/wordlist.js
@@ -9,7 +9,8 @@ describe('services.Wordlist', () => {
     banned: [
       'cookies',
       'how to do bad things',
-      'how to do really bad things'
+      'how to do really bad things',
+      's h i t'
     ],
     suspect: [
       'do bad things'
@@ -32,9 +33,22 @@ describe('services.Wordlist', () => {
 
   });
 
-  describe('#match', () => {
+  describe('#parseList', () => {
+    it('does not include emojis in the wordlist', () => {
+      let list = Wordlist.parseList([
+        '🖕',
+        '🖕 asdf',
+        'asd🖕asdf',
+        'asd🖕',
+      ]);
 
-    const bannedList = Wordlist.parseList(wordlists.banned);
+      expect(list).to.have.length(0);
+    });
+  });
+
+  const bannedList = Wordlist.parseList(wordlists.banned);
+
+  describe('#match', () => {
 
     it('does match on a bad word', () => {
       [
@@ -62,6 +76,26 @@ describe('services.Wordlist', () => {
 
   });
 
+  describe('#checkName', () => {
+    [
+      'flowers',
+      'joy',
+      'lots_of_candy'
+    ].forEach((username) => {
+      it(`does not match on list=banned name=${username}`, () => {
+        expect(wordlist.checkName(bannedList, username)).to.be.true;
+      });
+    });
+
+    [
+      'cookies'
+    ].forEach((username) => {
+      it(`does match on list=banned name=${username}`, () => {
+        expect(wordlist.checkName(bannedList, username)).to.be.false;
+      });
+    });
+  });
+
   describe('#filter', () => {
 
     before(() => wordlist.upsert(wordlists));