diff --git a/client/coral-admin/src/components/CommentBodyHighlighter.js b/client/coral-admin/src/components/CommentBodyHighlighter.js index 39be90d81..9a430f7c7 100644 --- a/client/coral-admin/src/components/CommentBodyHighlighter.js +++ b/client/coral-admin/src/components/CommentBodyHighlighter.js @@ -17,6 +17,7 @@ export default ({suspectWords, bannedWords, body, ...rest}) => { return ( diff --git a/services/wordlist.js b/services/wordlist.js index 2ae8ed90c..8744de4ba 100644 --- a/services/wordlist.js +++ b/services/wordlist.js @@ -1,8 +1,8 @@ const debug = require('debug')('talk:services:wordlist'); const _ = require('lodash'); -const natural = require('natural'); -const tokenizer = new natural.WordTokenizer(); -const nameTokenizer = new natural.RegexpTokenizer({pattern: /\_/}); +const {RegexpTokenizer} = require('natural'); +const tokenizer = new RegexpTokenizer({pattern: /[\.\s\'\"\?\!]/}); +const nameTokenizer = new RegexpTokenizer({pattern: /\_/}); const SettingsService = require('./settings'); const Errors = require('../errors'); @@ -73,7 +73,7 @@ class Wordlist { if (word.length === 1) { return [word]; } - + return tokenizer.tokenize(word.toLowerCase()); }) .filter((tokens) => { diff --git a/test/server/services/wordlist.js b/test/server/services/wordlist.js index 417844da4..19545ff3e 100644 --- a/test/server/services/wordlist.js +++ b/test/server/services/wordlist.js @@ -10,10 +10,13 @@ describe('services.Wordlist', () => { 'cookies', 'how to do bad things', 'how to do really bad things', - 's h i t' + 's h i t', + '$hit', + 'p**ch', + 'p*ch', ], suspect: [ - 'do bad things' + 'do bad things', ] }; @@ -26,9 +29,19 @@ describe('services.Wordlist', () => { before(() => wordlist.upsert(wordlists)); - it('has entries', () => { - expect(wordlist.lists.banned).to.not.be.empty; - expect(wordlist.lists.suspect).to.not.be.empty; + it('parses the wordlists correctly', () => { + expect(wordlist.lists.banned).to.deep.equal([ + [ 'cookies' ], + [ 'how', 'to', 'do', 'bad', 'things' ], + [ 'how', 'to', 'do', 'really', 'bad', 'things' ], + [ 's', 'h', 'i', 't' ], + [ '$hit' ], + [ 'p**ch' ], + [ 'p*ch' ], + ]); + expect(wordlist.lists.suspect).to.deep.equal([ + [ 'do', 'bad', 'things' ], + ]); }); }); @@ -57,7 +70,9 @@ describe('services.Wordlist', () => { 'cookies', 'COOKIES.', 'how to do bad things', - 'How To do bad things!' + 'How To do bad things!', + 'This stuff is $hit!', + 'That\'s a p**ch!', ].forEach((word) => { expect(wordlist.match(bannedList, word)).to.be.true; }); @@ -68,7 +83,10 @@ describe('services.Wordlist', () => { 'how to', 'cookie', 'how to be a great person?', - 'how to not do really bad things?' + 'how to not do really bad things?', + 'i have $100 dollars.', + 'I have bad $ hit lling', + 'That\'s a p***ch!', ].forEach((word) => { expect(wordlist.match(bannedList, word)).to.be.false; }); @@ -76,6 +94,39 @@ describe('services.Wordlist', () => { }); + describe('#scan', () => { + + it('does match on a bad word', () => { + [ + 'how to do really bad things', + 'what is cookies', + 'cookies', + 'COOKIES.', + 'how to do bad things', + 'How To do bad things!', + 'This stuff is $hit!', + 'That\'s a p**ch!', + ].forEach((word) => { + expect(wordlist.scan('body', word)).to.not.be.undefined; + }); + }); + + it('does not match on a good word', () => { + [ + 'how to', + 'cookie', + 'how to be a great person?', + 'how to not do really bad things?', + 'i have $100 dollars.', + 'I have bad $ hit lling', + 'That\'s a p***ch!', + ].forEach((word) => { + expect(wordlist.scan('body', word)).to.be.undefined; + }); + }); + + }); + describe('#checkName', () => { [ 'flowers',