Merge pull request #873 from coralproject/banned-words

Banned Words / Suspect Words Fix
This commit is contained in:
Kim Gardner
2017-08-25 12:22:28 +01:00
committed by GitHub
3 changed files with 63 additions and 11 deletions
@@ -17,6 +17,7 @@ export default ({suspectWords, bannedWords, body, ...rest}) => {
return (
<Highlighter
{...rest}
autoEscape={true}
searchWords={searchWords}
textToHighlight={body}
/>
+4 -4
View File
@@ -1,8 +1,8 @@
const debug = require('debug')('talk:services:wordlist');
const _ = require('lodash');
const natural = require('natural');
const tokenizer = new natural.WordTokenizer();
const nameTokenizer = new natural.RegexpTokenizer({pattern: /\_/});
const {RegexpTokenizer} = require('natural');
const tokenizer = new RegexpTokenizer({pattern: /[\.\s\'\"\?\!]/});
const nameTokenizer = new RegexpTokenizer({pattern: /\_/});
const SettingsService = require('./settings');
const Errors = require('../errors');
@@ -73,7 +73,7 @@ class Wordlist {
if (word.length === 1) {
return [word];
}
return tokenizer.tokenize(word.toLowerCase());
})
.filter((tokens) => {
+58 -7
View File
@@ -10,10 +10,13 @@ describe('services.Wordlist', () => {
'cookies',
'how to do bad things',
'how to do really bad things',
's h i t'
's h i t',
'$hit',
'p**ch',
'p*ch',
],
suspect: [
'do bad things'
'do bad things',
]
};
@@ -26,9 +29,19 @@ describe('services.Wordlist', () => {
before(() => wordlist.upsert(wordlists));
it('has entries', () => {
expect(wordlist.lists.banned).to.not.be.empty;
expect(wordlist.lists.suspect).to.not.be.empty;
it('parses the wordlists correctly', () => {
expect(wordlist.lists.banned).to.deep.equal([
[ 'cookies' ],
[ 'how', 'to', 'do', 'bad', 'things' ],
[ 'how', 'to', 'do', 'really', 'bad', 'things' ],
[ 's', 'h', 'i', 't' ],
[ '$hit' ],
[ 'p**ch' ],
[ 'p*ch' ],
]);
expect(wordlist.lists.suspect).to.deep.equal([
[ 'do', 'bad', 'things' ],
]);
});
});
@@ -57,7 +70,9 @@ describe('services.Wordlist', () => {
'cookies',
'COOKIES.',
'how to do bad things',
'How To do bad things!'
'How To do bad things!',
'This stuff is $hit!',
'That\'s a p**ch!',
].forEach((word) => {
expect(wordlist.match(bannedList, word)).to.be.true;
});
@@ -68,7 +83,10 @@ describe('services.Wordlist', () => {
'how to',
'cookie',
'how to be a great person?',
'how to not do really bad things?'
'how to not do really bad things?',
'i have $100 dollars.',
'I have bad $ hit lling',
'That\'s a p***ch!',
].forEach((word) => {
expect(wordlist.match(bannedList, word)).to.be.false;
});
@@ -76,6 +94,39 @@ describe('services.Wordlist', () => {
});
// Test suite for wordlist.scan(), always called here with the field name 'body'.
describe('#scan', () => {
  // Every phrase below must make scan() return something other than undefined.
  it('does match on a bad word', () => {
    const flaggedPhrases = [
      'how to do really bad things',
      'what is cookies',
      'cookies',
      'COOKIES.',
      'how to do bad things',
      'How To do bad things!',
      'This stuff is $hit!',
      'That\'s a p**ch!',
    ];

    for (const phrase of flaggedPhrases) {
      expect(wordlist.scan('body', phrase)).to.not.be.undefined;
    }
  });

  // Every phrase below must make scan() return undefined.
  it('does not match on a good word', () => {
    const cleanPhrases = [
      'how to',
      'cookie',
      'how to be a great person?',
      'how to not do really bad things?',
      'i have $100 dollars.',
      'I have bad $ hit lling',
      'That\'s a p***ch!',
    ];

    for (const phrase of cleanPhrases) {
      expect(wordlist.scan('body', phrase)).to.be.undefined;
    }
  });
});
describe('#checkName', () => {
[
'flowers',