Merge branch 'master' into dynamic-email-templates

2026-06-30 02:23:06 +08:00 · 2017-03-29 13:09:31 -06:00
parent 009b381744 f80f253b98
commit ebcb3b4b02
4 changed files with 85 additions and 20 deletions
@@ -1,6 +1,6 @@
 {
  "name": "talk",
-  "version": "1.1.0",
+  "version": "1.4.0",
  "description": "A commenting platform from The Coral Project. https://coralproject.net",
  "main": "app.js",
  "scripts": {
@@ -2,9 +2,13 @@ const debug = require('debug')('talk:services:wordlist');
 const _ = require('lodash');
 const natural = require('natural');
 const tokenizer = new natural.WordTokenizer();
+const nameTokenizer = new natural.RegexpTokenizer({pattern: /\_/});
 const SettingsService = require('./settings');
 const Errors = require('../errors');

+// Regex that detects emojis so they can be kept out of the wordlist.
+const EMOJI_REGEX = /(?:[\u2700-\u27bf]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/;
+
 /**
 * The root wordlist object.
 * @type {Object}
@@ -58,7 +62,27 @@ class Wordlist {
   * @return {Array}      the parsed list
   */
  static parseList(list) {
-    return _.uniq(list.map((word) => tokenizer.tokenize(word.toLowerCase())));
+    return _.uniq(list.filter((word) => {
+      if (EMOJI_REGEX.test(word)) {
+        return false;
+      }
+
+      return true;
+    })
+    .map((word) => {
+      if (word.length === 1) {
+        return [word];
+      }
+      
+      return tokenizer.tokenize(word.toLowerCase());
+    })
+    .filter((tokens) => {
+      if (tokens.length === 0) {
+        return false;
+      }
+
+      return true;
+    }));
  }

  /**
@@ -66,11 +90,11 @@ class Wordlist {
   * @param  {String} phrase value to check for blockwords.
   * @return {Boolean}       true if a blockword is found, false otherwise.
   */
-  match(list, phrase) {
+  match(list, phrase, tk = tokenizer) {

    // Lowercase the word to ensure that we don't miss a match due to
    // capitalization.
-    let lowerPhraseWords = tokenizer.tokenize(phrase.toLowerCase());
+    let lowerPhraseWords = tk.tokenize(phrase.toLowerCase());

    // This will return true in the event that at least one blockword is found
    // in the phrase.
@@ -199,28 +223,24 @@ class Wordlist {
  }

  /**
-   * check potential username for banned words, special characters
+   * Check a potential username for banned words.
   */
  static usernameCheck(username) {
    const wl = new Wordlist();

-    return wl.load()
+    return wl
+      .load()
      .then(() => {
-        username = username.replace(/_/g, '');
-
-        // test each word, and fail if we find a match
-        const hasBadWords = wl.lists.banned.some(phrase => {
-          return username.indexOf(phrase.join('')) !== -1;
-        });
-
-        if (hasBadWords) {
-          throw Errors.ErrContainsProfanity;
-        } else {
-          return Promise.resolve(username);
+        if (!wl.checkName(wl.lists.banned, username)) {
+          return Errors.ErrContainsProfanity;
        }
      });
  }

+  checkName(list, name) {
+    return !this.match(list, name, nameTokenizer);
+  }
+
  /**
   * Connect middleware for scanning request bodies for wordlisted words and
   * attaching a ErrContainsProfanity to the req.wordlisted parameter, otherwise
@@ -249,6 +249,17 @@ describe('services.UsersService', () => {
        done();
      });
    });
+
+    it('should not allow non-alphanumeric characters in usernames', () => {
+      return UsersService
+        .isValidUsername('hi🖕')
+        .then(() => {
+          expect(false).to.be.true;
+        })
+        .catch((err) => {
+          expect(err).to.be.truthy;
+        });
+    });
  });

 });
@@ -9,7 +9,8 @@ describe('services.Wordlist', () => {
    banned: [
      'cookies',
      'how to do bad things',
-      'how to do really bad things'
+      'how to do really bad things',
+      's h i t'
    ],
    suspect: [
      'do bad things'
@@ -32,9 +33,22 @@ describe('services.Wordlist', () => {

  });

-  describe('#match', () => {
+  describe('#parseList', () => {
+    it('does not include emojis in the wordlist', () => {
+      let list = Wordlist.parseList([
+        '🖕',
+        '🖕 asdf',
+        'asd🖕asdf',
+        'asd🖕',
+      ]);

-    const bannedList = Wordlist.parseList(wordlists.banned);
+      expect(list).to.have.length(0);
+    });
+  });
+
+  const bannedList = Wordlist.parseList(wordlists.banned);
+
+  describe('#match', () => {

    it('does match on a bad word', () => {
      [
@@ -62,6 +76,26 @@ describe('services.Wordlist', () => {

  });

+  describe('#checkName', () => {
+    [
+      'flowers',
+      'joy',
+      'lots_of_candy'
+    ].forEach((username) => {
+      it(`does not match on list=banned name=${username}`, () => {
+        expect(wordlist.checkName(bannedList, username)).to.be.true;
+      });
+    });
+
+    [
+      'cookies'
+    ].forEach((username) => {
+      it(`does match on list=banned name=${username}`, () => {
+        expect(wordlist.checkName(bannedList, username)).to.be.false;
+      });
+    });
+  });
+
  describe('#filter', () => {

    before(() => wordlist.upsert(wordlists));