// capturingWordSeparator splits text on word separators while keeping every
// separator as its own token, so the original text can be rebuilt with join('').
const capturingWordSeparator = /([.\s'"?!])/;

// wordSeparator is the same separator set without the capture group. It is
// used to split phrases into words and to recognise separator tokens.
const wordSeparator = /[.\s'"?!]/;

// isSkippable reports whether `token` carries no word content, i.e. it is ''
// or a word separator produced by splitting on `capturingWordSeparator`.
function isSkippable(token) {
  return token === '' || wordSeparator.test(token);
}

// matchPhraseAt tries to match every entry of `words` (lower-cased, non-empty)
// against `tokens` beginning at index `start`, ignoring separators and ''
// tokens that sit between words. It returns the number of tokens covered by
// the match (from `start` through the last matched word), or 0 if the phrase
// does not match at this position.
function matchPhraseAt(tokens, start, words) {
  let index = start;
  for (let n = 0; n < words.length; n++, index++) {

    // Skip word separators and ''.
    while (index < tokens.length && isSkippable(tokens[index])) {
      index++;
    }

    // Ran out of tokens, or this word differs: no match.
    if (index >= tokens.length || tokens[index].toLowerCase() !== words[n]) {
      return 0;
    }

    // Full match! Report the matched range length.
    if (n === words.length - 1) {
      return index - start + 1;
    }
  }

  // Only reached for an empty `words` array.
  return 0;
}

// markPhrases looks for `phrases` inside `body` and highlights them by
// returning an array in which unmatched text is kept as plain strings and each
// matched phrase becomes its own entry.
//
// Matching is case-insensitive: phrases are lower-cased here, so callers pass
// them in their original case. A phrase may span several words; separators
// between the words in `body` do not have to be the same ones used in the
// phrase. Empty words produced by leading, trailing or repeated separators in
// a phrase are dropped, and when several phrases match at the same position
// the longest match wins.
function markPhrases(body, phrases, keyPrefix) {
  const tokens = body.split(capturingWordSeparator);

  // Drop '' entries so that phrases such as 'bad  word ' can still match, and
  // drop phrases that contain no words at all.
  const phraseWords = phrases
    .map((phrase) => phrase.toLowerCase().split(wordSeparator).filter((word) => word !== ''))
    .filter((words) => words.length > 0);

  const content = [];
  let tmp = [];

  for (let l = 0; l < tokens.length; l++) {

    // matchedWords is > 0 when a full match was found and contains
    // the range length from this index to the end of the match.
    let matchedWords = 0;

    // A match can only start on a real word.
    if (!isSkippable(tokens[l])) {
      for (const words of phraseWords) {

        // Keep the longest match so a short phrase does not hide a longer one.
        matchedWords = Math.max(matchedWords, matchPhraseAt(tokens, l, words));
      }
    }

    // No match, we just push this into `tmp`.
    if (matchedWords === 0) {
      tmp.push(tokens[l]);
      continue;
    }

    // We have a match! Rebuild the matched text including its separators.
    const match = tokens.slice(l, l + matchedWords).join('');

    // Append whatever we have in `tmp` and clear it.
    content.push(tmp.join(''));
    tmp = [];

    // NOTE(review): the highlighted entry is reproduced exactly as it appears
    // in this view, where `keyPrefix` is unused. Confirm whether the real file
    // wraps `match` in markup and derives an element key from `keyPrefix`.
    content.push({match});

    // Move index further if we matched more than one word.
    l += matchedWords - 1;
  }

  // Append any non matched tokens currently in `tmp`.
  content.push(tmp.join(''));

  return content;
}