diff --git a/src/core/client/stream/tabs/Comments/Stream/PostCommentForm/PostCommentFormContainer.tsx b/src/core/client/stream/tabs/Comments/Stream/PostCommentForm/PostCommentFormContainer.tsx index e2cf71550..5ae9c9622 100644 --- a/src/core/client/stream/tabs/Comments/Stream/PostCommentForm/PostCommentFormContainer.tsx +++ b/src/core/client/stream/tabs/Comments/Stream/PostCommentForm/PostCommentFormContainer.tsx @@ -163,7 +163,14 @@ export class PostCommentFormContainer extends Component { state, form ) => { - if (this.state.submitStatus && state.dirty) { + if ( + this.state.submitStatus && + state.dirty && + // This is required because the body is somehow being "dirtied" once the + // form has been reinitialized. + // FIXME: (wyattjoh) discover why when a comment is submitted, it eventually sets this as the body afterwards + state.values.body !== "

" + ) { this.setState({ submitStatus: null }); } if (state.values.body) { diff --git a/src/core/server/config.ts b/src/core/server/config.ts index 1d9fe2e7c..5151c380f 100644 --- a/src/core/server/config.ts +++ b/src/core/server/config.ts @@ -259,6 +259,13 @@ const config = convict({ default: false, env: "DISABLE_JOB_PROCESSORS", }, + word_list_timeout: { + doc: + "The word list timeout (in ms) that should be used to limit the amount of time the process is frozen processing a word list comparison", + format: "ms", + default: "100", + env: "WORD_LIST_TIMEOUT", + }, analytics_frontend_key: { doc: "Analytics write key from RudderStack for the Javascript client.", format: String, diff --git a/src/core/server/helpers/createTesterWithTimeout.ts b/src/core/server/helpers/createTesterWithTimeout.ts new file mode 100644 index 000000000..1998050ae --- /dev/null +++ b/src/core/server/helpers/createTesterWithTimeout.ts @@ -0,0 +1,53 @@ +import vm from "vm"; + +import logger from "coral-server/logger"; + +export type TestWithTimeout = (testString: string) => boolean | null; + +/** + * createTesterWithTimeout will create a tester that after the timeout, will + * return null instead of a boolean. + * + * @param regexp the regular expression to wrap + * @param timeout the timeout to use + */ +export default function createTesterWithTimeout( + regexp: RegExp, + timeout: number +): TestWithTimeout { + // Create the script we're executing as a part of this regex test operation. + const script = new vm.Script("regexp.test(testString)"); + + // Create a null context object to isolate it with primitives. + const sandbox = Object.create(null); + sandbox.regexp = regexp; + sandbox.testString = ""; + + // Turn the sandbox into a context. + const ctx = vm.createContext(sandbox); + + return (testString: string) => { + let result: boolean; + + try { + // Set the testString to the one we're evaluating for this context. + sandbox.testString = testString; + + // Run the operation in this context. + result = script.runInContext(ctx, { timeout }); + } catch (err) { + if (err.code === "ERR_SCRIPT_EXECUTION_TIMEOUT") { + return null; + } + + logger.error( + { err }, + "an error occurred evaluating the regular expression" + ); + + return null; + } + + return result; + }; +} diff --git a/src/core/server/services/comments/pipeline/phases/wordList.ts b/src/core/server/services/comments/pipeline/phases/wordList.ts index 7b7720426..71eab6880 100755 --- a/src/core/server/services/comments/pipeline/phases/wordList.ts +++ b/src/core/server/services/comments/pipeline/phases/wordList.ts @@ -16,6 +16,7 @@ const list = new WordList(); // This phase checks the comment against the wordList. export const wordList: IntermediateModerationPhase = ({ + config, tenant, comment, bodyText, @@ -25,12 +26,12 @@ export const wordList: IntermediateModerationPhase = ({ return; } - // Decide the status based on whether or not the current story/settings - // has pre-mod enabled or not. If the comment was rejected based on the - // wordList, then reject it, otherwise if the moderation setting is - // premod, set it to `premod`. - if (list.test(tenant, "banned", bodyText)) { - // Add the flag related to Trust to the comment. + // Get the timeout to use. + const timeout = (config.get("word_list_timeout") as unknown) as number; + + // Test the comment for banned words. + const banned = list.test(tenant, "banned", timeout, bodyText); + if (banned) { return { status: GQLCOMMENT_STATUS.REJECTED, actions: [ @@ -40,15 +41,21 @@ export const wordList: IntermediateModerationPhase = ({ }, ], }; + } else if (banned === null) { + return { + status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD, + actions: [ + { + actionType: ACTION_TYPE.FLAG, + reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_BANNED_WORD, + }, + ], + }; } - // If the comment has a suspect word or a link, we need to add a - // flag to it to indicate that it needs to be looked at. - // Otherwise just return the new comment. - - // If the wordList has matched the suspect word filter and we haven't disabled - // auto-flagging suspect words, then we should flag the comment! - if (list.test(tenant, "suspect", bodyText)) { + // Test the comment for suspect words. + const suspect = list.test(tenant, "suspect", timeout, bodyText); + if (suspect) { return { actions: [ { @@ -57,5 +64,15 @@ export const wordList: IntermediateModerationPhase = ({ }, ], }; + } else if (suspect === null) { + return { + status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD, + actions: [ + { + actionType: ACTION_TYPE.FLAG, + reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_SUSPECT_WORD, + }, + ], + }; } }; diff --git a/src/core/server/services/comments/pipeline/wordList.ts b/src/core/server/services/comments/pipeline/wordList.ts index cb78e04ca..8a355a16a 100644 --- a/src/core/server/services/comments/pipeline/wordList.ts +++ b/src/core/server/services/comments/pipeline/wordList.ts @@ -1,26 +1,40 @@ import { LanguageCode } from "coral-common/helpers"; import createWordListRegExp from "coral-common/utils/createWordListRegExp"; import { createTimer } from "coral-server/helpers"; +import createTesterWithTimeout, { + TestWithTimeout, +} from "coral-server/helpers/createTesterWithTimeout"; import logger from "coral-server/logger"; import { Tenant } from "coral-server/models/tenant"; interface Lists { - banned: RegExp | false; - suspect: RegExp | false; + banned: TestWithTimeout | false; + suspect: TestWithTimeout | false; } export type Options = Pick; export class WordList { + /** + * cache is a weak map of word list options to word lists. It's a weak map + * so when the tenant document is updated and the old tenant is discarded, the + * list will also be discarded without explicit syncing by the garbage + * collection system. + */ private readonly cache = new WeakMap(); - private generate(locale: LanguageCode, list: string[]) { + private generate(locale: LanguageCode, list: string[], timeout: number) { // If a word list has no entries, then we can make a simple tester. if (list.length === 0) { return false; } - return createWordListRegExp(locale, list); + // Generate the regular expression for this list. + const regexp = createWordListRegExp(locale, list); + + // Create a managed regular expression from the provided regular expression + // so we can time it out if it takes too long! + return createTesterWithTimeout(regexp, timeout); } /** @@ -28,10 +42,10 @@ export class WordList { * * @param options options used to generate Lists */ - private create(options: Options): Lists { + private create(options: Options, timeout: number): Lists { return { - banned: this.generate(options.locale, options.wordList.banned), - suspect: this.generate(options.locale, options.wordList.suspect), + banned: this.generate(options.locale, options.wordList.banned, timeout), + suspect: this.generate(options.locale, options.wordList.suspect, timeout), }; } @@ -41,11 +55,11 @@ export class WordList { * * @param options the options object that is also used as the cache key */ - private lists(options: Options, cache: boolean): Lists { + private lists(options: Options, cache: boolean, timeout: number): Lists { // If the request isn't supposed to use the cache, then just return a new // one. if (!cache) { - return this.create(options); + return this.create(options, timeout); } // As this is supposed to be cached, try to get it from the cache, or create @@ -53,7 +67,7 @@ export class WordList { let lists = this.cache.get(options); if (!lists) { const timer = createTimer(); - lists = this.create(options); + lists = this.create(options, timeout); logger.info( { tenantID: options.id, took: timer() }, "regenerated word list cache" @@ -78,20 +92,30 @@ export class WordList { public test( options: Options, listName: keyof Lists, + timeout: number, testString: string, cache = true - ): boolean { - const list = this.lists(options, cache)[listName]; - if (!list) { + ): boolean | null { + const test = this.lists(options, cache, timeout)[listName]; + if (!test) { return false; } const timer = createTimer(); - const result = list.test(testString); - logger.info( - { tenantID: options.id, listName, took: timer() }, - "word list phrase test complete" - ); + + // Test the string against the list and timeout if it takes too long. + const result = test(testString); + if (result === null) { + logger.info( + { tenantID: options.id, listName, took: timer(), testString }, + "word list phrase test timed out" + ); + } else { + logger.info( + { tenantID: options.id, listName, took: timer() }, + "word list phrase test complete" + ); + } return result; }