[CORL-1126] Run regex in a sandbox so it can time out (#2988)

* Run regex in a sandbox so it can time out

If it times out, we flag it and system withhold
it for future analysis.

CORL-1126

* Pass regex and test string into wordList sandbox

Allows us to use the pre-cached regex instead of
regenerating it via an in-line script copy.

CORL-1126

* feat: improve script reuse

* fix: renamed some functions

* fix: improve script performance

* feat: added testString to log output if it times out

* fix: fixed issue with form alert not showing

* fix: use a timeout from config

Co-authored-by: Wyatt Johnson <me@wyattjoh.ca>
Co-authored-by: Wyatt Johnson <wyattjoh@gmail.com>
This commit is contained in:
Nick Funk
2020-06-12 12:09:37 -06:00
committed by GitHub
parent ebe7731222
commit feaf4858df
5 changed files with 140 additions and 32 deletions
@@ -163,7 +163,14 @@ export class PostCommentFormContainer extends Component<Props, State> {
state,
form
) => {
if (this.state.submitStatus && state.dirty) {
if (
this.state.submitStatus &&
state.dirty &&
// This is required because the body is somehow being "dirtied" once the
// form has been reinitialized.
// FIXME: (wyattjoh) discover why when a comment is submitted, it eventually sets this as the body afterwards
state.values.body !== "<div><br></div>"
) {
this.setState({ submitStatus: null });
}
if (state.values.body) {
+7
View File
@@ -259,6 +259,13 @@ const config = convict({
default: false,
env: "DISABLE_JOB_PROCESSORS",
},
word_list_timeout: {
doc:
"The word list timeout (in ms) that should be used to limit the amount of time the process is frozen processing a word list comparison",
format: "ms",
default: "100",
env: "WORD_LIST_TIMEOUT",
},
analytics_frontend_key: {
doc: "Analytics write key from RudderStack for the Javascript client.",
format: String,
@@ -0,0 +1,53 @@
import vm from "vm";
import logger from "coral-server/logger";
export type TestWithTimeout = (testString: string) => boolean | null;
/**
* createTesterWithTimeout will create a tester that after the timeout, will
* return null instead of a boolean.
*
* @param regexp the regular expression to wrap
* @param timeout the timeout to use
*/
export default function createTesterWithTimeout(
regexp: RegExp,
timeout: number
): TestWithTimeout {
// Create the script we're executing as a part of this regex test operation.
const script = new vm.Script("regexp.test(testString)");
// Create a null context object to isolate it with primitives.
const sandbox = Object.create(null);
sandbox.regexp = regexp;
sandbox.testString = "";
// Turn the sandbox into a context.
const ctx = vm.createContext(sandbox);
return (testString: string) => {
let result: boolean;
try {
// Set the testString to the one we're evaluating for this context.
sandbox.testString = testString;
// Run the operation in this context.
result = script.runInContext(ctx, { timeout });
} catch (err) {
if (err.code === "ERR_SCRIPT_EXECUTION_TIMEOUT") {
return null;
}
logger.error(
{ err },
"an error occurred evaluating the regular expression"
);
return null;
}
return result;
};
}
@@ -16,6 +16,7 @@ const list = new WordList();
// This phase checks the comment against the wordList.
export const wordList: IntermediateModerationPhase = ({
config,
tenant,
comment,
bodyText,
@@ -25,12 +26,12 @@ export const wordList: IntermediateModerationPhase = ({
return;
}
// Decide the status based on whether or not the current story/settings
// has pre-mod enabled or not. If the comment was rejected based on the
// wordList, then reject it, otherwise if the moderation setting is
// premod, set it to `premod`.
if (list.test(tenant, "banned", bodyText)) {
// Add the flag related to Trust to the comment.
// Get the timeout to use.
const timeout = (config.get("word_list_timeout") as unknown) as number;
// Test the comment for banned words.
const banned = list.test(tenant, "banned", timeout, bodyText);
if (banned) {
return {
status: GQLCOMMENT_STATUS.REJECTED,
actions: [
@@ -40,15 +41,21 @@ export const wordList: IntermediateModerationPhase = ({
},
],
};
} else if (banned === null) {
return {
status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD,
actions: [
{
actionType: ACTION_TYPE.FLAG,
reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_BANNED_WORD,
},
],
};
}
// If the comment has a suspect word or a link, we need to add a
// flag to it to indicate that it needs to be looked at.
// Otherwise just return the new comment.
// If the wordList has matched the suspect word filter and we haven't disabled
// auto-flagging suspect words, then we should flag the comment!
if (list.test(tenant, "suspect", bodyText)) {
// Test the comment for suspect words.
const suspect = list.test(tenant, "suspect", timeout, bodyText);
if (suspect) {
return {
actions: [
{
@@ -57,5 +64,15 @@ export const wordList: IntermediateModerationPhase = ({
},
],
};
} else if (suspect === null) {
return {
status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD,
actions: [
{
actionType: ACTION_TYPE.FLAG,
reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_SUSPECT_WORD,
},
],
};
}
};
@@ -1,26 +1,40 @@
import { LanguageCode } from "coral-common/helpers";
import createWordListRegExp from "coral-common/utils/createWordListRegExp";
import { createTimer } from "coral-server/helpers";
import createTesterWithTimeout, {
TestWithTimeout,
} from "coral-server/helpers/createTesterWithTimeout";
import logger from "coral-server/logger";
import { Tenant } from "coral-server/models/tenant";
interface Lists {
banned: RegExp | false;
suspect: RegExp | false;
banned: TestWithTimeout | false;
suspect: TestWithTimeout | false;
}
export type Options = Pick<Tenant, "id" | "locale" | "wordList">;
export class WordList {
/**
* cache is a weak map of word list options to word lists. It's a weak map
* so when the tenant document is updated and the old tenant is discarded, the
* list will also be discarded without explicit syncing by the garbage
* collection system.
*/
private readonly cache = new WeakMap<Options, Lists>();
private generate(locale: LanguageCode, list: string[]) {
private generate(locale: LanguageCode, list: string[], timeout: number) {
// If a word list has no entries, then we can make a simple tester.
if (list.length === 0) {
return false;
}
return createWordListRegExp(locale, list);
// Generate the regular expression for this list.
const regexp = createWordListRegExp(locale, list);
// Create a managed regular expression from the provided regular expression
// so we can time it out if it takes too long!
return createTesterWithTimeout(regexp, timeout);
}
/**
@@ -28,10 +42,10 @@ export class WordList {
*
* @param options options used to generate Lists
*/
private create(options: Options): Lists {
private create(options: Options, timeout: number): Lists {
return {
banned: this.generate(options.locale, options.wordList.banned),
suspect: this.generate(options.locale, options.wordList.suspect),
banned: this.generate(options.locale, options.wordList.banned, timeout),
suspect: this.generate(options.locale, options.wordList.suspect, timeout),
};
}
@@ -41,11 +55,11 @@ export class WordList {
*
* @param options the options object that is also used as the cache key
*/
private lists(options: Options, cache: boolean): Lists {
private lists(options: Options, cache: boolean, timeout: number): Lists {
// If the request isn't supposed to use the cache, then just return a new
// one.
if (!cache) {
return this.create(options);
return this.create(options, timeout);
}
// As this is supposed to be cached, try to get it from the cache, or create
@@ -53,7 +67,7 @@ export class WordList {
let lists = this.cache.get(options);
if (!lists) {
const timer = createTimer();
lists = this.create(options);
lists = this.create(options, timeout);
logger.info(
{ tenantID: options.id, took: timer() },
"regenerated word list cache"
@@ -78,20 +92,30 @@ export class WordList {
public test(
options: Options,
listName: keyof Lists,
timeout: number,
testString: string,
cache = true
): boolean {
const list = this.lists(options, cache)[listName];
if (!list) {
): boolean | null {
const test = this.lists(options, cache, timeout)[listName];
if (!test) {
return false;
}
const timer = createTimer();
const result = list.test(testString);
logger.info(
{ tenantID: options.id, listName, took: timer() },
"word list phrase test complete"
);
// Test the string against the list and timeout if it takes too long.
const result = test(testString);
if (result === null) {
logger.info(
{ tenantID: options.id, listName, took: timer(), testString },
"word list phrase test timed out"
);
} else {
logger.info(
{ tenantID: options.id, listName, took: timer() },
"word list phrase test complete"
);
}
return result;
}