mirror of
https://github.com/wassname/talk.git
synced 2026-06-27 19:17:09 +08:00
[CORL-1126] Run regex in a sandbox so it can time out (#2988)
* Run regex in a sandbox so it can time out If it times out, we flag it and system withhold it for future analysis. CORL-1126 * Pass regex and test string into wordList sandbox Allows us to use the pre-cached regex instead of regenerating it via an in-line script copy. CORL-1126 * feat: improve script reuse * fix: renamed some functions * fix: improve script performance * feat: added testString to log output if it times out * fix: fixed issue with form alert not showing * fix: use a timeout from config Co-authored-by: Wyatt Johnson <me@wyattjoh.ca> Co-authored-by: Wyatt Johnson <wyattjoh@gmail.com>
This commit is contained in:
+8
-1
@@ -163,7 +163,14 @@ export class PostCommentFormContainer extends Component<Props, State> {
|
||||
state,
|
||||
form
|
||||
) => {
|
||||
if (this.state.submitStatus && state.dirty) {
|
||||
if (
|
||||
this.state.submitStatus &&
|
||||
state.dirty &&
|
||||
// This is required because the body is somehow being "dirtied" once the
|
||||
// form has been reinitialized.
|
||||
// FIXME: (wyattjoh) discover why when a comment is submitted, it eventually sets this as the body afterwards
|
||||
state.values.body !== "<div><br></div>"
|
||||
) {
|
||||
this.setState({ submitStatus: null });
|
||||
}
|
||||
if (state.values.body) {
|
||||
|
||||
@@ -259,6 +259,13 @@ const config = convict({
|
||||
default: false,
|
||||
env: "DISABLE_JOB_PROCESSORS",
|
||||
},
|
||||
word_list_timeout: {
|
||||
doc:
|
||||
"The word list timeout (in ms) that should be used to limit the amount of time the process is frozen processing a word list comparison",
|
||||
format: "ms",
|
||||
default: "100",
|
||||
env: "WORD_LIST_TIMEOUT",
|
||||
},
|
||||
analytics_frontend_key: {
|
||||
doc: "Analytics write key from RudderStack for the Javascript client.",
|
||||
format: String,
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
import vm from "vm";
|
||||
|
||||
import logger from "coral-server/logger";
|
||||
|
||||
export type TestWithTimeout = (testString: string) => boolean | null;
|
||||
|
||||
/**
|
||||
* createTesterWithTimeout will create a tester that after the timeout, will
|
||||
* return null instead of a boolean.
|
||||
*
|
||||
* @param regexp the regular expression to wrap
|
||||
* @param timeout the timeout to use
|
||||
*/
|
||||
export default function createTesterWithTimeout(
|
||||
regexp: RegExp,
|
||||
timeout: number
|
||||
): TestWithTimeout {
|
||||
// Create the script we're executing as a part of this regex test operation.
|
||||
const script = new vm.Script("regexp.test(testString)");
|
||||
|
||||
// Create a null context object to isolate it with primitives.
|
||||
const sandbox = Object.create(null);
|
||||
sandbox.regexp = regexp;
|
||||
sandbox.testString = "";
|
||||
|
||||
// Turn the sandbox into a context.
|
||||
const ctx = vm.createContext(sandbox);
|
||||
|
||||
return (testString: string) => {
|
||||
let result: boolean;
|
||||
|
||||
try {
|
||||
// Set the testString to the one we're evaluating for this context.
|
||||
sandbox.testString = testString;
|
||||
|
||||
// Run the operation in this context.
|
||||
result = script.runInContext(ctx, { timeout });
|
||||
} catch (err) {
|
||||
if (err.code === "ERR_SCRIPT_EXECUTION_TIMEOUT") {
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.error(
|
||||
{ err },
|
||||
"an error occurred evaluating the regular expression"
|
||||
);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
}
|
||||
@@ -16,6 +16,7 @@ const list = new WordList();
|
||||
|
||||
// This phase checks the comment against the wordList.
|
||||
export const wordList: IntermediateModerationPhase = ({
|
||||
config,
|
||||
tenant,
|
||||
comment,
|
||||
bodyText,
|
||||
@@ -25,12 +26,12 @@ export const wordList: IntermediateModerationPhase = ({
|
||||
return;
|
||||
}
|
||||
|
||||
// Decide the status based on whether or not the current story/settings
|
||||
// has pre-mod enabled or not. If the comment was rejected based on the
|
||||
// wordList, then reject it, otherwise if the moderation setting is
|
||||
// premod, set it to `premod`.
|
||||
if (list.test(tenant, "banned", bodyText)) {
|
||||
// Add the flag related to Trust to the comment.
|
||||
// Get the timeout to use.
|
||||
const timeout = (config.get("word_list_timeout") as unknown) as number;
|
||||
|
||||
// Test the comment for banned words.
|
||||
const banned = list.test(tenant, "banned", timeout, bodyText);
|
||||
if (banned) {
|
||||
return {
|
||||
status: GQLCOMMENT_STATUS.REJECTED,
|
||||
actions: [
|
||||
@@ -40,15 +41,21 @@ export const wordList: IntermediateModerationPhase = ({
|
||||
},
|
||||
],
|
||||
};
|
||||
} else if (banned === null) {
|
||||
return {
|
||||
status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD,
|
||||
actions: [
|
||||
{
|
||||
actionType: ACTION_TYPE.FLAG,
|
||||
reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_BANNED_WORD,
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
// If the comment has a suspect word or a link, we need to add a
|
||||
// flag to it to indicate that it needs to be looked at.
|
||||
// Otherwise just return the new comment.
|
||||
|
||||
// If the wordList has matched the suspect word filter and we haven't disabled
|
||||
// auto-flagging suspect words, then we should flag the comment!
|
||||
if (list.test(tenant, "suspect", bodyText)) {
|
||||
// Test the comment for suspect words.
|
||||
const suspect = list.test(tenant, "suspect", timeout, bodyText);
|
||||
if (suspect) {
|
||||
return {
|
||||
actions: [
|
||||
{
|
||||
@@ -57,5 +64,15 @@ export const wordList: IntermediateModerationPhase = ({
|
||||
},
|
||||
],
|
||||
};
|
||||
} else if (suspect === null) {
|
||||
return {
|
||||
status: GQLCOMMENT_STATUS.SYSTEM_WITHHELD,
|
||||
actions: [
|
||||
{
|
||||
actionType: ACTION_TYPE.FLAG,
|
||||
reason: GQLCOMMENT_FLAG_REASON.COMMENT_DETECTED_SUSPECT_WORD,
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,26 +1,40 @@
|
||||
import { LanguageCode } from "coral-common/helpers";
|
||||
import createWordListRegExp from "coral-common/utils/createWordListRegExp";
|
||||
import { createTimer } from "coral-server/helpers";
|
||||
import createTesterWithTimeout, {
|
||||
TestWithTimeout,
|
||||
} from "coral-server/helpers/createTesterWithTimeout";
|
||||
import logger from "coral-server/logger";
|
||||
import { Tenant } from "coral-server/models/tenant";
|
||||
|
||||
interface Lists {
|
||||
banned: RegExp | false;
|
||||
suspect: RegExp | false;
|
||||
banned: TestWithTimeout | false;
|
||||
suspect: TestWithTimeout | false;
|
||||
}
|
||||
|
||||
export type Options = Pick<Tenant, "id" | "locale" | "wordList">;
|
||||
|
||||
export class WordList {
|
||||
/**
|
||||
* cache is a weak map of word list options to word lists. It's a weak map
|
||||
* so when the tenant document is updated and the old tenant is discarded, the
|
||||
* list will also be discarded without explicit syncing by the garbage
|
||||
* collection system.
|
||||
*/
|
||||
private readonly cache = new WeakMap<Options, Lists>();
|
||||
|
||||
private generate(locale: LanguageCode, list: string[]) {
|
||||
private generate(locale: LanguageCode, list: string[], timeout: number) {
|
||||
// If a word list has no entries, then we can make a simple tester.
|
||||
if (list.length === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return createWordListRegExp(locale, list);
|
||||
// Generate the regular expression for this list.
|
||||
const regexp = createWordListRegExp(locale, list);
|
||||
|
||||
// Create a managed regular expression from the provided regular expression
|
||||
// so we can time it out if it takes too long!
|
||||
return createTesterWithTimeout(regexp, timeout);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -28,10 +42,10 @@ export class WordList {
|
||||
*
|
||||
* @param options options used to generate Lists
|
||||
*/
|
||||
private create(options: Options): Lists {
|
||||
private create(options: Options, timeout: number): Lists {
|
||||
return {
|
||||
banned: this.generate(options.locale, options.wordList.banned),
|
||||
suspect: this.generate(options.locale, options.wordList.suspect),
|
||||
banned: this.generate(options.locale, options.wordList.banned, timeout),
|
||||
suspect: this.generate(options.locale, options.wordList.suspect, timeout),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -41,11 +55,11 @@ export class WordList {
|
||||
*
|
||||
* @param options the options object that is also used as the cache key
|
||||
*/
|
||||
private lists(options: Options, cache: boolean): Lists {
|
||||
private lists(options: Options, cache: boolean, timeout: number): Lists {
|
||||
// If the request isn't supposed to use the cache, then just return a new
|
||||
// one.
|
||||
if (!cache) {
|
||||
return this.create(options);
|
||||
return this.create(options, timeout);
|
||||
}
|
||||
|
||||
// As this is supposed to be cached, try to get it from the cache, or create
|
||||
@@ -53,7 +67,7 @@ export class WordList {
|
||||
let lists = this.cache.get(options);
|
||||
if (!lists) {
|
||||
const timer = createTimer();
|
||||
lists = this.create(options);
|
||||
lists = this.create(options, timeout);
|
||||
logger.info(
|
||||
{ tenantID: options.id, took: timer() },
|
||||
"regenerated word list cache"
|
||||
@@ -78,20 +92,30 @@ export class WordList {
|
||||
public test(
|
||||
options: Options,
|
||||
listName: keyof Lists,
|
||||
timeout: number,
|
||||
testString: string,
|
||||
cache = true
|
||||
): boolean {
|
||||
const list = this.lists(options, cache)[listName];
|
||||
if (!list) {
|
||||
): boolean | null {
|
||||
const test = this.lists(options, cache, timeout)[listName];
|
||||
if (!test) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const timer = createTimer();
|
||||
const result = list.test(testString);
|
||||
logger.info(
|
||||
{ tenantID: options.id, listName, took: timer() },
|
||||
"word list phrase test complete"
|
||||
);
|
||||
|
||||
// Test the string against the list and timeout if it takes too long.
|
||||
const result = test(testString);
|
||||
if (result === null) {
|
||||
logger.info(
|
||||
{ tenantID: options.id, listName, took: timer(), testString },
|
||||
"word list phrase test timed out"
|
||||
);
|
||||
} else {
|
||||
logger.info(
|
||||
{ tenantID: options.id, listName, took: timer() },
|
||||
"word list phrase test complete"
|
||||
);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user