diff --git a/index.ts b/index.ts index 8361981..289eb16 100644 --- a/index.ts +++ b/index.ts @@ -2069,8 +2069,14 @@ export default function (pi: ExtensionAPI) { ); }, getUpdates: async (body, pollSignal) => { + // Long-poll: server-side timeout is 30s. We bypass our retry helper + // (the poll loop already retries by re-entering after a sleep) and + // give the per-attempt timeout enough headroom that a healthy poll + // can never trip it. return callTelegramApi("getUpdates", body, { signal: pollSignal, + retry: false, + attemptTimeoutMs: 60_000, }); }, persistConfig: async () => { diff --git a/lib/api.ts b/lib/api.ts index ea8dcd8..b890df4 100644 --- a/lib/api.ts +++ b/lib/api.ts @@ -29,7 +29,7 @@ export interface TelegramApiClient { call: ( method: string, body: Record, - options?: { signal?: AbortSignal }, + options?: TelegramFetchOptions, ) => Promise; callMultipart: ( method: string, @@ -56,7 +56,9 @@ function sanitizeFileName(name: string): string { // Network-layer codes that warrant a retry. HTTP 4xx/5xx are NOT retried here - // those go through `data.ok` / `response.ok` and are surfaced to callers so -// rate-limits and logic errors stay loud. +// rate-limits and logic errors stay loud. ATTEMPT_TIMEOUT is our own per-attempt +// timeout firing; we treat it as transient but rethrow as a non-AbortError so +// it cannot masquerade as a caller-abort upstream (e.g. polling loop). const TRANSIENT_FETCH_CODES = new Set([ "UND_ERR_CONNECT_TIMEOUT", "UND_ERR_SOCKET", @@ -66,18 +68,27 @@ const TRANSIENT_FETCH_CODES = new Set([ "ETIMEDOUT", "ENOTFOUND", "EAI_AGAIN", - "ABORT_ERR", // our own per-attempt timeout, see below + "ATTEMPT_TIMEOUT", ]); -const FETCH_ATTEMPT_TIMEOUT_MS = 15_000; -const FETCH_RETRY_DELAYS_MS = [500, 2000]; +const DEFAULT_FETCH_ATTEMPT_TIMEOUT_MS = 5_000; +const DEFAULT_FETCH_RETRY_DELAYS_MS = [500]; + +export interface TelegramFetchOptions { + signal?: AbortSignal; + // Override per-attempt timeout. Long-poll callers (getUpdates) should pass + // a value larger than the server-side timeout (~30s) plus margin so the + // healthy long-poll completes without us aborting it. + attemptTimeoutMs?: number; + // When false, no in-memory retry is performed. Outer loops (polling) that + // already retry by re-entering should disable this to avoid double-retry. + retry?: boolean; +} function transientCode(err: unknown): string | undefined { - const e = err as { code?: string; cause?: { code?: string }; name?: string }; + const e = err as { code?: string; cause?: { code?: string } }; const code = e?.code ?? e?.cause?.code; if (code && TRANSIENT_FETCH_CODES.has(code)) return code; - // AbortError from our timeout has name="AbortError", no code - if (e?.name === "AbortError") return "ABORT_ERR"; return undefined; } @@ -86,29 +97,45 @@ function transientCode(err: unknown): string | undefined { * AbortController timeout so a stuck connection cannot wedge the bridge forever. * * The caller's own AbortSignal (if any) is honored - if it aborts, we re-throw - * immediately and do not retry. + * the original AbortError immediately and do not retry. When OUR timeout fires + * (caller did not abort), we re-throw as a plain Error tagged with code + * "ATTEMPT_TIMEOUT" so upstream code can't confuse it with a caller-abort. */ async function fetchWithRetry( url: string, init: RequestInit, callerSignal?: AbortSignal, + options?: { attemptTimeoutMs?: number; retry?: boolean }, ): Promise { - for (let attempt = 0; attempt <= FETCH_RETRY_DELAYS_MS.length; attempt++) { + const attemptTimeoutMs = + options?.attemptTimeoutMs ?? DEFAULT_FETCH_ATTEMPT_TIMEOUT_MS; + const retryDelays = + options?.retry === false ? [] : DEFAULT_FETCH_RETRY_DELAYS_MS; + for (let attempt = 0; attempt <= retryDelays.length; attempt++) { const timeoutCtl = new AbortController(); - const timer = setTimeout( - () => timeoutCtl.abort(), - FETCH_ATTEMPT_TIMEOUT_MS, - ); + const timer = setTimeout(() => timeoutCtl.abort(), attemptTimeoutMs); const onCallerAbort = () => timeoutCtl.abort(); callerSignal?.addEventListener("abort", onCallerAbort, { once: true }); + let raised: unknown; try { return await fetch(url, { ...init, signal: timeoutCtl.signal }); } catch (err) { if (callerSignal?.aborted) throw err; - const code = transientCode(err); - const isLast = attempt === FETCH_RETRY_DELAYS_MS.length; - if (!code || isLast) throw err; - const base = FETCH_RETRY_DELAYS_MS[attempt]; + // Our own timeout fired - normalize to a non-AbortError so polling + // loops that key off DOMException("AbortError") don't false-positive. + const isOurTimeout = + (err as { name?: string })?.name === "AbortError" && + timeoutCtl.signal.aborted; + raised = isOurTimeout + ? Object.assign( + new Error(`fetch attempt timed out after ${attemptTimeoutMs}ms`), + { code: "ATTEMPT_TIMEOUT" }, + ) + : err; + const code = transientCode(raised); + const isLast = attempt === retryDelays.length; + if (!code || isLast) throw raised; + const base = retryDelays[attempt]; const jitter = Math.round(base * (0.8 + Math.random() * 0.4)); console.warn( `[pi-telegram] transient fetch error ${code} on ${url.replace(/bot[^/]+/, "bot***")}, retry ${attempt + 1} in ${jitter}ms`, @@ -150,7 +177,7 @@ export async function callTelegram( botToken: string | undefined, method: string, body: Record, - options?: { signal?: AbortSignal }, + options?: TelegramFetchOptions, ): Promise { if (!botToken) { throw new Error("Telegram bot token is not configured"); @@ -163,6 +190,10 @@ export async function callTelegram( body: JSON.stringify(body), }, options?.signal, + { + attemptTimeoutMs: options?.attemptTimeoutMs, + retry: options?.retry, + }, ); const data = (await response.json()) as TelegramApiResponse; if (!data.ok || data.result === undefined) { diff --git a/lib/polling.ts b/lib/polling.ts index 98453bb..cfedba0 100644 --- a/lib/polling.ts +++ b/lib/polling.ts @@ -54,6 +54,9 @@ export function shouldStopTelegramPolling( signalAborted: boolean, error: unknown, ): boolean { + // AbortError-from-our-own-timeout is normalized in fetchWithRetry so it + // can't reach here as a DOMException. Any AbortError that does reach here + // is therefore a real caller-abort and should stop the loop. return ( signalAborted || (error instanceof DOMException && error.name === "AbortError")