mirror of
https://github.com/wassname/pi-telegram.git
synced 2026-06-27 17:31:23 +08:00
Stop fetch retry from killing the polling loop
Two real bugs surfaced after the original retry helper: 1. Healthy long-polls were tripping our 15s per-attempt timeout. The getUpdates request asks Telegram for a 30s server-side long-poll, so our internal timeout aborted every healthy connection and turned it into ABORT_ERR -> retry -> exhausted -> "disconnected", with no auto-reconnect. The fix: long-poll bypasses the retry helper and uses a 60s per-attempt timeout, since the poll loop already retries by re-entering after sleep(). 2. Our own internal AbortController timeout produced a DOMException AbortError indistinguishable from a caller-abort. The poll loop's shouldStopTelegramPolling treated that as "user wants to stop" and exited. Now fetchWithRetry normalizes its own timeout into a tagged Error with code ATTEMPT_TIMEOUT, so only real caller-aborts surface as AbortError upstream. Also: per-attempt timeout default dropped 15s -> 5s, retry budget dropped from [500, 2000] to [500] (so 2 attempts, not 3) for outbound sends, since they serialize and a long retry tail makes the bridge feel hung. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2069,8 +2069,14 @@ export default function (pi: ExtensionAPI) {
|
|||||||
);
|
);
|
||||||
},
|
},
|
||||||
getUpdates: async (body, pollSignal) => {
|
getUpdates: async (body, pollSignal) => {
|
||||||
|
// Long-poll: server-side timeout is 30s. We bypass our retry helper
|
||||||
|
// (the poll loop already retries by re-entering after a sleep) and
|
||||||
|
// give the per-attempt timeout enough headroom that a healthy poll
|
||||||
|
// can never trip it.
|
||||||
return callTelegramApi<TelegramUpdate[]>("getUpdates", body, {
|
return callTelegramApi<TelegramUpdate[]>("getUpdates", body, {
|
||||||
signal: pollSignal,
|
signal: pollSignal,
|
||||||
|
retry: false,
|
||||||
|
attemptTimeoutMs: 60_000,
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
persistConfig: async () => {
|
persistConfig: async () => {
|
||||||
|
|||||||
+50
-19
@@ -29,7 +29,7 @@ export interface TelegramApiClient {
|
|||||||
call: <TResponse>(
|
call: <TResponse>(
|
||||||
method: string,
|
method: string,
|
||||||
body: Record<string, unknown>,
|
body: Record<string, unknown>,
|
||||||
options?: { signal?: AbortSignal },
|
options?: TelegramFetchOptions,
|
||||||
) => Promise<TResponse>;
|
) => Promise<TResponse>;
|
||||||
callMultipart: <TResponse>(
|
callMultipart: <TResponse>(
|
||||||
method: string,
|
method: string,
|
||||||
@@ -56,7 +56,9 @@ function sanitizeFileName(name: string): string {
|
|||||||
|
|
||||||
// Network-layer codes that warrant a retry. HTTP 4xx/5xx are NOT retried here -
|
// Network-layer codes that warrant a retry. HTTP 4xx/5xx are NOT retried here -
|
||||||
// those go through `data.ok` / `response.ok` and are surfaced to callers so
|
// those go through `data.ok` / `response.ok` and are surfaced to callers so
|
||||||
// rate-limits and logic errors stay loud.
|
// rate-limits and logic errors stay loud. ATTEMPT_TIMEOUT is our own per-attempt
|
||||||
|
// timeout firing; we treat it as transient but rethrow as a non-AbortError so
|
||||||
|
// it cannot masquerade as a caller-abort upstream (e.g. polling loop).
|
||||||
const TRANSIENT_FETCH_CODES = new Set([
|
const TRANSIENT_FETCH_CODES = new Set([
|
||||||
"UND_ERR_CONNECT_TIMEOUT",
|
"UND_ERR_CONNECT_TIMEOUT",
|
||||||
"UND_ERR_SOCKET",
|
"UND_ERR_SOCKET",
|
||||||
@@ -66,18 +68,27 @@ const TRANSIENT_FETCH_CODES = new Set([
|
|||||||
"ETIMEDOUT",
|
"ETIMEDOUT",
|
||||||
"ENOTFOUND",
|
"ENOTFOUND",
|
||||||
"EAI_AGAIN",
|
"EAI_AGAIN",
|
||||||
"ABORT_ERR", // our own per-attempt timeout, see below
|
"ATTEMPT_TIMEOUT",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const FETCH_ATTEMPT_TIMEOUT_MS = 15_000;
|
const DEFAULT_FETCH_ATTEMPT_TIMEOUT_MS = 5_000;
|
||||||
const FETCH_RETRY_DELAYS_MS = [500, 2000];
|
const DEFAULT_FETCH_RETRY_DELAYS_MS = [500];
|
||||||
|
|
||||||
|
export interface TelegramFetchOptions {
|
||||||
|
signal?: AbortSignal;
|
||||||
|
// Override per-attempt timeout. Long-poll callers (getUpdates) should pass
|
||||||
|
// a value larger than the server-side timeout (~30s) plus margin so the
|
||||||
|
// healthy long-poll completes without us aborting it.
|
||||||
|
attemptTimeoutMs?: number;
|
||||||
|
// When false, no in-memory retry is performed. Outer loops (polling) that
|
||||||
|
// already retry by re-entering should disable this to avoid double-retry.
|
||||||
|
retry?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
function transientCode(err: unknown): string | undefined {
|
function transientCode(err: unknown): string | undefined {
|
||||||
const e = err as { code?: string; cause?: { code?: string }; name?: string };
|
const e = err as { code?: string; cause?: { code?: string } };
|
||||||
const code = e?.code ?? e?.cause?.code;
|
const code = e?.code ?? e?.cause?.code;
|
||||||
if (code && TRANSIENT_FETCH_CODES.has(code)) return code;
|
if (code && TRANSIENT_FETCH_CODES.has(code)) return code;
|
||||||
// AbortError from our timeout has name="AbortError", no code
|
|
||||||
if (e?.name === "AbortError") return "ABORT_ERR";
|
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,29 +97,45 @@ function transientCode(err: unknown): string | undefined {
|
|||||||
* AbortController timeout so a stuck connection cannot wedge the bridge forever.
|
* AbortController timeout so a stuck connection cannot wedge the bridge forever.
|
||||||
*
|
*
|
||||||
* The caller's own AbortSignal (if any) is honored - if it aborts, we re-throw
|
* The caller's own AbortSignal (if any) is honored - if it aborts, we re-throw
|
||||||
* immediately and do not retry.
|
* the original AbortError immediately and do not retry. When OUR timeout fires
|
||||||
|
* (caller did not abort), we re-throw as a plain Error tagged with code
|
||||||
|
* "ATTEMPT_TIMEOUT" so upstream code can't confuse it with a caller-abort.
|
||||||
*/
|
*/
|
||||||
async function fetchWithRetry(
|
async function fetchWithRetry(
|
||||||
url: string,
|
url: string,
|
||||||
init: RequestInit,
|
init: RequestInit,
|
||||||
callerSignal?: AbortSignal,
|
callerSignal?: AbortSignal,
|
||||||
|
options?: { attemptTimeoutMs?: number; retry?: boolean },
|
||||||
): Promise<Response> {
|
): Promise<Response> {
|
||||||
for (let attempt = 0; attempt <= FETCH_RETRY_DELAYS_MS.length; attempt++) {
|
const attemptTimeoutMs =
|
||||||
|
options?.attemptTimeoutMs ?? DEFAULT_FETCH_ATTEMPT_TIMEOUT_MS;
|
||||||
|
const retryDelays =
|
||||||
|
options?.retry === false ? [] : DEFAULT_FETCH_RETRY_DELAYS_MS;
|
||||||
|
for (let attempt = 0; attempt <= retryDelays.length; attempt++) {
|
||||||
const timeoutCtl = new AbortController();
|
const timeoutCtl = new AbortController();
|
||||||
const timer = setTimeout(
|
const timer = setTimeout(() => timeoutCtl.abort(), attemptTimeoutMs);
|
||||||
() => timeoutCtl.abort(),
|
|
||||||
FETCH_ATTEMPT_TIMEOUT_MS,
|
|
||||||
);
|
|
||||||
const onCallerAbort = () => timeoutCtl.abort();
|
const onCallerAbort = () => timeoutCtl.abort();
|
||||||
callerSignal?.addEventListener("abort", onCallerAbort, { once: true });
|
callerSignal?.addEventListener("abort", onCallerAbort, { once: true });
|
||||||
|
let raised: unknown;
|
||||||
try {
|
try {
|
||||||
return await fetch(url, { ...init, signal: timeoutCtl.signal });
|
return await fetch(url, { ...init, signal: timeoutCtl.signal });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (callerSignal?.aborted) throw err;
|
if (callerSignal?.aborted) throw err;
|
||||||
const code = transientCode(err);
|
// Our own timeout fired - normalize to a non-AbortError so polling
|
||||||
const isLast = attempt === FETCH_RETRY_DELAYS_MS.length;
|
// loops that key off DOMException("AbortError") don't false-positive.
|
||||||
if (!code || isLast) throw err;
|
const isOurTimeout =
|
||||||
const base = FETCH_RETRY_DELAYS_MS[attempt];
|
(err as { name?: string })?.name === "AbortError" &&
|
||||||
|
timeoutCtl.signal.aborted;
|
||||||
|
raised = isOurTimeout
|
||||||
|
? Object.assign(
|
||||||
|
new Error(`fetch attempt timed out after ${attemptTimeoutMs}ms`),
|
||||||
|
{ code: "ATTEMPT_TIMEOUT" },
|
||||||
|
)
|
||||||
|
: err;
|
||||||
|
const code = transientCode(raised);
|
||||||
|
const isLast = attempt === retryDelays.length;
|
||||||
|
if (!code || isLast) throw raised;
|
||||||
|
const base = retryDelays[attempt];
|
||||||
const jitter = Math.round(base * (0.8 + Math.random() * 0.4));
|
const jitter = Math.round(base * (0.8 + Math.random() * 0.4));
|
||||||
console.warn(
|
console.warn(
|
||||||
`[pi-telegram] transient fetch error ${code} on ${url.replace(/bot[^/]+/, "bot***")}, retry ${attempt + 1} in ${jitter}ms`,
|
`[pi-telegram] transient fetch error ${code} on ${url.replace(/bot[^/]+/, "bot***")}, retry ${attempt + 1} in ${jitter}ms`,
|
||||||
@@ -150,7 +177,7 @@ export async function callTelegram<TResponse>(
|
|||||||
botToken: string | undefined,
|
botToken: string | undefined,
|
||||||
method: string,
|
method: string,
|
||||||
body: Record<string, unknown>,
|
body: Record<string, unknown>,
|
||||||
options?: { signal?: AbortSignal },
|
options?: TelegramFetchOptions,
|
||||||
): Promise<TResponse> {
|
): Promise<TResponse> {
|
||||||
if (!botToken) {
|
if (!botToken) {
|
||||||
throw new Error("Telegram bot token is not configured");
|
throw new Error("Telegram bot token is not configured");
|
||||||
@@ -163,6 +190,10 @@ export async function callTelegram<TResponse>(
|
|||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
},
|
},
|
||||||
options?.signal,
|
options?.signal,
|
||||||
|
{
|
||||||
|
attemptTimeoutMs: options?.attemptTimeoutMs,
|
||||||
|
retry: options?.retry,
|
||||||
|
},
|
||||||
);
|
);
|
||||||
const data = (await response.json()) as TelegramApiResponse<TResponse>;
|
const data = (await response.json()) as TelegramApiResponse<TResponse>;
|
||||||
if (!data.ok || data.result === undefined) {
|
if (!data.ok || data.result === undefined) {
|
||||||
|
|||||||
@@ -54,6 +54,9 @@ export function shouldStopTelegramPolling(
|
|||||||
signalAborted: boolean,
|
signalAborted: boolean,
|
||||||
error: unknown,
|
error: unknown,
|
||||||
): boolean {
|
): boolean {
|
||||||
|
// AbortError-from-our-own-timeout is normalized in fetchWithRetry so it
|
||||||
|
// can't reach here as a DOMException. Any AbortError that does reach here
|
||||||
|
// is therefore a real caller-abort and should stop the loop.
|
||||||
return (
|
return (
|
||||||
signalAborted ||
|
signalAborted ||
|
||||||
(error instanceof DOMException && error.name === "AbortError")
|
(error instanceof DOMException && error.name === "AbortError")
|
||||||
|
|||||||
Reference in New Issue
Block a user