Files
2026-04-11 11:32:44 +04:00

235 lines
6.0 KiB
TypeScript

/**
 * Telegram media and text extraction helpers.
 * Normalizes inbound Telegram messages into reusable file, text, id, and history metadata.
 */
/**
 * Minimal photo-size shape consumed by collectTelegramFileInfos:
 * one entry of a message's `photo` array.
 */
export interface TelegramPhotoSizeLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Optional size used to pick the largest entry; a missing value is treated as 0.
file_size?: number;
}
/**
 * Minimal document shape consumed by collectTelegramFileInfos.
 */
export interface TelegramDocumentLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Preferred file name; when absent or empty a `document-<message_id>` name is generated.
file_name?: string;
// Used to guess an extension for generated names and to decide whether the file is an image.
mime_type?: string;
}
/**
 * Minimal video shape consumed by collectTelegramFileInfos.
 */
export interface TelegramVideoLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Preferred file name; when absent or empty a `video-<message_id>` name is generated.
file_name?: string;
// Used to guess an extension for generated names (falls back to ".mp4").
mime_type?: string;
}
/**
 * Minimal audio shape consumed by collectTelegramFileInfos.
 */
export interface TelegramAudioLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Preferred file name; when absent or empty an `audio-<message_id>` name is generated.
file_name?: string;
// Used to guess an extension for generated names (falls back to ".mp3").
mime_type?: string;
}
/**
 * Minimal voice-note shape consumed by collectTelegramFileInfos.
 * There is no file name field; the name is always generated as `voice-<message_id>`.
 */
export interface TelegramVoiceLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Used to guess the extension of the generated name (falls back to ".ogg").
mime_type?: string;
}
/**
 * Minimal animation shape consumed by collectTelegramFileInfos.
 */
export interface TelegramAnimationLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
// Preferred file name; when absent or empty an `animation-<message_id>` name is generated.
file_name?: string;
// Used to guess an extension for generated names (falls back to ".mp4").
mime_type?: string;
}
/**
 * Minimal sticker shape consumed by collectTelegramFileInfos.
 * Stickers are always reported as `sticker-<message_id>.webp` with MIME type image/webp.
 */
export interface TelegramStickerLike {
// Identifier copied into the resulting TelegramFileInfo.
file_id: string;
}
/**
 * Minimal inbound message shape used by the text, id, and file helpers in this file.
 * Every media field is optional; each one that is present yields one file entry.
 */
export interface TelegramMessageLike {
// Message identifier; also embedded in generated file names.
message_id: number;
// Message body; takes precedence over `caption` when non-empty.
text?: string;
// Media caption; used as the message text when `text` is absent or empty.
caption?: string;
// Available photo sizes; only the entry with the largest file_size is collected.
photo?: TelegramPhotoSizeLike[];
document?: TelegramDocumentLike;
video?: TelegramVideoLike;
audio?: TelegramAudioLike;
voice?: TelegramVoiceLike;
animation?: TelegramAnimationLike;
sticker?: TelegramStickerLike;
}
/**
 * Normalized description of one attachment, as produced by collectTelegramFileInfos.
 */
export interface TelegramFileInfo {
// Identifier taken verbatim from the source media object.
file_id: string;
// Name supplied by the media object, or a generated `<kind>-<message_id><ext>` name.
fileName: string;
// MIME type reported by the media object, or a fixed value for photos and stickers.
mimeType?: string;
// True for photos, stickers, and documents whose MIME type starts with "image/".
isImage: boolean;
}
/**
 * Minimal shape of an already-downloaded attachment, as listed by formatTelegramHistoryText.
 */
export interface DownloadedTelegramFileLike {
// Location printed in the "Attachments:" list.
path: string;
}
/** File extensions (with leading dot) keyed by lowercase MIME type without parameters. */
const EXTENSION_BY_MIME_TYPE: ReadonlyMap<string, string> = new Map<string, string>([
  ["image/jpeg", ".jpg"],
  ["image/png", ".png"],
  ["image/webp", ".webp"],
  ["image/gif", ".gif"],
  ["audio/ogg", ".ogg"],
  ["audio/mpeg", ".mp3"],
  ["audio/wav", ".wav"],
  ["video/mp4", ".mp4"],
  ["application/pdf", ".pdf"],
]);

/**
 * Maps a MIME type to a file extension.
 *
 * Matching is case-insensitive and ignores surrounding whitespace and any
 * parameters after the first `;` (for example `audio/ogg; codecs=opus`
 * resolves to `.ogg`).
 *
 * @param mimeType MIME type to look up; may be undefined or empty.
 * @param fallback Value returned when the MIME type is missing or unknown.
 * @returns The extension including its leading dot, or `fallback`.
 */
export function guessExtensionFromMime(
  mimeType: string | undefined,
  fallback: string,
): string {
  if (!mimeType) return fallback;
  // Drop optional parameters such as "; codecs=opus" before the lookup.
  const semicolon = mimeType.indexOf(";");
  const essence = (semicolon === -1 ? mimeType : mimeType.slice(0, semicolon))
    .trim()
    .toLowerCase();
  return EXTENSION_BY_MIME_TYPE.get(essence) ?? fallback;
}
/**
 * Infers an image MIME type from a path's extension.
 *
 * The comparison is case-insensitive. Only JPEG, PNG, WebP, and GIF
 * extensions are recognized.
 *
 * @param path File path or name to inspect.
 * @returns The matching image MIME type, or undefined for any other suffix.
 */
export function guessMediaType(path: string): string | undefined {
  const lowered = path.toLowerCase();
  const suffixToType: ReadonlyArray<readonly [string, string]> = [
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
  ];
  for (const [suffix, mediaType] of suffixToType) {
    if (lowered.endsWith(suffix)) {
      return mediaType;
    }
  }
  return undefined;
}
/**
 * Reports whether a MIME type belongs to the `image/` family.
 *
 * @param mimeType MIME type to test; a missing value is never an image.
 * @returns True when the lowercased value starts with "image/".
 */
export function isImageMimeType(mimeType: string | undefined): boolean {
  if (mimeType == null) {
    return false;
  }
  const lowered = mimeType.toLowerCase();
  return lowered.startsWith("image/");
}
/**
 * Returns the trimmed text of a message.
 *
 * A non-empty `text` wins; otherwise a non-empty `caption` is used;
 * otherwise the result is the empty string.
 *
 * @param message Message to read.
 * @returns The selected text with leading and trailing whitespace removed.
 */
export function extractTelegramMessageText(
  message: TelegramMessageLike,
): string {
  let body = "";
  if (message.text) {
    body = message.text;
  } else if (message.caption) {
    body = message.caption;
  }
  return body.trim();
}
/**
 * Combines the text of several messages into one string.
 *
 * Messages whose extracted text is empty are skipped; the remaining texts
 * are joined with a blank line between them, in input order.
 *
 * @param messages Messages to read.
 * @returns The joined text, or the empty string when no message has text.
 */
export function extractTelegramMessagesText(
  messages: TelegramMessageLike[],
): string {
  const parts: string[] = [];
  for (const message of messages) {
    const text = extractTelegramMessageText(message);
    if (text) {
      parts.push(text);
    }
  }
  return parts.join("\n\n");
}
/**
 * Returns the text of the first message that has any.
 *
 * @param messages Messages to scan in order.
 * @returns The first non-empty extracted text, or the empty string if none.
 */
export function extractFirstTelegramMessageText(
  messages: TelegramMessageLike[],
): string {
  // Extract every message's text up front, then pick the first non-empty one.
  const texts = messages.map((message) => extractTelegramMessageText(message));
  for (const text of texts) {
    if (text) {
      return text;
    }
  }
  return "";
}
/**
 * Lists the distinct message ids of the given messages.
 *
 * @param messages Messages to read.
 * @returns Each `message_id` once, in order of first appearance.
 */
export function collectTelegramMessageIds(
  messages: TelegramMessageLike[],
): number[] {
  const seen = new Set<number>();
  const ids: number[] = [];
  for (const { message_id: id } of messages) {
    if (seen.has(id)) {
      continue;
    }
    seen.add(id);
    ids.push(id);
  }
  return ids;
}
/**
 * Renders a history entry from message text and its downloaded files.
 *
 * The first line is the text, or "(no text)" when the text is empty. When
 * there are files, an "Attachments:" line follows, then one "- <path>" line
 * per file.
 *
 * @param rawText Message text, used as given.
 * @param files Downloaded attachments to list.
 * @returns The newline-separated summary.
 */
export function formatTelegramHistoryText(
  rawText: string,
  files: DownloadedTelegramFileLike[],
): string {
  const lines: string[] = [rawText.length > 0 ? rawText : "(no text)"];
  if (files.length > 0) {
    lines.push("Attachments:");
    for (const { path } of files) {
      lines.push(`- ${path}`);
    }
  }
  return lines.join("\n");
}
/**
 * Describes a media object that may carry its own file name.
 * Falls back to `<kind>-<messageId><ext>` when the name is missing or empty.
 */
function describeNamedTelegramMedia(
  kind: string,
  messageId: number,
  media: { file_id: string; file_name?: string; mime_type?: string },
  fallbackExtension: string,
  isImage: boolean,
): TelegramFileInfo {
  const fileName =
    media.file_name ||
    `${kind}-${messageId}${guessExtensionFromMime(
      media.mime_type,
      fallbackExtension,
    )}`;
  return {
    file_id: media.file_id,
    fileName,
    mimeType: media.mime_type,
    isImage,
  };
}

/**
 * Lists one descriptor per media field present on a single message, in the
 * fixed order photo, document, video, audio, voice, animation, sticker.
 */
function listTelegramMessageFileInfos(
  message: TelegramMessageLike,
): TelegramFileInfo[] {
  const id = message.message_id;
  const found: TelegramFileInfo[] = [];

  if (Array.isArray(message.photo) && message.photo.length > 0) {
    // Keep only the largest size; a missing file_size counts as 0, and ties
    // resolve to the later entry because the sort is stable.
    const largest = [...message.photo]
      .sort((a, b) => (a.file_size ?? 0) - (b.file_size ?? 0))
      .pop();
    if (largest) {
      found.push({
        file_id: largest.file_id,
        fileName: `photo-${id}.jpg`,
        mimeType: "image/jpeg",
        isImage: true,
      });
    }
  }
  if (message.document) {
    found.push(
      describeNamedTelegramMedia(
        "document",
        id,
        message.document,
        "",
        isImageMimeType(message.document.mime_type),
      ),
    );
  }
  if (message.video) {
    found.push(
      describeNamedTelegramMedia("video", id, message.video, ".mp4", false),
    );
  }
  if (message.audio) {
    found.push(
      describeNamedTelegramMedia("audio", id, message.audio, ".mp3", false),
    );
  }
  if (message.voice) {
    // Voice notes have no file name field, so the name is always generated.
    found.push({
      file_id: message.voice.file_id,
      fileName: `voice-${id}${guessExtensionFromMime(
        message.voice.mime_type,
        ".ogg",
      )}`,
      mimeType: message.voice.mime_type,
      isImage: false,
    });
  }
  if (message.animation) {
    found.push(
      describeNamedTelegramMedia(
        "animation",
        id,
        message.animation,
        ".mp4",
        false,
      ),
    );
  }
  if (message.sticker) {
    // NOTE(review): every sticker is labeled WebP; animated or video stickers
    // would presumably need a different extension — confirm with callers.
    found.push({
      file_id: message.sticker.file_id,
      fileName: `sticker-${id}.webp`,
      mimeType: "image/webp",
      isImage: true,
    });
  }
  return found;
}

/**
 * Collects a file descriptor for every attachment carried by the messages.
 *
 * Each message contributes, in order: its largest photo size, document,
 * video, audio, voice note, animation, and sticker, for whichever of those
 * fields are present. Names come from the media object when it provides one,
 * otherwise they are generated as `<kind>-<message_id><ext>`.
 *
 * Within a single message a given `file_id` is emitted only once. The
 * Telegram Bot API also sets `document` on animation messages for backward
 * compatibility, which would otherwise list the same file twice; the first
 * descriptor (the document one) is kept.
 *
 * @param messages Messages to scan, in order.
 * @returns Descriptors in message order, then media-kind order.
 */
export function collectTelegramFileInfos(
  messages: TelegramMessageLike[],
): TelegramFileInfo[] {
  const files: TelegramFileInfo[] = [];
  for (const message of messages) {
    // Scoped per message: the same file in two messages is still listed twice.
    const seenFileIds = new Set<string>();
    for (const info of listTelegramMessageFileInfos(message)) {
      if (seenFileIds.has(info.file_id)) {
        continue;
      }
      seenFileIds.add(info.file_id);
      files.push(info);
    }
  }
  return files;
}