From a3ac5739859c78015162b01930990dd07027e3f2 Mon Sep 17 00:00:00 2001 From: banteg <4562643+banteg@users.noreply.github.com> Date: Mon, 29 Dec 2025 02:35:20 +0400 Subject: [PATCH] refactor: truncate telegram messages instead of chunking --- codex_telegram_bridge/readme.md | 2 +- .../src/codex_telegram_bridge/rendering.py | 71 +------------------ .../codex_telegram_bridge/telegram_client.py | 47 ++++++++---- 3 files changed, 34 insertions(+), 86 deletions(-) diff --git a/codex_telegram_bridge/readme.md b/codex_telegram_bridge/readme.md index 3e5630e..a40892a 100644 --- a/codex_telegram_bridge/readme.md +++ b/codex_telegram_bridge/readme.md @@ -50,7 +50,7 @@ Optional flags: - `src/codex_telegram_bridge/constants.py`: limits and config path constants - `src/codex_telegram_bridge/config.py`: config loading and chat-id parsing helpers - `src/codex_telegram_bridge/exec_render.py`: renderers for codex exec JSONL events -- `src/codex_telegram_bridge/rendering.py`: markdown rendering + chunking +- `src/codex_telegram_bridge/rendering.py`: markdown rendering - `src/codex_telegram_bridge/routes.py`: sqlite routing store - `src/codex_telegram_bridge/telegram_client.py`: Telegram Bot API client - `src/codex_telegram_bridge/exec_bridge.py`: codex exec + resume bridge diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py index 95f0b92..4017ecc 100644 --- a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py +++ b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Tuple from markdown_it import MarkdownIt from sulguk import transform_html + def render_markdown(md: str) -> Tuple[str, List[Dict[str, Any]]]: html = MarkdownIt("commonmark", {"html": False}).render(md or "") rendered = transform_html(html) @@ -20,73 +21,3 @@ def render_markdown(md: str) -> Tuple[str, List[Dict[str, Any]]]: d.pop("language", None) entities.append(d) return text, entities - - -def _chunk_text_with_indices(text: str, limit: int) -> List[Tuple[str, int, int]]: - text = text or "" - if len(text) <= limit: - return [(text, 0, len(text))] - - out: List[Tuple[str, int, int]] = [] - buf: List[str] = [] - size = 0 - buf_start = 0 - pos = 0 - - for line in text.splitlines(keepends=True): - line_len = len(line) - line_start = pos - line_end = pos + line_len - - if line_len > limit: - if buf: - out.append(("".join(buf), buf_start, line_start)) - buf, size = [], 0 - for i in range(0, line_len, limit): - part = line[i : i + limit] - out.append((part, line_start + i, line_start + i + len(part))) - pos = line_end - buf_start = pos - continue - - if size + line_len > limit: - out.append(("".join(buf), buf_start, line_start)) - buf = [line] - size = line_len - buf_start = line_start - else: - if not buf: - buf_start = line_start - buf.append(line) - size += line_len - - pos = line_end - - if buf: - out.append(("".join(buf), buf_start, pos)) - return out - - -def _slice_entities(entities: List[Dict[str, Any]], start: int, end: int) -> List[Dict[str, Any]]: - out: List[Dict[str, Any]] = [] - for ent in entities: - try: - ent_start = int(ent.get("offset", 0)) - ent_len = int(ent.get("length", 0)) - except (TypeError, ValueError): - continue - if ent_len <= 0: - continue - ent_end = ent_start + ent_len - if ent_end <= start or ent_start >= end: - continue - new_start = max(ent_start, start) - new_end = min(ent_end, end) - new_len = new_end - new_start - if new_len <= 0: - continue - new_ent = dict(ent) - new_ent["offset"] = new_start - start - new_ent["length"] = new_len - out.append(new_ent) - return out diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/telegram_client.py b/codex_telegram_bridge/src/codex_telegram_bridge/telegram_client.py index 5c6e393..0303c43 100644 --- a/codex_telegram_bridge/src/codex_telegram_bridge/telegram_client.py +++ b/codex_telegram_bridge/src/codex_telegram_bridge/telegram_client.py @@ -6,7 +6,9 @@ import urllib.request from typing import Any, Dict, List, Optional from .constants import DEFAULT_CHUNK_LEN, TELEGRAM_HARD_LIMIT -from .rendering import render_markdown, _chunk_text_with_indices, _slice_entities +from .rendering import render_markdown + +ELLIPSIS = "…" class TelegramClient: @@ -64,7 +66,7 @@ class TelegramClient: entities: Optional[List[Dict[str, Any]]] = None, ) -> Dict[str, Any]: if len(text) > TELEGRAM_HARD_LIMIT: - raise ValueError("send_message received too-long text; chunk it first") + raise ValueError("send_message received too-long text") params: Dict[str, Any] = { "chat_id": chat_id, "text": text, @@ -111,20 +113,35 @@ class TelegramClient: disable_notification: bool = False, chunk_len: int = DEFAULT_CHUNK_LEN, ) -> List[Dict[str, Any]]: - sent: List[Dict[str, Any]] = [] rendered_text, entities = render_markdown(text) - chunks = _chunk_text_with_indices(rendered_text, limit=chunk_len) - for i, (chunk, start, end) in enumerate(chunks): - chunk_entities = _slice_entities(entities, start, end) if entities else None - msg = self.send_message( - chat_id=chat_id, - text=chunk, - reply_to_message_id=(reply_to_message_id if i == 0 else None), - disable_notification=disable_notification, - entities=chunk_entities, - ) - sent.append(msg) - return sent + limit = min(chunk_len, TELEGRAM_HARD_LIMIT) + if len(rendered_text) > limit: + suffix = "\n" + ELLIPSIS + keep = max(0, limit - len(suffix)) + rendered_text = rendered_text[:keep] + suffix + if entities: + trimmed: List[Dict[str, Any]] = [] + for ent in entities: + start = int(ent["offset"]) + length = int(ent["length"]) + if start >= keep: + continue + end = min(start + length, keep) + if end <= start: + continue + d = dict(ent) + d["length"] = end - start + trimmed.append(d) + entities = trimmed + + msg = self.send_message( + chat_id=chat_id, + text=rendered_text, + reply_to_message_id=reply_to_message_id, + disable_notification=disable_notification, + entities=entities or None, + ) + return [msg] def send_chat_action(self, chat_id: int, action: str = "typing") -> Dict[str, Any]: params: Dict[str, Any] = {