From f4646278641c9687d15439e100a370102eb761b2 Mon Sep 17 00:00:00 2001 From: banteg <4562643+banteg@users.noreply.github.com> Date: Mon, 29 Dec 2025 11:57:14 +0400 Subject: [PATCH] fix(exec-bridge): restore sulguk rendering pipeline --- codex_telegram_bridge/pyproject.toml | 1 + .../src/codex_telegram_bridge/exec_bridge.py | 83 ++++++---- .../src/codex_telegram_bridge/rendering.py | 69 ++------- .../tests/test_exec_bridge.py | 143 +++++++++++++++++- 4 files changed, 215 insertions(+), 81 deletions(-) diff --git a/codex_telegram_bridge/pyproject.toml b/codex_telegram_bridge/pyproject.toml index 3bc1e73..e3d872b 100644 --- a/codex_telegram_bridge/pyproject.toml +++ b/codex_telegram_bridge/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">=3.12" dependencies = [ "httpx>=0.28.1", "markdown-it-py", + "sulguk>=0.11.0", "typer", ] diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py b/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py index c446e42..483fd88 100644 --- a/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py +++ b/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py @@ -13,7 +13,6 @@ import time from collections import deque from collections.abc import Awaitable, Callable from dataclasses import dataclass -from html import unescape from logging.handlers import RotatingFileHandler from typing import Any @@ -22,7 +21,7 @@ import typer from .config import load_telegram_config from .constants import TELEGRAM_HARD_LIMIT from .exec_render import ExecProgressRenderer, render_event_cli -from .rendering import render_to_html, strip_tags +from .rendering import render_markdown from .telegram_client import TelegramClient logger = logging.getLogger("exec_bridge") @@ -89,6 +88,45 @@ def _clamp_tg_text(text: str, limit: int = TELEGRAM_TEXT_LIMIT) -> str: return text[: limit - 20] + "\n...(truncated)" +def truncate_for_telegram(text: str, limit: int) -> str: + """ + Truncate text to fit Telegram limits while preserving the trailing `resume: ...` + line (if present), otherwise preserving the last non-empty line. + """ + if len(text) <= limit: + return text + + lines = text.splitlines() + + tail_lines: list[str] | None = None + is_resume_tail = False + for i in range(len(lines) - 1, -1, -1): + line = lines[i] + if "resume" in line and UUID_PATTERN.search(line): + tail_lines = lines[i:] + is_resume_tail = True + break + + if tail_lines is None: + for i in range(len(lines) - 1, -1, -1): + if lines[i].strip(): + tail_lines = [lines[i]] + break + + tail = "\n".join(tail_lines or []).strip("\n") + sep = "\n…\n" + + max_tail = limit if is_resume_tail else (limit // 4) + tail = tail[-max_tail:] if max_tail > 0 else "" + + head_budget = limit - len(sep) - len(tail) + if head_budget <= 0: + return tail[-limit:] if tail else text[:limit] + + head = text[:head_budget].rstrip() + return (head + sep + tail)[:limit] + + async def _send_markdown( bot: TelegramClient, *, @@ -97,24 +135,15 @@ async def _send_markdown( reply_to_message_id: int | None = None, disable_notification: bool = False, ) -> dict[str, Any]: - md = text - if len(md) > TELEGRAM_MARKDOWN_LIMIT: - md = md[: TELEGRAM_MARKDOWN_LIMIT - 20] + "\n…(truncated)" - - rendered = render_to_html(md) - if len(rendered) > TELEGRAM_TEXT_LIMIT: - plain = _clamp_tg_text(unescape(strip_tags(rendered))) - return await bot.send_message( - chat_id=chat_id, - text=plain, - reply_to_message_id=reply_to_message_id, - disable_notification=disable_notification, - ) + rendered, entities = render_markdown(text) + if len(rendered) > TELEGRAM_MARKDOWN_LIMIT: + rendered = truncate_for_telegram(rendered, TELEGRAM_MARKDOWN_LIMIT) + entities = [] return await bot.send_message( chat_id=chat_id, text=rendered, - parse_mode="HTML", + entities=entities or None, reply_to_message_id=reply_to_message_id, disable_notification=disable_notification, ) @@ -382,17 +411,16 @@ async def _handle_message( async def _edit_progress(md: str) -> None: if progress_id is None: return - parse_mode: str | None = "HTML" - rendered = render_to_html(md) + rendered, entities = render_markdown(md) if len(rendered) > TELEGRAM_TEXT_LIMIT: - rendered = _clamp_tg_text(unescape(strip_tags(rendered))) - parse_mode = None + rendered = truncate_for_telegram(rendered, TELEGRAM_TEXT_LIMIT) + entities = [] try: await cfg.bot.edit_message_text( chat_id=chat_id, message_id=progress_id, text=rendered, - parse_mode=parse_mode, + entities=entities or None, ) except Exception as e: logger.info( @@ -404,11 +432,14 @@ async def _handle_message( try: initial_md = progress_renderer.render_progress(0.0) - initial_rendered = render_to_html(initial_md) + initial_rendered, initial_entities = render_markdown(initial_md) + if len(initial_rendered) > TELEGRAM_TEXT_LIMIT: + initial_rendered = truncate_for_telegram(initial_rendered, TELEGRAM_TEXT_LIMIT) + initial_entities = [] progress_msg = await cfg.bot.send_message( chat_id=chat_id, text=initial_rendered, - parse_mode="HTML", + entities=initial_entities or None, reply_to_message_id=user_msg_id, disable_notification=cfg.progress_silent, ) @@ -474,7 +505,7 @@ async def _handle_message( progress_renderer.render_final(elapsed, answer, status=status) + f"\n\nresume: `{session_id}`" ) - final_rendered = render_to_html(final_md) + final_rendered, final_entities = render_markdown(final_md) can_edit_final = progress_id is not None and len(final_rendered) <= TELEGRAM_TEXT_LIMIT if cfg.final_notify or not can_edit_final: @@ -483,7 +514,7 @@ async def _handle_message( chat_id=chat_id, text=final_md, reply_to_message_id=user_msg_id, - disable_notification=cfg.progress_silent, + disable_notification=False, ) if progress_id is not None: try: @@ -495,7 +526,7 @@ async def _handle_message( chat_id=chat_id, message_id=progress_id, text=final_rendered, - parse_mode="HTML", + entities=final_entities or None, ) diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py index 3760338..61a00ba 100644 --- a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py +++ b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py @@ -1,65 +1,26 @@ from __future__ import annotations import re -from html import escape +from typing import Any from markdown_it import MarkdownIt +from sulguk import transform_html -_md = MarkdownIt("commonmark", {"html": False, "breaks": True}) - -_CODE_CLASS_RE = re.compile(r'') -_IMG_ALT_RE = re.compile(r']*alt="([^"]*)"[^>]*/?>') -_IMG_RE = re.compile(r"]*>") -_OL_OPEN_RE = re.compile(r'\s*') -_TAG_RE = re.compile(r"<[^>]+>") +_md = MarkdownIt("commonmark", {"html": False}) -def strip_tags(html: str) -> str: - return _TAG_RE.sub("", html) +def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]: + html = _md.render(md or "") + rendered = transform_html(html) + text = re.sub(r"(?m)^(\s*)•", r"\1-", rendered.text) -def render_to_html(text: str) -> str: - """ - Render Markdown to Telegram-compatible HTML. + # FIX: Telegram requires MessageEntity.language (if present) to be a String. + entities: list[dict[str, Any]] = [] + for e in rendered.entities: + d = dict(e) + if "language" in d and not isinstance(d["language"], str): + d.pop("language", None) + entities.append(d) + return text, entities - Telegram supports only a subset of HTML tags, so we post-process the - MarkdownIt output to flatten unsupported block tags (p/ul/li/etc) into - plain text with newlines and simple bullets. - """ - html = _md.render(text or "") - - # Paragraphs and line breaks. - html = html.replace("

", "") - html = html.replace("
\n", "\n").replace("
\n", "\n") - html = html.replace("
", "\n").replace("
", "\n") - html = html.replace("

\n", "\n\n").replace("

", "\n\n") - - # Lists -> "- " lines. - html = html.replace("
    \n", "").replace("
\n", "") - html = _OL_OPEN_RE.sub("", html).replace("\n", "") - html = html.replace("
  • ", "- ") - html = html.replace("
  • \n", "\n").replace("", "\n") - - # Headings -> bold line. - for level in range(1, 7): - html = html.replace(f"", "") - html = html.replace(f"\n", "\n\n").replace( - f"", "\n\n" - ) - - # Code fences may include language class; Telegram doesn't need it. - html = _CODE_CLASS_RE.sub("", html) - - # Images are not supported: keep alt text if present. - html = _IMG_ALT_RE.sub(lambda m: escape(m.group(1) or ""), html) - html = _IMG_RE.sub("", html) - - #
    isn't supported; render a separator line. - html = html.replace("
    ", "\n----\n\n").replace("
    ", "\n----\n\n") - - # Flatten blockquotes. - html = html.replace("
    \n", "") - html = html.replace("
    \n", "\n\n").replace("", "\n\n") - - html = re.sub(r"\n{3,}", "\n\n", html) - return html.strip() diff --git a/codex_telegram_bridge/tests/test_exec_bridge.py b/codex_telegram_bridge/tests/test_exec_bridge.py index 104bc64..9687ef7 100644 --- a/codex_telegram_bridge/tests/test_exec_bridge.py +++ b/codex_telegram_bridge/tests/test_exec_bridge.py @@ -1,4 +1,6 @@ -from codex_telegram_bridge.exec_bridge import extract_session_id +import asyncio + +from codex_telegram_bridge.exec_bridge import extract_session_id, truncate_for_telegram def test_extract_session_id_finds_uuid_v7() -> None: @@ -7,3 +9,142 @@ def test_extract_session_id_finds_uuid_v7() -> None: assert extract_session_id(text) == uuid + +def test_truncate_for_telegram_preserves_resume_line() -> None: + uuid = "019b66fc-64c2-7a71-81cd-081c504cfeb2" + md = ("x" * 10_000) + f"\nresume: `{uuid}`" + + out = truncate_for_telegram(md, 400) + + assert len(out) <= 400 + assert uuid in out + assert out.rstrip().endswith(f"resume: `{uuid}`") + + +class _FakeBot: + def __init__(self) -> None: + self._next_id = 1 + self.send_calls: list[dict] = [] + self.edit_calls: list[dict] = [] + self.delete_calls: list[dict] = [] + + async def send_message( + self, + chat_id: int, + text: str, + reply_to_message_id: int | None = None, + disable_notification: bool | None = False, + entities: list[dict] | None = None, + parse_mode: str | None = None, + ) -> dict: + self.send_calls.append( + { + "chat_id": chat_id, + "text": text, + "reply_to_message_id": reply_to_message_id, + "disable_notification": disable_notification, + "entities": entities, + "parse_mode": parse_mode, + } + ) + msg_id = self._next_id + self._next_id += 1 + return {"message_id": msg_id} + + async def edit_message_text( + self, + chat_id: int, + message_id: int, + text: str, + entities: list[dict] | None = None, + parse_mode: str | None = None, + ) -> dict: + self.edit_calls.append( + { + "chat_id": chat_id, + "message_id": message_id, + "text": text, + "entities": entities, + "parse_mode": parse_mode, + } + ) + return {"message_id": message_id} + + async def delete_message(self, chat_id: int, message_id: int) -> bool: + self.delete_calls.append({"chat_id": chat_id, "message_id": message_id}) + return True + + +class _FakeRunner: + def __init__(self, *, answer: str, saw_agent_message: bool = True) -> None: + self._answer = answer + self._saw_agent_message = saw_agent_message + + async def run_serialized(self, *_args, **_kwargs) -> tuple[str, str, bool]: + return ("019b66fc-64c2-7a71-81cd-081c504cfeb2", self._answer, self._saw_agent_message) + + +def test_final_notify_sends_loud_final_message() -> None: + from codex_telegram_bridge.exec_bridge import BridgeConfig, _handle_message + + bot = _FakeBot() + runner = _FakeRunner(answer="ok") + cfg = BridgeConfig( + bot=bot, # type: ignore[arg-type] + runner=runner, # type: ignore[arg-type] + chat_id=123, + ignore_backlog=True, + progress_edit_every_s=999.0, + progress_silent=True, + final_notify=True, + startup_msg="", + max_concurrency=1, + ) + + asyncio.run( + _handle_message( + cfg, + semaphore=asyncio.Semaphore(1), + chat_id=123, + user_msg_id=10, + text="hi", + resume_session=None, + ) + ) + + assert len(bot.send_calls) == 2 + assert bot.send_calls[0]["disable_notification"] is True + assert bot.send_calls[1]["disable_notification"] is False + + +def test_new_final_message_forces_notification_when_too_long_to_edit() -> None: + from codex_telegram_bridge.exec_bridge import BridgeConfig, _handle_message + + bot = _FakeBot() + runner = _FakeRunner(answer="x" * 10_000) + cfg = BridgeConfig( + bot=bot, # type: ignore[arg-type] + runner=runner, # type: ignore[arg-type] + chat_id=123, + ignore_backlog=True, + progress_edit_every_s=999.0, + progress_silent=True, + final_notify=False, + startup_msg="", + max_concurrency=1, + ) + + asyncio.run( + _handle_message( + cfg, + semaphore=asyncio.Semaphore(1), + chat_id=123, + user_msg_id=10, + text="hi", + resume_session=None, + ) + ) + + assert len(bot.send_calls) == 2 + assert bot.send_calls[0]["disable_notification"] is True + assert bot.send_calls[1]["disable_notification"] is False