From f4646278641c9687d15439e100a370102eb761b2 Mon Sep 17 00:00:00 2001
From: banteg <4562643+banteg@users.noreply.github.com>
Date: Mon, 29 Dec 2025 11:57:14 +0400
Subject: [PATCH] fix(exec-bridge): restore sulguk rendering pipeline
---
codex_telegram_bridge/pyproject.toml | 1 +
.../src/codex_telegram_bridge/exec_bridge.py | 83 ++++++----
.../src/codex_telegram_bridge/rendering.py | 69 ++-------
.../tests/test_exec_bridge.py | 143 +++++++++++++++++-
4 files changed, 215 insertions(+), 81 deletions(-)
diff --git a/codex_telegram_bridge/pyproject.toml b/codex_telegram_bridge/pyproject.toml
index 3bc1e73..e3d872b 100644
--- a/codex_telegram_bridge/pyproject.toml
+++ b/codex_telegram_bridge/pyproject.toml
@@ -7,6 +7,7 @@ requires-python = ">=3.12"
dependencies = [
"httpx>=0.28.1",
"markdown-it-py",
+ "sulguk>=0.11.0",
"typer",
]
diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py b/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py
index c446e42..483fd88 100644
--- a/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py
+++ b/codex_telegram_bridge/src/codex_telegram_bridge/exec_bridge.py
@@ -13,7 +13,6 @@ import time
from collections import deque
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
-from html import unescape
from logging.handlers import RotatingFileHandler
from typing import Any
@@ -22,7 +21,7 @@ import typer
from .config import load_telegram_config
from .constants import TELEGRAM_HARD_LIMIT
from .exec_render import ExecProgressRenderer, render_event_cli
-from .rendering import render_to_html, strip_tags
+from .rendering import render_markdown
from .telegram_client import TelegramClient
logger = logging.getLogger("exec_bridge")
@@ -89,6 +88,45 @@ def _clamp_tg_text(text: str, limit: int = TELEGRAM_TEXT_LIMIT) -> str:
return text[: limit - 20] + "\n...(truncated)"
+def truncate_for_telegram(text: str, limit: int) -> str:
+ """
+ Truncate text to fit Telegram limits while preserving the trailing `resume: ...`
+ line (if present), otherwise preserving the last non-empty line.
+ """
+ if len(text) <= limit:
+ return text
+
+ lines = text.splitlines()
+
+ tail_lines: list[str] | None = None
+ is_resume_tail = False
+ for i in range(len(lines) - 1, -1, -1):
+ line = lines[i]
+ if "resume" in line and UUID_PATTERN.search(line):
+ tail_lines = lines[i:]
+ is_resume_tail = True
+ break
+
+ if tail_lines is None:
+ for i in range(len(lines) - 1, -1, -1):
+ if lines[i].strip():
+ tail_lines = [lines[i]]
+ break
+
+ tail = "\n".join(tail_lines or []).strip("\n")
+ sep = "\n…\n"
+
+ max_tail = limit if is_resume_tail else (limit // 4)
+ tail = tail[-max_tail:] if max_tail > 0 else ""
+
+ head_budget = limit - len(sep) - len(tail)
+ if head_budget <= 0:
+ return tail[-limit:] if tail else text[:limit]
+
+ head = text[:head_budget].rstrip()
+ return (head + sep + tail)[:limit]
+
+
async def _send_markdown(
bot: TelegramClient,
*,
@@ -97,24 +135,15 @@ async def _send_markdown(
reply_to_message_id: int | None = None,
disable_notification: bool = False,
) -> dict[str, Any]:
- md = text
- if len(md) > TELEGRAM_MARKDOWN_LIMIT:
- md = md[: TELEGRAM_MARKDOWN_LIMIT - 20] + "\n…(truncated)"
-
- rendered = render_to_html(md)
- if len(rendered) > TELEGRAM_TEXT_LIMIT:
- plain = _clamp_tg_text(unescape(strip_tags(rendered)))
- return await bot.send_message(
- chat_id=chat_id,
- text=plain,
- reply_to_message_id=reply_to_message_id,
- disable_notification=disable_notification,
- )
+ rendered, entities = render_markdown(text)
+ if len(rendered) > TELEGRAM_MARKDOWN_LIMIT:
+ rendered = truncate_for_telegram(rendered, TELEGRAM_MARKDOWN_LIMIT)
+ entities = []
return await bot.send_message(
chat_id=chat_id,
text=rendered,
- parse_mode="HTML",
+ entities=entities or None,
reply_to_message_id=reply_to_message_id,
disable_notification=disable_notification,
)
@@ -382,17 +411,16 @@ async def _handle_message(
async def _edit_progress(md: str) -> None:
if progress_id is None:
return
- parse_mode: str | None = "HTML"
- rendered = render_to_html(md)
+ rendered, entities = render_markdown(md)
if len(rendered) > TELEGRAM_TEXT_LIMIT:
- rendered = _clamp_tg_text(unescape(strip_tags(rendered)))
- parse_mode = None
+ rendered = truncate_for_telegram(rendered, TELEGRAM_TEXT_LIMIT)
+ entities = []
try:
await cfg.bot.edit_message_text(
chat_id=chat_id,
message_id=progress_id,
text=rendered,
- parse_mode=parse_mode,
+ entities=entities or None,
)
except Exception as e:
logger.info(
@@ -404,11 +432,14 @@ async def _handle_message(
try:
initial_md = progress_renderer.render_progress(0.0)
- initial_rendered = render_to_html(initial_md)
+ initial_rendered, initial_entities = render_markdown(initial_md)
+ if len(initial_rendered) > TELEGRAM_TEXT_LIMIT:
+ initial_rendered = truncate_for_telegram(initial_rendered, TELEGRAM_TEXT_LIMIT)
+ initial_entities = []
progress_msg = await cfg.bot.send_message(
chat_id=chat_id,
text=initial_rendered,
- parse_mode="HTML",
+ entities=initial_entities or None,
reply_to_message_id=user_msg_id,
disable_notification=cfg.progress_silent,
)
@@ -474,7 +505,7 @@ async def _handle_message(
progress_renderer.render_final(elapsed, answer, status=status)
+ f"\n\nresume: `{session_id}`"
)
- final_rendered = render_to_html(final_md)
+ final_rendered, final_entities = render_markdown(final_md)
can_edit_final = progress_id is not None and len(final_rendered) <= TELEGRAM_TEXT_LIMIT
if cfg.final_notify or not can_edit_final:
@@ -483,7 +514,7 @@ async def _handle_message(
chat_id=chat_id,
text=final_md,
reply_to_message_id=user_msg_id,
- disable_notification=cfg.progress_silent,
+ disable_notification=False,
)
if progress_id is not None:
try:
@@ -495,7 +526,7 @@ async def _handle_message(
chat_id=chat_id,
message_id=progress_id,
text=final_rendered,
- parse_mode="HTML",
+ entities=final_entities or None,
)
diff --git a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py
index 3760338..61a00ba 100644
--- a/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py
+++ b/codex_telegram_bridge/src/codex_telegram_bridge/rendering.py
@@ -1,65 +1,26 @@
from __future__ import annotations
import re
-from html import escape
+from typing import Any
from markdown_it import MarkdownIt
+from sulguk import transform_html
-_md = MarkdownIt("commonmark", {"html": False, "breaks": True})
-
-_CODE_CLASS_RE = re.compile(r' ", "")
- html = html.replace("')
-_IMG_ALT_RE = re.compile(r']*alt="([^"]*)"[^>]*/?>')
-_IMG_RE = re.compile(r"
]*>")
-_OL_OPEN_RE = re.compile(r'
\s*')
-_TAG_RE = re.compile(r"<[^>]+>")
+_md = MarkdownIt("commonmark", {"html": False})
-def strip_tags(html: str) -> str:
- return _TAG_RE.sub("", html)
+def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
+ html = _md.render(md or "")
+ rendered = transform_html(html)
+ text = re.sub(r"(?m)^(\s*)•", r"\1-", rendered.text)
-def render_to_html(text: str) -> str:
- """
- Render Markdown to Telegram-compatible HTML.
+ # Telegram requires MessageEntity.language, when present, to be a String; drop any non-string value.
+ entities: list[dict[str, Any]] = []
+ for e in rendered.entities:
+ d = dict(e)
+ if "language" in d and not isinstance(d["language"], str):
+ d.pop("language", None)
+ entities.append(d)
+ return text, entities
- Telegram supports only a subset of HTML tags, so we post-process the
- MarkdownIt output to flatten unsupported block tags (p/ul/li/etc) into
- plain text with newlines and simple bullets.
- """
- html = _md.render(text or "")
-
- # Paragraphs and line breaks.
- html = html.replace("
\n", "\n").replace("
\n", "\n")
- html = html.replace("
", "\n").replace("
", "\n")
- html = html.replace("
", html)
-
- # Images are not supported: keep alt text if present.
- html = _IMG_ALT_RE.sub(lambda m: escape(m.group(1) or ""), html)
- html = _IMG_RE.sub("", html)
-
- #
isn't supported; render a separator line.
- html = html.replace("
", "\n----\n\n").replace("
", "\n----\n\n")
-
- # Flatten blockquotes.
- html = html.replace("\n", "")
- html = html.replace("
\n", "\n\n").replace("", "\n\n")
-
- html = re.sub(r"\n{3,}", "\n\n", html)
- return html.strip()
diff --git a/codex_telegram_bridge/tests/test_exec_bridge.py b/codex_telegram_bridge/tests/test_exec_bridge.py
index 104bc64..9687ef7 100644
--- a/codex_telegram_bridge/tests/test_exec_bridge.py
+++ b/codex_telegram_bridge/tests/test_exec_bridge.py
@@ -1,4 +1,6 @@
-from codex_telegram_bridge.exec_bridge import extract_session_id
+import asyncio
+
+from codex_telegram_bridge.exec_bridge import extract_session_id, truncate_for_telegram
def test_extract_session_id_finds_uuid_v7() -> None:
@@ -7,3 +9,142 @@ def test_extract_session_id_finds_uuid_v7() -> None:
assert extract_session_id(text) == uuid
+
+def test_truncate_for_telegram_preserves_resume_line() -> None:
+ uuid = "019b66fc-64c2-7a71-81cd-081c504cfeb2"
+ md = ("x" * 10_000) + f"\nresume: `{uuid}`"
+
+ out = truncate_for_telegram(md, 400)
+
+ assert len(out) <= 400
+ assert uuid in out
+ assert out.rstrip().endswith(f"resume: `{uuid}`")
+
+
+class _FakeBot:
+ def __init__(self) -> None:
+ self._next_id = 1
+ self.send_calls: list[dict] = []
+ self.edit_calls: list[dict] = []
+ self.delete_calls: list[dict] = []
+
+ async def send_message(
+ self,
+ chat_id: int,
+ text: str,
+ reply_to_message_id: int | None = None,
+ disable_notification: bool | None = False,
+ entities: list[dict] | None = None,
+ parse_mode: str | None = None,
+ ) -> dict:
+ self.send_calls.append(
+ {
+ "chat_id": chat_id,
+ "text": text,
+ "reply_to_message_id": reply_to_message_id,
+ "disable_notification": disable_notification,
+ "entities": entities,
+ "parse_mode": parse_mode,
+ }
+ )
+ msg_id = self._next_id
+ self._next_id += 1
+ return {"message_id": msg_id}
+
+ async def edit_message_text(
+ self,
+ chat_id: int,
+ message_id: int,
+ text: str,
+ entities: list[dict] | None = None,
+ parse_mode: str | None = None,
+ ) -> dict:
+ self.edit_calls.append(
+ {
+ "chat_id": chat_id,
+ "message_id": message_id,
+ "text": text,
+ "entities": entities,
+ "parse_mode": parse_mode,
+ }
+ )
+ return {"message_id": message_id}
+
+ async def delete_message(self, chat_id: int, message_id: int) -> bool:
+ self.delete_calls.append({"chat_id": chat_id, "message_id": message_id})
+ return True
+
+
+class _FakeRunner:
+ def __init__(self, *, answer: str, saw_agent_message: bool = True) -> None:
+ self._answer = answer
+ self._saw_agent_message = saw_agent_message
+
+ async def run_serialized(self, *_args, **_kwargs) -> tuple[str, str, bool]:
+ return ("019b66fc-64c2-7a71-81cd-081c504cfeb2", self._answer, self._saw_agent_message)
+
+
+def test_final_notify_sends_loud_final_message() -> None:
+ from codex_telegram_bridge.exec_bridge import BridgeConfig, _handle_message
+
+ bot = _FakeBot()
+ runner = _FakeRunner(answer="ok")
+ cfg = BridgeConfig(
+ bot=bot, # type: ignore[arg-type]
+ runner=runner, # type: ignore[arg-type]
+ chat_id=123,
+ ignore_backlog=True,
+ progress_edit_every_s=999.0,
+ progress_silent=True,
+ final_notify=True,
+ startup_msg="",
+ max_concurrency=1,
+ )
+
+ asyncio.run(
+ _handle_message(
+ cfg,
+ semaphore=asyncio.Semaphore(1),
+ chat_id=123,
+ user_msg_id=10,
+ text="hi",
+ resume_session=None,
+ )
+ )
+
+ assert len(bot.send_calls) == 2
+ assert bot.send_calls[0]["disable_notification"] is True
+ assert bot.send_calls[1]["disable_notification"] is False
+
+
+def test_new_final_message_forces_notification_when_too_long_to_edit() -> None:
+ from codex_telegram_bridge.exec_bridge import BridgeConfig, _handle_message
+
+ bot = _FakeBot()
+ runner = _FakeRunner(answer="x" * 10_000)
+ cfg = BridgeConfig(
+ bot=bot, # type: ignore[arg-type]
+ runner=runner, # type: ignore[arg-type]
+ chat_id=123,
+ ignore_backlog=True,
+ progress_edit_every_s=999.0,
+ progress_silent=True,
+ final_notify=False,
+ startup_msg="",
+ max_concurrency=1,
+ )
+
+ asyncio.run(
+ _handle_message(
+ cfg,
+ semaphore=asyncio.Semaphore(1),
+ chat_id=123,
+ user_msg_id=10,
+ text="hi",
+ resume_session=None,
+ )
+ )
+
+ assert len(bot.send_calls) == 2
+ assert bot.send_calls[0]["disable_notification"] is True
+ assert bot.send_calls[1]["disable_notification"] is False