feat(telegram): add overflow split mode (#101)

2026-01-12 18:17:12 +04:00
parent 04671593aa
commit 9d5fccab92
9 changed files with 363 additions and 8 deletions
@@ -40,6 +40,20 @@ example, `http://localhost:8000/v1`) and a dummy `OPENAI_API_KEY` if your server
 ignores it. If your server requires a specific model name, set
 `voice_transcription_model` (for example, `whisper-1`).

+## Message overflow
+
+By default, takopi trims long final responses to ~3500 characters to stay under
+Telegram's 4096-character limit after entity parsing. You can opt into splitting
+instead:
+
+```toml
+[transports.telegram]
+message_overflow = "split" # trim | split
+```
+
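+Split mode sends the response as multiple messages. Each chunk includes the
+footer; follow-up chunks add a "continued (N/M)" header. A fenced code block
+that spans a chunk boundary is closed at the end of one chunk and reopened at
+the start of the next.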
+
 ## Forum topics (optional)

 Takopi can bind Telegram forum topics to a project/branch and persist resume tokens
@@ -64,6 +64,13 @@ bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz"
 chat_id = 123456789
 ```

+Optional: split long final responses instead of trimming them:
+
+```toml
+[transports.telegram]
+message_overflow = "split" # trim | split
+```
+
 ---

 ## 2. Your first handoff
@@ -113,6 +113,15 @@ async def _send_or_edit_message(
    thread_id: int | None = None,
 ) -> tuple[MessageRef | None, bool]:
    msg = message
+    followups = message.extra.get("followups")
+    if followups:
+        extra = dict(message.extra)
+        if reply_to is not None:
+            extra.setdefault("followup_reply_to_message_id", reply_to.message_id)
+        if thread_id is not None:
+            extra.setdefault("followup_thread_id", thread_id)
+        extra.setdefault("followup_notify", notify)
+        msg = RenderedMessage(text=message.text, extra=extra)
    if edit_ref is not None:
        logger.debug(
            "transport.edit_message",
@@ -97,6 +97,7 @@ class TelegramTransportSettings(BaseModel):

    bot_token: NonEmptyStr
    chat_id: StrictInt
+    message_overflow: Literal["trim", "split"] = "trim"
    voice_transcription: bool = False
    voice_max_bytes: StrictInt = 10 * 1024 * 1024
    voice_transcription_model: NonEmptyStr = "gpt-4o-mini-transcribe"
@@ -101,7 +101,7 @@ class TelegramBackend(TransportBackend):
        )
        bot = TelegramClient(token)
        transport = TelegramTransport(bot)
-        presenter = TelegramPresenter()
+        presenter = TelegramPresenter(message_overflow=settings.message_overflow)
        exec_cfg = ExecBridgeConfig(
            transport=transport,
            presenter=presenter,
@@ -18,7 +18,7 @@ from ..settings import (
    TelegramTransportSettings,
 )
 from .client import BotClient
-from .render import prepare_telegram
+from .render import MAX_BODY_CHARS, prepare_telegram, prepare_telegram_multi
 from .types import TelegramCallbackQuery, TelegramIncomingMessage

 logger = get_logger(__name__)
@@ -43,8 +43,14 @@ CLEAR_MARKUP = {"inline_keyboard": []}


 class TelegramPresenter:
-    def __init__(self, *, formatter: MarkdownFormatter | None = None) -> None:
+    def __init__(
+        self,
+        *,
+        formatter: MarkdownFormatter | None = None,
+        message_overflow: str = "trim",
+    ) -> None:
        self._formatter = formatter or MarkdownFormatter()
+        self._message_overflow = message_overflow

    def render_progress(
        self,
@@ -74,6 +80,23 @@ class TelegramPresenter:
        parts = self._formatter.render_final_parts(
            state, elapsed_s=elapsed_s, status=status, answer=answer
        )
+        if self._message_overflow == "split":
+            payloads = prepare_telegram_multi(parts, max_body_chars=MAX_BODY_CHARS)
+            text, entities = payloads[0]
+            extra = {"entities": entities, "reply_markup": CLEAR_MARKUP}
+            if len(payloads) > 1:
+                followups = [
+                    RenderedMessage(
+                        text=followup_text,
+                        extra={
+                            "entities": followup_entities,
+                            "reply_markup": CLEAR_MARKUP,
+                        },
+                    )
+                    for followup_text, followup_entities in payloads[1:]
+                ]
+                extra["followups"] = followups
+            return RenderedMessage(text=text, extra=extra)
        text, entities = prepare_telegram(parts)
        return RenderedMessage(
            text=text,
@@ -107,6 +130,34 @@ class TelegramTransport:
    def __init__(self, bot: BotClient) -> None:
        self._bot = bot

+    @staticmethod
+    def _extract_followups(message: RenderedMessage) -> list[RenderedMessage]:
+        """Return the follow-up messages stored under ``extra["followups"]``.
+
+        Returns an empty list when the key is missing or its value is not a
+        list. List entries that are not ``RenderedMessage`` instances are
+        dropped.
+        """
+        candidates = message.extra.get("followups")
+        if isinstance(candidates, list):
+            return [
+                entry for entry in candidates if isinstance(entry, RenderedMessage)
+            ]
+        return []
+
+    async def _send_followups(
+        self,
+        *,
+        chat_id: int,
+        followups: list[RenderedMessage],
+        reply_to_message_id: int | None,
+        message_thread_id: int | None,
+        notify: bool,
+    ) -> None:
+        """Send every follow-up chunk, in order, as a new message to ``chat_id``.
+
+        Each chunk's ``entities``, ``parse_mode`` and ``reply_markup`` are read
+        from its own ``extra`` mapping; the reply target, thread and
+        notification setting are shared by all chunks. The result of each send
+        is not inspected.
+        """
+        silent = not notify
+        for chunk in followups:
+            options = chunk.extra
+            await self._bot.send_message(
+                chat_id=chat_id,
+                text=chunk.text,
+                entities=options.get("entities"),
+                parse_mode=options.get("parse_mode"),
+                reply_markup=options.get("reply_markup"),
+                reply_to_message_id=reply_to_message_id,
+                message_thread_id=message_thread_id,
+                disable_notification=silent,
+            )
+
    async def close(self) -> None:
        await self._bot.close()

@@ -135,6 +186,17 @@ class TelegramTransport:
            )
            notify = options.notify
            message_thread_id = options.thread_id
+        else:
+            reply_to_message_id = cast(
+                int | None,
+                message.extra.get("followup_reply_to_message_id"),
+            )
+            message_thread_id = cast(
+                int | None,
+                message.extra.get("followup_thread_id"),
+            )
+            notify = bool(message.extra.get("followup_notify", True))
+        followups = self._extract_followups(message)
        sent = await self._bot.send_message(
            chat_id=chat_id,
            text=message.text,
@@ -148,6 +210,14 @@ class TelegramTransport:
        )
        if sent is None:
            return None
+        if followups:
+            await self._send_followups(
+                chat_id=chat_id,
+                followups=followups,
+                reply_to_message_id=reply_to_message_id,
+                message_thread_id=message_thread_id,
+                notify=notify,
+            )
        message_id = sent.message_id
        return MessageRef(
            channel_id=chat_id,
@@ -163,6 +233,7 @@ class TelegramTransport:
        entities = message.extra.get("entities")
        parse_mode = message.extra.get("parse_mode")
        reply_markup = message.extra.get("reply_markup")
+        followups = self._extract_followups(message)
        edited = await self._bot.edit_message_text(
            chat_id=chat_id,
            message_id=message_id,
@@ -174,6 +245,21 @@ class TelegramTransport:
        )
        if edited is None:
            return ref if not wait else None
+        if followups:
+            reply_to_message_id = cast(
+                int | None, message.extra.get("followup_reply_to_message_id")
+            )
+            message_thread_id = cast(
+                int | None, message.extra.get("followup_thread_id")
+            )
+            notify = bool(message.extra.get("followup_notify", True))
+            await self._send_followups(
+                chat_id=chat_id,
+                followups=followups,
+                reply_to_message_id=reply_to_message_id,
+                message_thread_id=message_thread_id,
+                notify=notify,
+            )
        message_id = edited.message_id
        return MessageRef(
            channel_id=chat_id,
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import re
+from dataclasses import dataclass
 from typing import Any

 from markdown_it import MarkdownIt
@@ -8,8 +9,18 @@ from sulguk import transform_html

 from ..markdown import MarkdownParts, assemble_markdown_parts

+MAX_BODY_CHARS = 3500
+
 _MD_RENDERER = MarkdownIt("commonmark", {"html": False})
 _BULLET_RE = re.compile(r"(?m)^(\s*)•")
+_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
+
+
+@dataclass(frozen=True, slots=True)
+class _FenceState:
+    """Describes the fenced code block that is open at the current scan position."""
+
+    # The run of backtick/tilde characters matched as the opening fence.
+    fence: str
+    # Spaces/tabs that preceded the opening fence; reused when closing a chunk.
+    indent: str
+    # The whole opening fence line as scanned; re-emitted to reopen the block.
+    header: str


 def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
@@ -22,18 +33,166 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
    return text, entities


-def trim_body(body: str | None) -> str | None:
+def _split_line_ending(line: str) -> tuple[str, str]:
+    """Split *line* into ``(content, ending)``.
+
+    ``ending`` is the trailing CRLF, LF or CR terminator when the line has
+    one, and the empty string otherwise.
+    """
+    # CRLF is tested first so it is not mistaken for a bare LF.
+    for terminator in ("\r\n", "\n", "\r"):
+        if line.endswith(terminator):
+            return line[: -len(terminator)], terminator
+    return line, ""
+
+
+def _split_long_line(line: str, max_chars: int) -> list[str]:
+    """Split *line* into consecutive parts of at most *max_chars* characters.
+
+    Joining the returned parts reproduces *line* exactly. The line terminator
+    stays attached to the last part when it fits; otherwise it is emitted as
+    its own part(s), so no returned part is longer than *max_chars*.
+    """
+    if len(line) <= max_chars:
+        return [line]
+    content, ending = _split_line_ending(line)
+    parts = [
+        content[start : start + max_chars]
+        for start in range(0, len(content), max_chars)
+    ]
+    if not ending:
+        return parts
+    if parts and len(parts[-1]) + len(ending) <= max_chars:
+        parts[-1] += ending
+    else:
+        # Gluing the terminator onto a full slice would push that part past
+        # the limit, so it goes out separately. It is sliced as well because
+        # max_chars may be smaller than a two-character CRLF.
+        parts.extend(
+            ending[start : start + max_chars]
+            for start in range(0, len(ending), max_chars)
+        )
+    return parts
+
+
+def _split_block(block: str, max_chars: int) -> list[str]:
+    """Break *block* into pieces, packing whole lines together where they fit.
+
+    A block that already fits is returned as a single piece. Otherwise lines
+    (with their terminators) are accumulated until the next part would push
+    the piece past *max_chars*; over-long lines are first cut up by
+    ``_split_long_line``.
+    """
+    if len(block) <= max_chars:
+        return [block]
+    pieces: list[str] = []
+    buffer = ""
+    for line in block.splitlines(keepends=True):
+        # filter(None, ...) skips empty parts.
+        for part in filter(None, _split_long_line(line, max_chars)):
+            if buffer and len(buffer) + len(part) > max_chars:
+                # The part does not fit: flush and start a new piece with it.
+                pieces.append(buffer)
+                buffer = part
+            else:
+                buffer += part
+            if len(buffer) == max_chars:
+                # Exactly full: flush right away.
+                pieces.append(buffer)
+                buffer = ""
+    if buffer:
+        pieces.append(buffer)
+    return pieces
+
+
+def _update_fence_state(line: str, state: _FenceState | None) -> _FenceState | None:
+    """Advance the open-fence state across one line of Markdown.
+
+    *state* is the fenced code block open before *line* (``None`` when outside
+    a block). Returns the state after *line*: a new ``_FenceState`` when the
+    line opens a block, ``None`` when it closes the open block, and *state*
+    unchanged otherwise.
+    """
+    match = _FENCE_RE.match(line)
+    if match is None:
+        return state
+    fence = match.group("fence")
+    info = match.group("info")
+    if state is None:
+        # CommonMark: the info string of a backtick fence may not contain
+        # backticks, so a line like "```code```" is inline code, not a fence.
+        if fence[0] == "`" and "`" in info:
+            return None
+        return _FenceState(fence=fence, indent=match.group("indent"), header=line)
+    if info.strip(" \t"):
+        # CommonMark: a closing fence may be followed only by spaces or tabs.
+        # A fence-like line carrying an info string is block content.
+        return state
+    if fence[0] == state.fence[0] and len(fence) >= len(state.fence):
+        # Same fence character and at least as long as the opener: closes it.
+        return None
+    return state
+
+
+def _scan_fence_state(text: str, state: _FenceState | None) -> _FenceState | None:
+    """Return the fence state after feeding every line of *text* through
+    ``_update_fence_state``, starting from *state*."""
+    current = state
+    for raw_line in text.splitlines():
+        current = _update_fence_state(raw_line, current)
+    return current
+
+
+def _ensure_trailing_newline(text: str) -> str:
+    """Return *text* unchanged if it already ends in LF or CR, else with one
+    LF appended."""
+    return text if text.endswith(("\n", "\r")) else text + "\n"
+
+
+def _close_fence_chunk(text: str, state: _FenceState) -> str:
+    """Return *text* terminated by a closing fence line that reuses the open
+    block's indent and fence characters."""
+    closing_line = state.indent + state.fence + "\n"
+    return _ensure_trailing_newline(text) + closing_line
+
+
+def _reopen_fence_prefix(state: _FenceState) -> str:
+    """Return the original opening fence line followed by a newline, used to
+    reopen the block at the start of a new chunk."""
+    return state.header + "\n"
+
+
+def split_markdown_body(body: str, max_chars: int) -> list[str]:
+    """Split a Markdown *body* into chunks of roughly *max_chars* characters.
+
+    The body is cut at blank-line paragraph boundaries first and at line
+    boundaries when a paragraph alone is too long. When a chunk ends inside a
+    fenced code block, a closing fence is appended to it and the next chunk
+    starts with the original opening fence line. Those added fence lines are
+    not counted against *max_chars*.
+
+    Returns an empty list for an empty or whitespace-only body; chunks that
+    are whitespace-only are dropped from the result.
+    """
+    if not body or not body.strip():
+        return []
+    # Clamp so a zero or negative limit cannot be passed down.
+    max_chars = max(1, int(max_chars))
+    # The capture group keeps each run of 2+ newlines as its own segment, so
+    # segments alternate paragraph, separator, paragraph, ...
+    segments = re.split(r"(\n{2,})", body)
+    blocks: list[str] = []
+    for idx in range(0, len(segments), 2):
+        paragraph = segments[idx]
+        separator = segments[idx + 1] if idx + 1 < len(segments) else ""
+        # Keep the separator attached to the paragraph it follows.
+        block = paragraph + separator
+        if block:
+            blocks.append(block)
+
+    chunks: list[str] = []
+    current = ""
+    # Fence that is open at the end of `current`, or None outside a fence.
+    state: _FenceState | None = None
+    for block in blocks:
+        for piece in _split_block(block, max_chars):
+            if not current:
+                current = piece
+                state = _scan_fence_state(piece, state)
+                continue
+            if len(current) + len(piece) <= max_chars:
+                current += piece
+                state = _scan_fence_state(piece, state)
+                continue
+
+            # The piece does not fit: finish the current chunk, closing any
+            # fence still open, then start the next chunk by reopening it.
+            if state is not None:
+                current = _close_fence_chunk(current, state)
+            chunks.append(current)
+            current = _reopen_fence_prefix(state) if state is not None else ""
+            current += piece
+            state = _scan_fence_state(piece, state)
+
+    if current:
+        chunks.append(current)
+
+    return [chunk for chunk in chunks if chunk.strip()]
+
+
+def trim_body(body: str | None, *, max_chars: int = MAX_BODY_CHARS) -> str | None:
    if not body:
        return None
-    if len(body) > 3500:
-        body = body[: 3500 - 1] + "…"
+    if len(body) > max_chars:
+        body = body[: max_chars - 1] + "…"
    return body if body.strip() else None


 def prepare_telegram(parts: MarkdownParts) -> tuple[str, list[dict[str, Any]]]:
    trimmed = MarkdownParts(
        header=parts.header or "",
-        body=trim_body(parts.body),
+        body=trim_body(parts.body, max_chars=MAX_BODY_CHARS),
        footer=parts.footer,
    )
    return render_markdown(assemble_markdown_parts(trimmed))
+
+
+def prepare_telegram_multi(
+    parts: MarkdownParts, *, max_body_chars: int = MAX_BODY_CHARS
+) -> list[tuple[str, list[dict[str, Any]]]]:
+    """Render *parts* as one or more ``(text, entities)`` payloads.
+
+    The body is split with ``split_markdown_body``; every payload carries the
+    footer, and every payload after the first gets a
+    "continued (N/M)" marker in its header. A missing or blank body still
+    produces exactly one payload.
+    """
+    body = parts.body
+    has_body = body is not None and bool(body.strip())
+    chunks = split_markdown_body(body, max_body_chars) if has_body else []
+    chunks = chunks or [""]
+    total = len(chunks)
+    base_header = parts.header or ""
+
+    payloads: list[tuple[str, list[dict[str, Any]]]] = []
+    for position, chunk in enumerate(chunks, start=1):
+        if position == 1:
+            header = base_header
+        elif base_header:
+            header = f"{base_header} · continued ({position}/{total})"
+        else:
+            header = f"continued ({position}/{total})"
+        assembled = assemble_markdown_parts(
+            MarkdownParts(header=header, body=chunk, footer=parts.footer)
+        )
+        payloads.append(render_markdown(assembled))
+    return payloads
@@ -1,4 +1,4 @@
-from takopi.telegram.render import render_markdown
+from takopi.telegram.render import render_markdown, split_markdown_body


 def test_render_markdown_basic_entities() -> None:
@@ -18,3 +18,13 @@ def test_render_markdown_code_fence_language_is_string() -> None:
    assert entities is not None
    assert any(e.get("type") == "pre" and e.get("language") == "py" for e in entities)
    assert any(e.get("type") == "code" for e in entities)
+
+
+def test_split_markdown_body_closes_and_reopens_fence() -> None:
+    """A fenced block split across chunks is closed in the first chunk and
+    reopened, with its original info string, in the second."""
+    body = "```py\n" + ("line\n" * 10) + "```\n\npost"
+
+    # 40 characters is too small for the whole fenced block, forcing a split.
+    chunks = split_markdown_body(body, max_chars=40)
+
+    assert len(chunks) > 1
+    assert chunks[0].rstrip().endswith("```")
+    assert chunks[1].startswith("```py\n")
@@ -32,6 +32,7 @@ from takopi.telegram.bridge import (
    send_with_resume,
 )
 from takopi.telegram.client import BotClient
+from takopi.telegram.render import MAX_BODY_CHARS
 from takopi.telegram.topic_state import TopicStateStore, resolve_state_path
 from takopi.context import RunContext
 from takopi.config import ProjectConfig, ProjectsConfig
@@ -486,6 +487,26 @@ def test_telegram_presenter_final_clears_button() -> None:
    assert rendered.extra["reply_markup"]["inline_keyboard"] == []


+def test_telegram_presenter_split_overflow_adds_followups() -> None:
+    """In split mode, an answer longer than MAX_BODY_CHARS produces follow-up
+    messages, and every message clears the inline keyboard."""
+    presenter = TelegramPresenter(message_overflow="split")
+    state = ProgressTracker(engine="codex").snapshot()
+
+    rendered = presenter.render_final(
+        state,
+        elapsed_s=0.0,
+        status="done",
+        # Ten characters over the limit, so at least one follow-up is needed.
+        answer="x" * (MAX_BODY_CHARS + 10),
+    )
+
+    followups = rendered.extra.get("followups")
+    assert followups
+    assert all(isinstance(item, RenderedMessage) for item in followups)
+    assert rendered.extra["reply_markup"]["inline_keyboard"] == []
+    assert all(
+        item.extra["reply_markup"]["inline_keyboard"] == [] for item in followups
+    )
+
+
@pytest.mark.anyio
 async def test_telegram_transport_passes_replace_and_wait() -> None:
    bot = _FakeBot()
@@ -532,6 +553,54 @@ async def test_telegram_transport_passes_reply_markup() -> None:
    assert bot.edit_calls[0]["reply_markup"] == markup


+@pytest.mark.anyio
+async def test_telegram_transport_sends_followups() -> None:
+    """``send`` delivers the follow-up as a second message that reuses the
+    reply target, thread and notification setting from the send options."""
+    bot = _FakeBot()
+    transport = TelegramTransport(bot)
+    reply = MessageRef(channel_id=123, message_id=10)
+    followup = RenderedMessage(text="part 2")
+
+    await transport.send(
+        channel_id=123,
+        message=RenderedMessage(text="part 1", extra={"followups": [followup]}),
+        options=SendOptions(reply_to=reply, notify=False, thread_id=7),
+    )
+
+    # One call for the primary message, one for the follow-up.
+    assert len(bot.send_calls) == 2
+    assert bot.send_calls[1]["text"] == "part 2"
+    assert bot.send_calls[1]["reply_to_message_id"] == 10
+    assert bot.send_calls[1]["message_thread_id"] == 7
+    assert bot.send_calls[1]["replace_message_id"] is None
+    assert bot.send_calls[1]["disable_notification"] is True
+
+
+@pytest.mark.anyio
+async def test_telegram_transport_edits_and_sends_followups() -> None:
+    """``edit`` updates the existing message once and sends the follow-up as a
+    new message, using the ``followup_*`` values carried in ``extra``."""
+    bot = _FakeBot()
+    transport = TelegramTransport(bot)
+    followup = RenderedMessage(text="part 2")
+
+    await transport.edit(
+        ref=MessageRef(channel_id=123, message_id=42),
+        message=RenderedMessage(
+            text="part 1",
+            extra={
+                "followups": [followup],
+                "followup_reply_to_message_id": 10,
+                "followup_thread_id": 7,
+                "followup_notify": False,
+            },
+        ),
+    )
+
+    assert len(bot.edit_calls) == 1
+    assert len(bot.send_calls) == 1
+    assert bot.send_calls[0]["text"] == "part 2"
+    assert bot.send_calls[0]["reply_to_message_id"] == 10
+    assert bot.send_calls[0]["message_thread_id"] == 7
+    assert bot.send_calls[0]["disable_notification"] is True
+
+
@pytest.mark.anyio
 async def test_telegram_transport_edit_wait_false_returns_ref() -> None:
    class _OutboxBot(BotClient):