diff --git a/docs/transports/telegram.md b/docs/transports/telegram.md index 6b9a406..3af66b5 100644 --- a/docs/transports/telegram.md +++ b/docs/transports/telegram.md @@ -40,6 +40,20 @@ example, `http://localhost:8000/v1`) and a dummy `OPENAI_API_KEY` if your server ignores it. If your server requires a specific model name, set `voice_transcription_model` (for example, `whisper-1`). +## Message overflow + +By default, takopi trims long final responses to ~3500 characters to stay under +Telegram's 4096 character limit after entity parsing. You can opt into splitting +instead: + +```toml +[transports.telegram] +message_overflow = "split" # trim | split +``` + +Split mode sends multiple messages. Each chunk includes the footer; follow-up +chunks add a "continued (N/M)" header. + ## Forum topics (optional) Takopi can bind Telegram forum topics to a project/branch and persist resume tokens diff --git a/docs/user-guide.md b/docs/user-guide.md index 94b9ebd..5f19dae 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -64,6 +64,13 @@ bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz" chat_id = 123456789 ``` +Optional: split long final responses instead of trimming them: + +```toml +[transports.telegram] +message_overflow = "split" # trim | split +``` + --- ## 2. 
Your first handoff diff --git a/src/takopi/runner_bridge.py b/src/takopi/runner_bridge.py index 587d038..c3b7d59 100644 --- a/src/takopi/runner_bridge.py +++ b/src/takopi/runner_bridge.py @@ -113,6 +113,15 @@ async def _send_or_edit_message( thread_id: int | None = None, ) -> tuple[MessageRef | None, bool]: msg = message + followups = message.extra.get("followups") + if followups: + extra = dict(message.extra) + if reply_to is not None: + extra.setdefault("followup_reply_to_message_id", reply_to.message_id) + if thread_id is not None: + extra.setdefault("followup_thread_id", thread_id) + extra.setdefault("followup_notify", notify) + msg = RenderedMessage(text=message.text, extra=extra) if edit_ref is not None: logger.debug( "transport.edit_message", diff --git a/src/takopi/settings.py b/src/takopi/settings.py index bcc0275..984e5fa 100644 --- a/src/takopi/settings.py +++ b/src/takopi/settings.py @@ -97,6 +97,7 @@ class TelegramTransportSettings(BaseModel): bot_token: NonEmptyStr chat_id: StrictInt + message_overflow: Literal["trim", "split"] = "trim" voice_transcription: bool = False voice_max_bytes: StrictInt = 10 * 1024 * 1024 voice_transcription_model: NonEmptyStr = "gpt-4o-mini-transcribe" diff --git a/src/takopi/telegram/backend.py b/src/takopi/telegram/backend.py index 1ade6b6..730af1e 100644 --- a/src/takopi/telegram/backend.py +++ b/src/takopi/telegram/backend.py @@ -101,7 +101,7 @@ class TelegramBackend(TransportBackend): ) bot = TelegramClient(token) transport = TelegramTransport(bot) - presenter = TelegramPresenter() + presenter = TelegramPresenter(message_overflow=settings.message_overflow) exec_cfg = ExecBridgeConfig( transport=transport, presenter=presenter, diff --git a/src/takopi/telegram/bridge.py b/src/takopi/telegram/bridge.py index 26961f0..5486f5c 100644 --- a/src/takopi/telegram/bridge.py +++ b/src/takopi/telegram/bridge.py @@ -18,7 +18,7 @@ from ..settings import ( TelegramTransportSettings, ) from .client import BotClient -from .render 
import prepare_telegram +from .render import MAX_BODY_CHARS, prepare_telegram, prepare_telegram_multi from .types import TelegramCallbackQuery, TelegramIncomingMessage logger = get_logger(__name__) @@ -43,8 +43,14 @@ CLEAR_MARKUP = {"inline_keyboard": []} class TelegramPresenter: - def __init__(self, *, formatter: MarkdownFormatter | None = None) -> None: + def __init__( + self, + *, + formatter: MarkdownFormatter | None = None, + message_overflow: str = "trim", + ) -> None: self._formatter = formatter or MarkdownFormatter() + self._message_overflow = message_overflow def render_progress( self, @@ -74,6 +80,23 @@ class TelegramPresenter: parts = self._formatter.render_final_parts( state, elapsed_s=elapsed_s, status=status, answer=answer ) + if self._message_overflow == "split": + payloads = prepare_telegram_multi(parts, max_body_chars=MAX_BODY_CHARS) + text, entities = payloads[0] + extra = {"entities": entities, "reply_markup": CLEAR_MARKUP} + if len(payloads) > 1: + followups = [ + RenderedMessage( + text=followup_text, + extra={ + "entities": followup_entities, + "reply_markup": CLEAR_MARKUP, + }, + ) + for followup_text, followup_entities in payloads[1:] + ] + extra["followups"] = followups + return RenderedMessage(text=text, extra=extra) text, entities = prepare_telegram(parts) return RenderedMessage( text=text, @@ -107,6 +130,34 @@ class TelegramTransport: def __init__(self, bot: BotClient) -> None: self._bot = bot + @staticmethod + def _extract_followups(message: RenderedMessage) -> list[RenderedMessage]: + followups = message.extra.get("followups") + if not isinstance(followups, list): + return [] + return [item for item in followups if isinstance(item, RenderedMessage)] + + async def _send_followups( + self, + *, + chat_id: int, + followups: list[RenderedMessage], + reply_to_message_id: int | None, + message_thread_id: int | None, + notify: bool, + ) -> None: + for followup in followups: + await self._bot.send_message( + chat_id=chat_id, + 
text=followup.text, + entities=followup.extra.get("entities"), + parse_mode=followup.extra.get("parse_mode"), + reply_markup=followup.extra.get("reply_markup"), + reply_to_message_id=reply_to_message_id, + message_thread_id=message_thread_id, + disable_notification=not notify, + ) + async def close(self) -> None: await self._bot.close() @@ -135,6 +186,17 @@ class TelegramTransport: ) notify = options.notify message_thread_id = options.thread_id + else: + reply_to_message_id = cast( + int | None, + message.extra.get("followup_reply_to_message_id"), + ) + message_thread_id = cast( + int | None, + message.extra.get("followup_thread_id"), + ) + notify = bool(message.extra.get("followup_notify", True)) + followups = self._extract_followups(message) sent = await self._bot.send_message( chat_id=chat_id, text=message.text, @@ -148,6 +210,14 @@ class TelegramTransport: ) if sent is None: return None + if followups: + await self._send_followups( + chat_id=chat_id, + followups=followups, + reply_to_message_id=reply_to_message_id, + message_thread_id=message_thread_id, + notify=notify, + ) message_id = sent.message_id return MessageRef( channel_id=chat_id, @@ -163,6 +233,7 @@ class TelegramTransport: entities = message.extra.get("entities") parse_mode = message.extra.get("parse_mode") reply_markup = message.extra.get("reply_markup") + followups = self._extract_followups(message) edited = await self._bot.edit_message_text( chat_id=chat_id, message_id=message_id, @@ -174,6 +245,21 @@ class TelegramTransport: ) if edited is None: return ref if not wait else None + if followups: + reply_to_message_id = cast( + int | None, message.extra.get("followup_reply_to_message_id") + ) + message_thread_id = cast( + int | None, message.extra.get("followup_thread_id") + ) + notify = bool(message.extra.get("followup_notify", True)) + await self._send_followups( + chat_id=chat_id, + followups=followups, + reply_to_message_id=reply_to_message_id, + message_thread_id=message_thread_id, + 
notify=notify, + ) message_id = edited.message_id return MessageRef( channel_id=chat_id, diff --git a/src/takopi/telegram/render.py b/src/takopi/telegram/render.py index cc5283f..3a75806 100644 --- a/src/takopi/telegram/render.py +++ b/src/takopi/telegram/render.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +from dataclasses import dataclass from typing import Any from markdown_it import MarkdownIt @@ -8,8 +9,18 @@ from sulguk import transform_html from ..markdown import MarkdownParts, assemble_markdown_parts +MAX_BODY_CHARS = 3500 + _MD_RENDERER = MarkdownIt("commonmark", {"html": False}) _BULLET_RE = re.compile(r"(?m)^(\s*)•") +_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$") + + +@dataclass(frozen=True, slots=True) +class _FenceState: + fence: str + indent: str + header: str def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]: @@ -22,18 +33,166 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]: return text, entities -def trim_body(body: str | None) -> str | None: +def _split_line_ending(line: str) -> tuple[str, str]: + if line.endswith("\r\n"): + return line[:-2], "\r\n" + if line.endswith("\n"): + return line[:-1], "\n" + if line.endswith("\r"): + return line[:-1], "\r" + return line, "" + + +def _split_long_line(line: str, max_chars: int) -> list[str]: + if len(line) <= max_chars: + return [line] + content, ending = _split_line_ending(line) + parts: list[str] = [] + for idx in range(0, len(content), max_chars): + chunk = content[idx : idx + max_chars] + if idx + max_chars >= len(content): + chunk += ending + parts.append(chunk) + if not parts and ending: + parts.append(ending) + return parts + + +def _split_block(block: str, max_chars: int) -> list[str]: + if len(block) <= max_chars: + return [block] + pieces: list[str] = [] + current = "" + for line in block.splitlines(keepends=True): + for part in _split_long_line(line, max_chars): + if not part: + continue + if current and len(current) + len(part) > 
max_chars: + pieces.append(current) + current = "" + current += part + if len(current) == max_chars: + pieces.append(current) + current = "" + if current: + pieces.append(current) + return pieces + + +def _update_fence_state(line: str, state: _FenceState | None) -> _FenceState | None: + match = _FENCE_RE.match(line) + if match is None: + return state + fence = match.group("fence") + indent = match.group("indent") + if state is None: + return _FenceState(fence=fence, indent=indent, header=line) + if fence[0] == state.fence[0] and len(fence) >= len(state.fence): + return None + return state + + +def _scan_fence_state(text: str, state: _FenceState | None) -> _FenceState | None: + for line in text.splitlines(): + state = _update_fence_state(line, state) + return state + + +def _ensure_trailing_newline(text: str) -> str: + if text.endswith("\n") or text.endswith("\r"): + return text + return text + "\n" + + +def _close_fence_chunk(text: str, state: _FenceState) -> str: + return _ensure_trailing_newline(text) + f"{state.indent}{state.fence}\n" + + +def _reopen_fence_prefix(state: _FenceState) -> str: + return f"{state.header}\n" + + +def split_markdown_body(body: str, max_chars: int) -> list[str]: + if not body or not body.strip(): + return [] + max_chars = max(1, int(max_chars)) + segments = re.split(r"(\n{2,})", body) + blocks: list[str] = [] + for idx in range(0, len(segments), 2): + paragraph = segments[idx] + separator = segments[idx + 1] if idx + 1 < len(segments) else "" + block = paragraph + separator + if block: + blocks.append(block) + + chunks: list[str] = [] + current = "" + state: _FenceState | None = None + for block in blocks: + for piece in _split_block(block, max_chars): + if not current: + current = piece + state = _scan_fence_state(piece, state) + continue + if len(current) + len(piece) <= max_chars: + current += piece + state = _scan_fence_state(piece, state) + continue + + if state is not None: + current = _close_fence_chunk(current, state) + 
chunks.append(current) + current = _reopen_fence_prefix(state) if state is not None else "" + current += piece + state = _scan_fence_state(piece, state) + + if current: + chunks.append(current) + + return [chunk for chunk in chunks if chunk.strip()] + + +def trim_body(body: str | None, *, max_chars: int = MAX_BODY_CHARS) -> str | None: if not body: return None - if len(body) > 3500: - body = body[: 3500 - 1] + "…" + if len(body) > max_chars: + body = body[: max_chars - 1] + "…" return body if body.strip() else None def prepare_telegram(parts: MarkdownParts) -> tuple[str, list[dict[str, Any]]]: trimmed = MarkdownParts( header=parts.header or "", - body=trim_body(parts.body), + body=trim_body(parts.body, max_chars=MAX_BODY_CHARS), footer=parts.footer, ) return render_markdown(assemble_markdown_parts(trimmed)) + + +def prepare_telegram_multi( + parts: MarkdownParts, *, max_body_chars: int = MAX_BODY_CHARS +) -> list[tuple[str, list[dict[str, Any]]]]: + body = parts.body + if body is not None and not body.strip(): + body = None + body_chunks = split_markdown_body(body, max_body_chars) if body is not None else [] + if not body_chunks: + body_chunks = [""] + total = len(body_chunks) + + payloads: list[tuple[str, list[dict[str, Any]]]] = [] + for idx, chunk in enumerate(body_chunks, start=1): + header = parts.header or "" + if idx > 1: + if header: + header = f"{header} · continued ({idx}/{total})" + else: + header = f"continued ({idx}/{total})" + payloads.append( + render_markdown( + assemble_markdown_parts( + MarkdownParts(header=header, body=chunk, footer=parts.footer) + ) + ) + ) + return payloads diff --git a/tests/test_rendering.py b/tests/test_rendering.py index 25342ba..eaae326 100644 --- a/tests/test_rendering.py +++ b/tests/test_rendering.py @@ -1,4 +1,4 @@ -from takopi.telegram.render import render_markdown +from takopi.telegram.render import render_markdown, split_markdown_body def test_render_markdown_basic_entities() -> None: @@ -18,3 +18,13 @@ def 
test_render_markdown_code_fence_language_is_string() -> None: assert entities is not None assert any(e.get("type") == "pre" and e.get("language") == "py" for e in entities) assert any(e.get("type") == "code" for e in entities) + + +def test_split_markdown_body_closes_and_reopens_fence() -> None: + body = "```py\n" + ("line\n" * 10) + "```\n\npost" + + chunks = split_markdown_body(body, max_chars=40) + + assert len(chunks) > 1 + assert chunks[0].rstrip().endswith("```") + assert chunks[1].startswith("```py\n") diff --git a/tests/test_telegram_bridge.py b/tests/test_telegram_bridge.py index 965b537..96a9020 100644 --- a/tests/test_telegram_bridge.py +++ b/tests/test_telegram_bridge.py @@ -32,6 +32,7 @@ from takopi.telegram.bridge import ( send_with_resume, ) from takopi.telegram.client import BotClient +from takopi.telegram.render import MAX_BODY_CHARS from takopi.telegram.topic_state import TopicStateStore, resolve_state_path from takopi.context import RunContext from takopi.config import ProjectConfig, ProjectsConfig @@ -486,6 +487,26 @@ def test_telegram_presenter_final_clears_button() -> None: assert rendered.extra["reply_markup"]["inline_keyboard"] == [] +def test_telegram_presenter_split_overflow_adds_followups() -> None: + presenter = TelegramPresenter(message_overflow="split") + state = ProgressTracker(engine="codex").snapshot() + + rendered = presenter.render_final( + state, + elapsed_s=0.0, + status="done", + answer="x" * (MAX_BODY_CHARS + 10), + ) + + followups = rendered.extra.get("followups") + assert followups + assert all(isinstance(item, RenderedMessage) for item in followups) + assert rendered.extra["reply_markup"]["inline_keyboard"] == [] + assert all( + item.extra["reply_markup"]["inline_keyboard"] == [] for item in followups + ) + + @pytest.mark.anyio async def test_telegram_transport_passes_replace_and_wait() -> None: bot = _FakeBot() @@ -532,6 +553,54 @@ async def test_telegram_transport_passes_reply_markup() -> None: assert 
bot.edit_calls[0]["reply_markup"] == markup +@pytest.mark.anyio +async def test_telegram_transport_sends_followups() -> None: + bot = _FakeBot() + transport = TelegramTransport(bot) + reply = MessageRef(channel_id=123, message_id=10) + followup = RenderedMessage(text="part 2") + + await transport.send( + channel_id=123, + message=RenderedMessage(text="part 1", extra={"followups": [followup]}), + options=SendOptions(reply_to=reply, notify=False, thread_id=7), + ) + + assert len(bot.send_calls) == 2 + assert bot.send_calls[1]["text"] == "part 2" + assert bot.send_calls[1]["reply_to_message_id"] == 10 + assert bot.send_calls[1]["message_thread_id"] == 7 + assert bot.send_calls[1]["replace_message_id"] is None + assert bot.send_calls[1]["disable_notification"] is True + + +@pytest.mark.anyio +async def test_telegram_transport_edits_and_sends_followups() -> None: + bot = _FakeBot() + transport = TelegramTransport(bot) + followup = RenderedMessage(text="part 2") + + await transport.edit( + ref=MessageRef(channel_id=123, message_id=42), + message=RenderedMessage( + text="part 1", + extra={ + "followups": [followup], + "followup_reply_to_message_id": 10, + "followup_thread_id": 7, + "followup_notify": False, + }, + ), + ) + + assert len(bot.edit_calls) == 1 + assert len(bot.send_calls) == 1 + assert bot.send_calls[0]["text"] == "part 2" + assert bot.send_calls[0]["reply_to_message_id"] == 10 + assert bot.send_calls[0]["message_thread_id"] == 7 + assert bot.send_calls[0]["disable_notification"] is True + + @pytest.mark.anyio async def test_telegram_transport_edit_wait_false_returns_ref() -> None: class _OutboxBot(BotClient):