feat(telegram): add overflow split mode (#101)

This commit is contained in:
banteg
2026-01-12 18:17:12 +04:00
committed by GitHub
parent 04671593aa
commit 9d5fccab92
9 changed files with 363 additions and 8 deletions
+14
View File
@@ -40,6 +40,20 @@ example, `http://localhost:8000/v1`) and a dummy `OPENAI_API_KEY` if your server
ignores it. If your server requires a specific model name, set ignores it. If your server requires a specific model name, set
`voice_transcription_model` (for example, `whisper-1`). `voice_transcription_model` (for example, `whisper-1`).
## Message overflow
By default, takopi trims long final responses to ~3500 characters to stay under
Telegram's 4096 character limit after entity parsing. You can opt into splitting
instead:
```toml
[transports.telegram]
message_overflow = "split" # trim | split
```
Split mode sends multiple messages. Each chunk includes the footer; follow-up
chunks add a "continued (N/M)" header.
## Forum topics (optional) ## Forum topics (optional)
Takopi can bind Telegram forum topics to a project/branch and persist resume tokens Takopi can bind Telegram forum topics to a project/branch and persist resume tokens
+7
View File
@@ -64,6 +64,13 @@ bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz"
chat_id = 123456789 chat_id = 123456789
``` ```
Optional: split long final responses instead of trimming them:
```toml
[transports.telegram]
message_overflow = "split" # trim | split
```
--- ---
## 2. Your first handoff ## 2. Your first handoff
+9
View File
@@ -113,6 +113,15 @@ async def _send_or_edit_message(
thread_id: int | None = None, thread_id: int | None = None,
) -> tuple[MessageRef | None, bool]: ) -> tuple[MessageRef | None, bool]:
msg = message msg = message
followups = message.extra.get("followups")
if followups:
extra = dict(message.extra)
if reply_to is not None:
extra.setdefault("followup_reply_to_message_id", reply_to.message_id)
if thread_id is not None:
extra.setdefault("followup_thread_id", thread_id)
extra.setdefault("followup_notify", notify)
msg = RenderedMessage(text=message.text, extra=extra)
if edit_ref is not None: if edit_ref is not None:
logger.debug( logger.debug(
"transport.edit_message", "transport.edit_message",
+1
View File
@@ -97,6 +97,7 @@ class TelegramTransportSettings(BaseModel):
bot_token: NonEmptyStr bot_token: NonEmptyStr
chat_id: StrictInt chat_id: StrictInt
message_overflow: Literal["trim", "split"] = "trim"
voice_transcription: bool = False voice_transcription: bool = False
voice_max_bytes: StrictInt = 10 * 1024 * 1024 voice_max_bytes: StrictInt = 10 * 1024 * 1024
voice_transcription_model: NonEmptyStr = "gpt-4o-mini-transcribe" voice_transcription_model: NonEmptyStr = "gpt-4o-mini-transcribe"
+1 -1
View File
@@ -101,7 +101,7 @@ class TelegramBackend(TransportBackend):
) )
bot = TelegramClient(token) bot = TelegramClient(token)
transport = TelegramTransport(bot) transport = TelegramTransport(bot)
presenter = TelegramPresenter() presenter = TelegramPresenter(message_overflow=settings.message_overflow)
exec_cfg = ExecBridgeConfig( exec_cfg = ExecBridgeConfig(
transport=transport, transport=transport,
presenter=presenter, presenter=presenter,
+88 -2
View File
@@ -18,7 +18,7 @@ from ..settings import (
TelegramTransportSettings, TelegramTransportSettings,
) )
from .client import BotClient from .client import BotClient
from .render import prepare_telegram from .render import MAX_BODY_CHARS, prepare_telegram, prepare_telegram_multi
from .types import TelegramCallbackQuery, TelegramIncomingMessage from .types import TelegramCallbackQuery, TelegramIncomingMessage
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -43,8 +43,14 @@ CLEAR_MARKUP = {"inline_keyboard": []}
class TelegramPresenter: class TelegramPresenter:
def __init__(self, *, formatter: MarkdownFormatter | None = None) -> None: def __init__(
self,
*,
formatter: MarkdownFormatter | None = None,
message_overflow: str = "trim",
) -> None:
self._formatter = formatter or MarkdownFormatter() self._formatter = formatter or MarkdownFormatter()
self._message_overflow = message_overflow
def render_progress( def render_progress(
self, self,
@@ -74,6 +80,23 @@ class TelegramPresenter:
parts = self._formatter.render_final_parts( parts = self._formatter.render_final_parts(
state, elapsed_s=elapsed_s, status=status, answer=answer state, elapsed_s=elapsed_s, status=status, answer=answer
) )
if self._message_overflow == "split":
payloads = prepare_telegram_multi(parts, max_body_chars=MAX_BODY_CHARS)
text, entities = payloads[0]
extra = {"entities": entities, "reply_markup": CLEAR_MARKUP}
if len(payloads) > 1:
followups = [
RenderedMessage(
text=followup_text,
extra={
"entities": followup_entities,
"reply_markup": CLEAR_MARKUP,
},
)
for followup_text, followup_entities in payloads[1:]
]
extra["followups"] = followups
return RenderedMessage(text=text, extra=extra)
text, entities = prepare_telegram(parts) text, entities = prepare_telegram(parts)
return RenderedMessage( return RenderedMessage(
text=text, text=text,
@@ -107,6 +130,34 @@ class TelegramTransport:
def __init__(self, bot: BotClient) -> None: def __init__(self, bot: BotClient) -> None:
self._bot = bot self._bot = bot
@staticmethod
def _extract_followups(message: RenderedMessage) -> list[RenderedMessage]:
followups = message.extra.get("followups")
if not isinstance(followups, list):
return []
return [item for item in followups if isinstance(item, RenderedMessage)]
async def _send_followups(
self,
*,
chat_id: int,
followups: list[RenderedMessage],
reply_to_message_id: int | None,
message_thread_id: int | None,
notify: bool,
) -> None:
for followup in followups:
await self._bot.send_message(
chat_id=chat_id,
text=followup.text,
entities=followup.extra.get("entities"),
parse_mode=followup.extra.get("parse_mode"),
reply_markup=followup.extra.get("reply_markup"),
reply_to_message_id=reply_to_message_id,
message_thread_id=message_thread_id,
disable_notification=not notify,
)
async def close(self) -> None: async def close(self) -> None:
await self._bot.close() await self._bot.close()
@@ -135,6 +186,17 @@ class TelegramTransport:
) )
notify = options.notify notify = options.notify
message_thread_id = options.thread_id message_thread_id = options.thread_id
else:
reply_to_message_id = cast(
int | None,
message.extra.get("followup_reply_to_message_id"),
)
message_thread_id = cast(
int | None,
message.extra.get("followup_thread_id"),
)
notify = bool(message.extra.get("followup_notify", True))
followups = self._extract_followups(message)
sent = await self._bot.send_message( sent = await self._bot.send_message(
chat_id=chat_id, chat_id=chat_id,
text=message.text, text=message.text,
@@ -148,6 +210,14 @@ class TelegramTransport:
) )
if sent is None: if sent is None:
return None return None
if followups:
await self._send_followups(
chat_id=chat_id,
followups=followups,
reply_to_message_id=reply_to_message_id,
message_thread_id=message_thread_id,
notify=notify,
)
message_id = sent.message_id message_id = sent.message_id
return MessageRef( return MessageRef(
channel_id=chat_id, channel_id=chat_id,
@@ -163,6 +233,7 @@ class TelegramTransport:
entities = message.extra.get("entities") entities = message.extra.get("entities")
parse_mode = message.extra.get("parse_mode") parse_mode = message.extra.get("parse_mode")
reply_markup = message.extra.get("reply_markup") reply_markup = message.extra.get("reply_markup")
followups = self._extract_followups(message)
edited = await self._bot.edit_message_text( edited = await self._bot.edit_message_text(
chat_id=chat_id, chat_id=chat_id,
message_id=message_id, message_id=message_id,
@@ -174,6 +245,21 @@ class TelegramTransport:
) )
if edited is None: if edited is None:
return ref if not wait else None return ref if not wait else None
if followups:
reply_to_message_id = cast(
int | None, message.extra.get("followup_reply_to_message_id")
)
message_thread_id = cast(
int | None, message.extra.get("followup_thread_id")
)
notify = bool(message.extra.get("followup_notify", True))
await self._send_followups(
chat_id=chat_id,
followups=followups,
reply_to_message_id=reply_to_message_id,
message_thread_id=message_thread_id,
notify=notify,
)
message_id = edited.message_id message_id = edited.message_id
return MessageRef( return MessageRef(
channel_id=chat_id, channel_id=chat_id,
+163 -4
View File
@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import re import re
from dataclasses import dataclass
from typing import Any from typing import Any
from markdown_it import MarkdownIt from markdown_it import MarkdownIt
@@ -8,8 +9,18 @@ from sulguk import transform_html
from ..markdown import MarkdownParts, assemble_markdown_parts from ..markdown import MarkdownParts, assemble_markdown_parts
MAX_BODY_CHARS = 3500
_MD_RENDERER = MarkdownIt("commonmark", {"html": False}) _MD_RENDERER = MarkdownIt("commonmark", {"html": False})
_BULLET_RE = re.compile(r"(?m)^(\s*)•") _BULLET_RE = re.compile(r"(?m)^(\s*)•")
_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
@dataclass(frozen=True, slots=True)
class _FenceState:
fence: str
indent: str
header: str
def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]: def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
@@ -22,18 +33,166 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
return text, entities return text, entities
def trim_body(body: str | None) -> str | None: def _split_line_ending(line: str) -> tuple[str, str]:
if line.endswith("\r\n"):
return line[:-2], "\r\n"
if line.endswith("\n"):
return line[:-1], "\n"
if line.endswith("\r"):
return line[:-1], "\r"
return line, ""
def _split_long_line(line: str, max_chars: int) -> list[str]:
if len(line) <= max_chars:
return [line]
content, ending = _split_line_ending(line)
parts: list[str] = []
for idx in range(0, len(content), max_chars):
chunk = content[idx : idx + max_chars]
if idx + max_chars >= len(content):
chunk += ending
parts.append(chunk)
if not parts and ending:
parts.append(ending)
return parts
def _split_block(block: str, max_chars: int) -> list[str]:
if len(block) <= max_chars:
return [block]
pieces: list[str] = []
current = ""
for line in block.splitlines(keepends=True):
for part in _split_long_line(line, max_chars):
if not part:
continue
if current and len(current) + len(part) > max_chars:
pieces.append(current)
current = ""
current += part
if len(current) == max_chars:
pieces.append(current)
current = ""
if current:
pieces.append(current)
return pieces
def _update_fence_state(line: str, state: _FenceState | None) -> _FenceState | None:
match = _FENCE_RE.match(line)
if match is None:
return state
fence = match.group("fence")
indent = match.group("indent")
if state is None:
return _FenceState(fence=fence, indent=indent, header=line)
if fence[0] == state.fence[0] and len(fence) >= len(state.fence):
return None
return state
def _scan_fence_state(text: str, state: _FenceState | None) -> _FenceState | None:
for line in text.splitlines():
state = _update_fence_state(line, state)
return state
def _ensure_trailing_newline(text: str) -> str:
if text.endswith("\n") or text.endswith("\r"):
return text
return text + "\n"
def _close_fence_chunk(text: str, state: _FenceState) -> str:
return _ensure_trailing_newline(text) + f"{state.indent}{state.fence}\n"
def _reopen_fence_prefix(state: _FenceState) -> str:
return f"{state.header}\n"
def split_markdown_body(body: str, max_chars: int) -> list[str]:
if not body or not body.strip():
return []
max_chars = max(1, int(max_chars))
segments = re.split(r"(\n{2,})", body)
blocks: list[str] = []
for idx in range(0, len(segments), 2):
paragraph = segments[idx]
separator = segments[idx + 1] if idx + 1 < len(segments) else ""
block = paragraph + separator
if block:
blocks.append(block)
chunks: list[str] = []
current = ""
state: _FenceState | None = None
for block in blocks:
for piece in _split_block(block, max_chars):
if not current:
current = piece
state = _scan_fence_state(piece, state)
continue
if len(current) + len(piece) <= max_chars:
current += piece
state = _scan_fence_state(piece, state)
continue
if state is not None:
current = _close_fence_chunk(current, state)
chunks.append(current)
current = _reopen_fence_prefix(state) if state is not None else ""
current += piece
state = _scan_fence_state(piece, state)
if current:
chunks.append(current)
return [chunk for chunk in chunks if chunk.strip()]
def trim_body(body: str | None, *, max_chars: int = MAX_BODY_CHARS) -> str | None:
if not body: if not body:
return None return None
if len(body) > 3500: if len(body) > max_chars:
body = body[: 3500 - 1] + "" body = body[: max_chars - 1] + ""
return body if body.strip() else None return body if body.strip() else None
def prepare_telegram(parts: MarkdownParts) -> tuple[str, list[dict[str, Any]]]: def prepare_telegram(parts: MarkdownParts) -> tuple[str, list[dict[str, Any]]]:
trimmed = MarkdownParts( trimmed = MarkdownParts(
header=parts.header or "", header=parts.header or "",
body=trim_body(parts.body), body=trim_body(parts.body, max_chars=MAX_BODY_CHARS),
footer=parts.footer, footer=parts.footer,
) )
return render_markdown(assemble_markdown_parts(trimmed)) return render_markdown(assemble_markdown_parts(trimmed))
def prepare_telegram_multi(
parts: MarkdownParts, *, max_body_chars: int = MAX_BODY_CHARS
) -> list[tuple[str, list[dict[str, Any]]]]:
body = parts.body
if body is not None and not body.strip():
body = None
body_chunks = split_markdown_body(body, max_body_chars) if body is not None else []
if not body_chunks:
body_chunks = [""]
total = len(body_chunks)
payloads: list[tuple[str, list[dict[str, Any]]]] = []
for idx, chunk in enumerate(body_chunks, start=1):
header = parts.header or ""
if idx > 1:
if header:
header = f"{header} · continued ({idx}/{total})"
else:
header = f"continued ({idx}/{total})"
payloads.append(
render_markdown(
assemble_markdown_parts(
MarkdownParts(header=header, body=chunk, footer=parts.footer)
)
)
)
return payloads
+11 -1
View File
@@ -1,4 +1,4 @@
from takopi.telegram.render import render_markdown from takopi.telegram.render import render_markdown, split_markdown_body
def test_render_markdown_basic_entities() -> None: def test_render_markdown_basic_entities() -> None:
@@ -18,3 +18,13 @@ def test_render_markdown_code_fence_language_is_string() -> None:
assert entities is not None assert entities is not None
assert any(e.get("type") == "pre" and e.get("language") == "py" for e in entities) assert any(e.get("type") == "pre" and e.get("language") == "py" for e in entities)
assert any(e.get("type") == "code" for e in entities) assert any(e.get("type") == "code" for e in entities)
def test_split_markdown_body_closes_and_reopens_fence() -> None:
body = "```py\n" + ("line\n" * 10) + "```\n\npost"
chunks = split_markdown_body(body, max_chars=40)
assert len(chunks) > 1
assert chunks[0].rstrip().endswith("```")
assert chunks[1].startswith("```py\n")
+69
View File
@@ -32,6 +32,7 @@ from takopi.telegram.bridge import (
send_with_resume, send_with_resume,
) )
from takopi.telegram.client import BotClient from takopi.telegram.client import BotClient
from takopi.telegram.render import MAX_BODY_CHARS
from takopi.telegram.topic_state import TopicStateStore, resolve_state_path from takopi.telegram.topic_state import TopicStateStore, resolve_state_path
from takopi.context import RunContext from takopi.context import RunContext
from takopi.config import ProjectConfig, ProjectsConfig from takopi.config import ProjectConfig, ProjectsConfig
@@ -486,6 +487,26 @@ def test_telegram_presenter_final_clears_button() -> None:
assert rendered.extra["reply_markup"]["inline_keyboard"] == [] assert rendered.extra["reply_markup"]["inline_keyboard"] == []
def test_telegram_presenter_split_overflow_adds_followups() -> None:
presenter = TelegramPresenter(message_overflow="split")
state = ProgressTracker(engine="codex").snapshot()
rendered = presenter.render_final(
state,
elapsed_s=0.0,
status="done",
answer="x" * (MAX_BODY_CHARS + 10),
)
followups = rendered.extra.get("followups")
assert followups
assert all(isinstance(item, RenderedMessage) for item in followups)
assert rendered.extra["reply_markup"]["inline_keyboard"] == []
assert all(
item.extra["reply_markup"]["inline_keyboard"] == [] for item in followups
)
@pytest.mark.anyio @pytest.mark.anyio
async def test_telegram_transport_passes_replace_and_wait() -> None: async def test_telegram_transport_passes_replace_and_wait() -> None:
bot = _FakeBot() bot = _FakeBot()
@@ -532,6 +553,54 @@ async def test_telegram_transport_passes_reply_markup() -> None:
assert bot.edit_calls[0]["reply_markup"] == markup assert bot.edit_calls[0]["reply_markup"] == markup
@pytest.mark.anyio
async def test_telegram_transport_sends_followups() -> None:
bot = _FakeBot()
transport = TelegramTransport(bot)
reply = MessageRef(channel_id=123, message_id=10)
followup = RenderedMessage(text="part 2")
await transport.send(
channel_id=123,
message=RenderedMessage(text="part 1", extra={"followups": [followup]}),
options=SendOptions(reply_to=reply, notify=False, thread_id=7),
)
assert len(bot.send_calls) == 2
assert bot.send_calls[1]["text"] == "part 2"
assert bot.send_calls[1]["reply_to_message_id"] == 10
assert bot.send_calls[1]["message_thread_id"] == 7
assert bot.send_calls[1]["replace_message_id"] is None
assert bot.send_calls[1]["disable_notification"] is True
@pytest.mark.anyio
async def test_telegram_transport_edits_and_sends_followups() -> None:
bot = _FakeBot()
transport = TelegramTransport(bot)
followup = RenderedMessage(text="part 2")
await transport.edit(
ref=MessageRef(channel_id=123, message_id=42),
message=RenderedMessage(
text="part 1",
extra={
"followups": [followup],
"followup_reply_to_message_id": 10,
"followup_thread_id": 7,
"followup_notify": False,
},
),
)
assert len(bot.edit_calls) == 1
assert len(bot.send_calls) == 1
assert bot.send_calls[0]["text"] == "part 2"
assert bot.send_calls[0]["reply_to_message_id"] == 10
assert bot.send_calls[0]["message_thread_id"] == 7
assert bot.send_calls[0]["disable_notification"] is True
@pytest.mark.anyio @pytest.mark.anyio
async def test_telegram_transport_edit_wait_false_returns_ref() -> None: async def test_telegram_transport_edit_wait_false_returns_ref() -> None:
class _OutboxBot(BotClient): class _OutboxBot(BotClient):