feat(telegram): add overflow split mode (#101)
This commit is contained in:
@@ -40,6 +40,20 @@ example, `http://localhost:8000/v1`) and a dummy `OPENAI_API_KEY` if your server
|
||||
ignores it. If your server requires a specific model name, set
|
||||
`voice_transcription_model` (for example, `whisper-1`).
|
||||
|
||||
## Message overflow
|
||||
|
||||
By default, takopi trims long final responses to ~3500 characters to stay under
|
||||
Telegram's 4096 character limit after entity parsing. You can opt into splitting
|
||||
instead:
|
||||
|
||||
```toml
|
||||
[transports.telegram]
|
||||
message_overflow = "split" # trim | split
|
||||
```
|
||||
|
||||
Split mode sends multiple messages. Each chunk includes the footer; follow-up
|
||||
chunks add a "continued (N/M)" header.
|
||||
|
||||
## Forum topics (optional)
|
||||
|
||||
Takopi can bind Telegram forum topics to a project/branch and persist resume tokens
|
||||
|
||||
@@ -64,6 +64,13 @@ bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz"
|
||||
chat_id = 123456789
|
||||
```
|
||||
|
||||
Optional: split long final responses instead of trimming them:
|
||||
|
||||
```toml
|
||||
[transports.telegram]
|
||||
message_overflow = "split" # trim | split
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Your first handoff
|
||||
|
||||
@@ -113,6 +113,15 @@ async def _send_or_edit_message(
|
||||
thread_id: int | None = None,
|
||||
) -> tuple[MessageRef | None, bool]:
|
||||
msg = message
|
||||
followups = message.extra.get("followups")
|
||||
if followups:
|
||||
extra = dict(message.extra)
|
||||
if reply_to is not None:
|
||||
extra.setdefault("followup_reply_to_message_id", reply_to.message_id)
|
||||
if thread_id is not None:
|
||||
extra.setdefault("followup_thread_id", thread_id)
|
||||
extra.setdefault("followup_notify", notify)
|
||||
msg = RenderedMessage(text=message.text, extra=extra)
|
||||
if edit_ref is not None:
|
||||
logger.debug(
|
||||
"transport.edit_message",
|
||||
|
||||
@@ -97,6 +97,7 @@ class TelegramTransportSettings(BaseModel):
|
||||
|
||||
bot_token: NonEmptyStr
|
||||
chat_id: StrictInt
|
||||
message_overflow: Literal["trim", "split"] = "trim"
|
||||
voice_transcription: bool = False
|
||||
voice_max_bytes: StrictInt = 10 * 1024 * 1024
|
||||
voice_transcription_model: NonEmptyStr = "gpt-4o-mini-transcribe"
|
||||
|
||||
@@ -101,7 +101,7 @@ class TelegramBackend(TransportBackend):
|
||||
)
|
||||
bot = TelegramClient(token)
|
||||
transport = TelegramTransport(bot)
|
||||
presenter = TelegramPresenter()
|
||||
presenter = TelegramPresenter(message_overflow=settings.message_overflow)
|
||||
exec_cfg = ExecBridgeConfig(
|
||||
transport=transport,
|
||||
presenter=presenter,
|
||||
|
||||
@@ -18,7 +18,7 @@ from ..settings import (
|
||||
TelegramTransportSettings,
|
||||
)
|
||||
from .client import BotClient
|
||||
from .render import prepare_telegram
|
||||
from .render import MAX_BODY_CHARS, prepare_telegram, prepare_telegram_multi
|
||||
from .types import TelegramCallbackQuery, TelegramIncomingMessage
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -43,8 +43,14 @@ CLEAR_MARKUP = {"inline_keyboard": []}
|
||||
|
||||
|
||||
class TelegramPresenter:
|
||||
def __init__(self, *, formatter: MarkdownFormatter | None = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
formatter: MarkdownFormatter | None = None,
|
||||
message_overflow: str = "trim",
|
||||
) -> None:
|
||||
self._formatter = formatter or MarkdownFormatter()
|
||||
self._message_overflow = message_overflow
|
||||
|
||||
def render_progress(
|
||||
self,
|
||||
@@ -74,6 +80,23 @@ class TelegramPresenter:
|
||||
parts = self._formatter.render_final_parts(
|
||||
state, elapsed_s=elapsed_s, status=status, answer=answer
|
||||
)
|
||||
if self._message_overflow == "split":
|
||||
payloads = prepare_telegram_multi(parts, max_body_chars=MAX_BODY_CHARS)
|
||||
text, entities = payloads[0]
|
||||
extra = {"entities": entities, "reply_markup": CLEAR_MARKUP}
|
||||
if len(payloads) > 1:
|
||||
followups = [
|
||||
RenderedMessage(
|
||||
text=followup_text,
|
||||
extra={
|
||||
"entities": followup_entities,
|
||||
"reply_markup": CLEAR_MARKUP,
|
||||
},
|
||||
)
|
||||
for followup_text, followup_entities in payloads[1:]
|
||||
]
|
||||
extra["followups"] = followups
|
||||
return RenderedMessage(text=text, extra=extra)
|
||||
text, entities = prepare_telegram(parts)
|
||||
return RenderedMessage(
|
||||
text=text,
|
||||
@@ -107,6 +130,34 @@ class TelegramTransport:
|
||||
def __init__(self, bot: BotClient) -> None:
|
||||
self._bot = bot
|
||||
|
||||
@staticmethod
|
||||
def _extract_followups(message: RenderedMessage) -> list[RenderedMessage]:
|
||||
followups = message.extra.get("followups")
|
||||
if not isinstance(followups, list):
|
||||
return []
|
||||
return [item for item in followups if isinstance(item, RenderedMessage)]
|
||||
|
||||
async def _send_followups(
|
||||
self,
|
||||
*,
|
||||
chat_id: int,
|
||||
followups: list[RenderedMessage],
|
||||
reply_to_message_id: int | None,
|
||||
message_thread_id: int | None,
|
||||
notify: bool,
|
||||
) -> None:
|
||||
for followup in followups:
|
||||
await self._bot.send_message(
|
||||
chat_id=chat_id,
|
||||
text=followup.text,
|
||||
entities=followup.extra.get("entities"),
|
||||
parse_mode=followup.extra.get("parse_mode"),
|
||||
reply_markup=followup.extra.get("reply_markup"),
|
||||
reply_to_message_id=reply_to_message_id,
|
||||
message_thread_id=message_thread_id,
|
||||
disable_notification=not notify,
|
||||
)
|
||||
|
||||
async def close(self) -> None:
|
||||
await self._bot.close()
|
||||
|
||||
@@ -135,6 +186,17 @@ class TelegramTransport:
|
||||
)
|
||||
notify = options.notify
|
||||
message_thread_id = options.thread_id
|
||||
else:
|
||||
reply_to_message_id = cast(
|
||||
int | None,
|
||||
message.extra.get("followup_reply_to_message_id"),
|
||||
)
|
||||
message_thread_id = cast(
|
||||
int | None,
|
||||
message.extra.get("followup_thread_id"),
|
||||
)
|
||||
notify = bool(message.extra.get("followup_notify", True))
|
||||
followups = self._extract_followups(message)
|
||||
sent = await self._bot.send_message(
|
||||
chat_id=chat_id,
|
||||
text=message.text,
|
||||
@@ -148,6 +210,14 @@ class TelegramTransport:
|
||||
)
|
||||
if sent is None:
|
||||
return None
|
||||
if followups:
|
||||
await self._send_followups(
|
||||
chat_id=chat_id,
|
||||
followups=followups,
|
||||
reply_to_message_id=reply_to_message_id,
|
||||
message_thread_id=message_thread_id,
|
||||
notify=notify,
|
||||
)
|
||||
message_id = sent.message_id
|
||||
return MessageRef(
|
||||
channel_id=chat_id,
|
||||
@@ -163,6 +233,7 @@ class TelegramTransport:
|
||||
entities = message.extra.get("entities")
|
||||
parse_mode = message.extra.get("parse_mode")
|
||||
reply_markup = message.extra.get("reply_markup")
|
||||
followups = self._extract_followups(message)
|
||||
edited = await self._bot.edit_message_text(
|
||||
chat_id=chat_id,
|
||||
message_id=message_id,
|
||||
@@ -174,6 +245,21 @@ class TelegramTransport:
|
||||
)
|
||||
if edited is None:
|
||||
return ref if not wait else None
|
||||
if followups:
|
||||
reply_to_message_id = cast(
|
||||
int | None, message.extra.get("followup_reply_to_message_id")
|
||||
)
|
||||
message_thread_id = cast(
|
||||
int | None, message.extra.get("followup_thread_id")
|
||||
)
|
||||
notify = bool(message.extra.get("followup_notify", True))
|
||||
await self._send_followups(
|
||||
chat_id=chat_id,
|
||||
followups=followups,
|
||||
reply_to_message_id=reply_to_message_id,
|
||||
message_thread_id=message_thread_id,
|
||||
notify=notify,
|
||||
)
|
||||
message_id = edited.message_id
|
||||
return MessageRef(
|
||||
channel_id=chat_id,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from markdown_it import MarkdownIt
|
||||
@@ -8,8 +9,18 @@ from sulguk import transform_html
|
||||
|
||||
from ..markdown import MarkdownParts, assemble_markdown_parts
|
||||
|
||||
MAX_BODY_CHARS = 3500
|
||||
|
||||
_MD_RENDERER = MarkdownIt("commonmark", {"html": False})
|
||||
_BULLET_RE = re.compile(r"(?m)^(\s*)•")
|
||||
_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class _FenceState:
|
||||
fence: str
|
||||
indent: str
|
||||
header: str
|
||||
|
||||
|
||||
def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
|
||||
@@ -22,18 +33,166 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
|
||||
return text, entities
|
||||
|
||||
|
||||
def trim_body(body: str | None) -> str | None:
|
||||
def _split_line_ending(line: str) -> tuple[str, str]:
|
||||
if line.endswith("\r\n"):
|
||||
return line[:-2], "\r\n"
|
||||
if line.endswith("\n"):
|
||||
return line[:-1], "\n"
|
||||
if line.endswith("\r"):
|
||||
return line[:-1], "\r"
|
||||
return line, ""
|
||||
|
||||
|
||||
def _split_long_line(line: str, max_chars: int) -> list[str]:
|
||||
if len(line) <= max_chars:
|
||||
return [line]
|
||||
content, ending = _split_line_ending(line)
|
||||
parts: list[str] = []
|
||||
for idx in range(0, len(content), max_chars):
|
||||
chunk = content[idx : idx + max_chars]
|
||||
if idx + max_chars >= len(content):
|
||||
chunk += ending
|
||||
parts.append(chunk)
|
||||
if not parts and ending:
|
||||
parts.append(ending)
|
||||
return parts
|
||||
|
||||
|
||||
def _split_block(block: str, max_chars: int) -> list[str]:
|
||||
if len(block) <= max_chars:
|
||||
return [block]
|
||||
pieces: list[str] = []
|
||||
current = ""
|
||||
for line in block.splitlines(keepends=True):
|
||||
for part in _split_long_line(line, max_chars):
|
||||
if not part:
|
||||
continue
|
||||
if current and len(current) + len(part) > max_chars:
|
||||
pieces.append(current)
|
||||
current = ""
|
||||
current += part
|
||||
if len(current) == max_chars:
|
||||
pieces.append(current)
|
||||
current = ""
|
||||
if current:
|
||||
pieces.append(current)
|
||||
return pieces
|
||||
|
||||
|
||||
def _update_fence_state(line: str, state: _FenceState | None) -> _FenceState | None:
|
||||
match = _FENCE_RE.match(line)
|
||||
if match is None:
|
||||
return state
|
||||
fence = match.group("fence")
|
||||
indent = match.group("indent")
|
||||
if state is None:
|
||||
return _FenceState(fence=fence, indent=indent, header=line)
|
||||
if fence[0] == state.fence[0] and len(fence) >= len(state.fence):
|
||||
return None
|
||||
return state
|
||||
|
||||
|
||||
def _scan_fence_state(text: str, state: _FenceState | None) -> _FenceState | None:
|
||||
for line in text.splitlines():
|
||||
state = _update_fence_state(line, state)
|
||||
return state
|
||||
|
||||
|
||||
def _ensure_trailing_newline(text: str) -> str:
|
||||
if text.endswith("\n") or text.endswith("\r"):
|
||||
return text
|
||||
return text + "\n"
|
||||
|
||||
|
||||
def _close_fence_chunk(text: str, state: _FenceState) -> str:
|
||||
return _ensure_trailing_newline(text) + f"{state.indent}{state.fence}\n"
|
||||
|
||||
|
||||
def _reopen_fence_prefix(state: _FenceState) -> str:
|
||||
return f"{state.header}\n"
|
||||
|
||||
|
||||
def split_markdown_body(body: str, max_chars: int) -> list[str]:
|
||||
if not body or not body.strip():
|
||||
return []
|
||||
max_chars = max(1, int(max_chars))
|
||||
segments = re.split(r"(\n{2,})", body)
|
||||
blocks: list[str] = []
|
||||
for idx in range(0, len(segments), 2):
|
||||
paragraph = segments[idx]
|
||||
separator = segments[idx + 1] if idx + 1 < len(segments) else ""
|
||||
block = paragraph + separator
|
||||
if block:
|
||||
blocks.append(block)
|
||||
|
||||
chunks: list[str] = []
|
||||
current = ""
|
||||
state: _FenceState | None = None
|
||||
for block in blocks:
|
||||
for piece in _split_block(block, max_chars):
|
||||
if not current:
|
||||
current = piece
|
||||
state = _scan_fence_state(piece, state)
|
||||
continue
|
||||
if len(current) + len(piece) <= max_chars:
|
||||
current += piece
|
||||
state = _scan_fence_state(piece, state)
|
||||
continue
|
||||
|
||||
if state is not None:
|
||||
current = _close_fence_chunk(current, state)
|
||||
chunks.append(current)
|
||||
current = _reopen_fence_prefix(state) if state is not None else ""
|
||||
current += piece
|
||||
state = _scan_fence_state(piece, state)
|
||||
|
||||
if current:
|
||||
chunks.append(current)
|
||||
|
||||
return [chunk for chunk in chunks if chunk.strip()]
|
||||
|
||||
|
||||
def trim_body(body: str | None, *, max_chars: int = MAX_BODY_CHARS) -> str | None:
|
||||
if not body:
|
||||
return None
|
||||
if len(body) > 3500:
|
||||
body = body[: 3500 - 1] + "…"
|
||||
if len(body) > max_chars:
|
||||
body = body[: max_chars - 1] + "…"
|
||||
return body if body.strip() else None
|
||||
|
||||
|
||||
def prepare_telegram(parts: MarkdownParts) -> tuple[str, list[dict[str, Any]]]:
|
||||
trimmed = MarkdownParts(
|
||||
header=parts.header or "",
|
||||
body=trim_body(parts.body),
|
||||
body=trim_body(parts.body, max_chars=MAX_BODY_CHARS),
|
||||
footer=parts.footer,
|
||||
)
|
||||
return render_markdown(assemble_markdown_parts(trimmed))
|
||||
|
||||
|
||||
def prepare_telegram_multi(
|
||||
parts: MarkdownParts, *, max_body_chars: int = MAX_BODY_CHARS
|
||||
) -> list[tuple[str, list[dict[str, Any]]]]:
|
||||
body = parts.body
|
||||
if body is not None and not body.strip():
|
||||
body = None
|
||||
body_chunks = split_markdown_body(body, max_body_chars) if body is not None else []
|
||||
if not body_chunks:
|
||||
body_chunks = [""]
|
||||
total = len(body_chunks)
|
||||
|
||||
payloads: list[tuple[str, list[dict[str, Any]]]] = []
|
||||
for idx, chunk in enumerate(body_chunks, start=1):
|
||||
header = parts.header or ""
|
||||
if idx > 1:
|
||||
if header:
|
||||
header = f"{header} · continued ({idx}/{total})"
|
||||
else:
|
||||
header = f"continued ({idx}/{total})"
|
||||
payloads.append(
|
||||
render_markdown(
|
||||
assemble_markdown_parts(
|
||||
MarkdownParts(header=header, body=chunk, footer=parts.footer)
|
||||
)
|
||||
)
|
||||
)
|
||||
return payloads
|
||||
|
||||
+11
-1
@@ -1,4 +1,4 @@
|
||||
from takopi.telegram.render import render_markdown
|
||||
from takopi.telegram.render import render_markdown, split_markdown_body
|
||||
|
||||
|
||||
def test_render_markdown_basic_entities() -> None:
|
||||
@@ -18,3 +18,13 @@ def test_render_markdown_code_fence_language_is_string() -> None:
|
||||
assert entities is not None
|
||||
assert any(e.get("type") == "pre" and e.get("language") == "py" for e in entities)
|
||||
assert any(e.get("type") == "code" for e in entities)
|
||||
|
||||
|
||||
def test_split_markdown_body_closes_and_reopens_fence() -> None:
|
||||
body = "```py\n" + ("line\n" * 10) + "```\n\npost"
|
||||
|
||||
chunks = split_markdown_body(body, max_chars=40)
|
||||
|
||||
assert len(chunks) > 1
|
||||
assert chunks[0].rstrip().endswith("```")
|
||||
assert chunks[1].startswith("```py\n")
|
||||
|
||||
@@ -32,6 +32,7 @@ from takopi.telegram.bridge import (
|
||||
send_with_resume,
|
||||
)
|
||||
from takopi.telegram.client import BotClient
|
||||
from takopi.telegram.render import MAX_BODY_CHARS
|
||||
from takopi.telegram.topic_state import TopicStateStore, resolve_state_path
|
||||
from takopi.context import RunContext
|
||||
from takopi.config import ProjectConfig, ProjectsConfig
|
||||
@@ -486,6 +487,26 @@ def test_telegram_presenter_final_clears_button() -> None:
|
||||
assert rendered.extra["reply_markup"]["inline_keyboard"] == []
|
||||
|
||||
|
||||
def test_telegram_presenter_split_overflow_adds_followups() -> None:
|
||||
presenter = TelegramPresenter(message_overflow="split")
|
||||
state = ProgressTracker(engine="codex").snapshot()
|
||||
|
||||
rendered = presenter.render_final(
|
||||
state,
|
||||
elapsed_s=0.0,
|
||||
status="done",
|
||||
answer="x" * (MAX_BODY_CHARS + 10),
|
||||
)
|
||||
|
||||
followups = rendered.extra.get("followups")
|
||||
assert followups
|
||||
assert all(isinstance(item, RenderedMessage) for item in followups)
|
||||
assert rendered.extra["reply_markup"]["inline_keyboard"] == []
|
||||
assert all(
|
||||
item.extra["reply_markup"]["inline_keyboard"] == [] for item in followups
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_telegram_transport_passes_replace_and_wait() -> None:
|
||||
bot = _FakeBot()
|
||||
@@ -532,6 +553,54 @@ async def test_telegram_transport_passes_reply_markup() -> None:
|
||||
assert bot.edit_calls[0]["reply_markup"] == markup
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_telegram_transport_sends_followups() -> None:
|
||||
bot = _FakeBot()
|
||||
transport = TelegramTransport(bot)
|
||||
reply = MessageRef(channel_id=123, message_id=10)
|
||||
followup = RenderedMessage(text="part 2")
|
||||
|
||||
await transport.send(
|
||||
channel_id=123,
|
||||
message=RenderedMessage(text="part 1", extra={"followups": [followup]}),
|
||||
options=SendOptions(reply_to=reply, notify=False, thread_id=7),
|
||||
)
|
||||
|
||||
assert len(bot.send_calls) == 2
|
||||
assert bot.send_calls[1]["text"] == "part 2"
|
||||
assert bot.send_calls[1]["reply_to_message_id"] == 10
|
||||
assert bot.send_calls[1]["message_thread_id"] == 7
|
||||
assert bot.send_calls[1]["replace_message_id"] is None
|
||||
assert bot.send_calls[1]["disable_notification"] is True
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_telegram_transport_edits_and_sends_followups() -> None:
|
||||
bot = _FakeBot()
|
||||
transport = TelegramTransport(bot)
|
||||
followup = RenderedMessage(text="part 2")
|
||||
|
||||
await transport.edit(
|
||||
ref=MessageRef(channel_id=123, message_id=42),
|
||||
message=RenderedMessage(
|
||||
text="part 1",
|
||||
extra={
|
||||
"followups": [followup],
|
||||
"followup_reply_to_message_id": 10,
|
||||
"followup_thread_id": 7,
|
||||
"followup_notify": False,
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
assert len(bot.edit_calls) == 1
|
||||
assert len(bot.send_calls) == 1
|
||||
assert bot.send_calls[0]["text"] == "part 2"
|
||||
assert bot.send_calls[0]["reply_to_message_id"] == 10
|
||||
assert bot.send_calls[0]["message_thread_id"] == 7
|
||||
assert bot.send_calls[0]["disable_notification"] is True
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_telegram_transport_edit_wait_false_returns_ref() -> None:
|
||||
class _OutboxBot(BotClient):
|
||||
|
||||
Reference in New Issue
Block a user