refactor: truncate telegram messages instead of chunking
This commit is contained in:
@@ -50,7 +50,7 @@ Optional flags:
|
|||||||
- `src/codex_telegram_bridge/constants.py`: limits and config path constants
|
- `src/codex_telegram_bridge/constants.py`: limits and config path constants
|
||||||
- `src/codex_telegram_bridge/config.py`: config loading and chat-id parsing helpers
|
- `src/codex_telegram_bridge/config.py`: config loading and chat-id parsing helpers
|
||||||
- `src/codex_telegram_bridge/exec_render.py`: renderers for codex exec JSONL events
|
- `src/codex_telegram_bridge/exec_render.py`: renderers for codex exec JSONL events
|
||||||
- `src/codex_telegram_bridge/rendering.py`: markdown rendering + chunking
|
- `src/codex_telegram_bridge/rendering.py`: markdown rendering
|
||||||
- `src/codex_telegram_bridge/routes.py`: sqlite routing store
|
- `src/codex_telegram_bridge/routes.py`: sqlite routing store
|
||||||
- `src/codex_telegram_bridge/telegram_client.py`: Telegram Bot API client
|
- `src/codex_telegram_bridge/telegram_client.py`: Telegram Bot API client
|
||||||
- `src/codex_telegram_bridge/exec_bridge.py`: codex exec + resume bridge
|
- `src/codex_telegram_bridge/exec_bridge.py`: codex exec + resume bridge
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from typing import Any, Dict, List, Tuple
|
|||||||
from markdown_it import MarkdownIt
|
from markdown_it import MarkdownIt
|
||||||
from sulguk import transform_html
|
from sulguk import transform_html
|
||||||
|
|
||||||
|
|
||||||
def render_markdown(md: str) -> Tuple[str, List[Dict[str, Any]]]:
|
def render_markdown(md: str) -> Tuple[str, List[Dict[str, Any]]]:
|
||||||
html = MarkdownIt("commonmark", {"html": False}).render(md or "")
|
html = MarkdownIt("commonmark", {"html": False}).render(md or "")
|
||||||
rendered = transform_html(html)
|
rendered = transform_html(html)
|
||||||
@@ -20,73 +21,3 @@ def render_markdown(md: str) -> Tuple[str, List[Dict[str, Any]]]:
|
|||||||
d.pop("language", None)
|
d.pop("language", None)
|
||||||
entities.append(d)
|
entities.append(d)
|
||||||
return text, entities
|
return text, entities
|
||||||
|
|
||||||
|
|
||||||
def _chunk_text_with_indices(text: str, limit: int) -> List[Tuple[str, int, int]]:
|
|
||||||
text = text or ""
|
|
||||||
if len(text) <= limit:
|
|
||||||
return [(text, 0, len(text))]
|
|
||||||
|
|
||||||
out: List[Tuple[str, int, int]] = []
|
|
||||||
buf: List[str] = []
|
|
||||||
size = 0
|
|
||||||
buf_start = 0
|
|
||||||
pos = 0
|
|
||||||
|
|
||||||
for line in text.splitlines(keepends=True):
|
|
||||||
line_len = len(line)
|
|
||||||
line_start = pos
|
|
||||||
line_end = pos + line_len
|
|
||||||
|
|
||||||
if line_len > limit:
|
|
||||||
if buf:
|
|
||||||
out.append(("".join(buf), buf_start, line_start))
|
|
||||||
buf, size = [], 0
|
|
||||||
for i in range(0, line_len, limit):
|
|
||||||
part = line[i : i + limit]
|
|
||||||
out.append((part, line_start + i, line_start + i + len(part)))
|
|
||||||
pos = line_end
|
|
||||||
buf_start = pos
|
|
||||||
continue
|
|
||||||
|
|
||||||
if size + line_len > limit:
|
|
||||||
out.append(("".join(buf), buf_start, line_start))
|
|
||||||
buf = [line]
|
|
||||||
size = line_len
|
|
||||||
buf_start = line_start
|
|
||||||
else:
|
|
||||||
if not buf:
|
|
||||||
buf_start = line_start
|
|
||||||
buf.append(line)
|
|
||||||
size += line_len
|
|
||||||
|
|
||||||
pos = line_end
|
|
||||||
|
|
||||||
if buf:
|
|
||||||
out.append(("".join(buf), buf_start, pos))
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def _slice_entities(entities: List[Dict[str, Any]], start: int, end: int) -> List[Dict[str, Any]]:
|
|
||||||
out: List[Dict[str, Any]] = []
|
|
||||||
for ent in entities:
|
|
||||||
try:
|
|
||||||
ent_start = int(ent.get("offset", 0))
|
|
||||||
ent_len = int(ent.get("length", 0))
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
continue
|
|
||||||
if ent_len <= 0:
|
|
||||||
continue
|
|
||||||
ent_end = ent_start + ent_len
|
|
||||||
if ent_end <= start or ent_start >= end:
|
|
||||||
continue
|
|
||||||
new_start = max(ent_start, start)
|
|
||||||
new_end = min(ent_end, end)
|
|
||||||
new_len = new_end - new_start
|
|
||||||
if new_len <= 0:
|
|
||||||
continue
|
|
||||||
new_ent = dict(ent)
|
|
||||||
new_ent["offset"] = new_start - start
|
|
||||||
new_ent["length"] = new_len
|
|
||||||
out.append(new_ent)
|
|
||||||
return out
|
|
||||||
|
|||||||
@@ -6,7 +6,9 @@ import urllib.request
|
|||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from .constants import DEFAULT_CHUNK_LEN, TELEGRAM_HARD_LIMIT
|
from .constants import DEFAULT_CHUNK_LEN, TELEGRAM_HARD_LIMIT
|
||||||
from .rendering import render_markdown, _chunk_text_with_indices, _slice_entities
|
from .rendering import render_markdown
|
||||||
|
|
||||||
|
ELLIPSIS = "…"
|
||||||
|
|
||||||
|
|
||||||
class TelegramClient:
|
class TelegramClient:
|
||||||
@@ -64,7 +66,7 @@ class TelegramClient:
|
|||||||
entities: Optional[List[Dict[str, Any]]] = None,
|
entities: Optional[List[Dict[str, Any]]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
if len(text) > TELEGRAM_HARD_LIMIT:
|
if len(text) > TELEGRAM_HARD_LIMIT:
|
||||||
raise ValueError("send_message received too-long text; chunk it first")
|
raise ValueError("send_message received too-long text")
|
||||||
params: Dict[str, Any] = {
|
params: Dict[str, Any] = {
|
||||||
"chat_id": chat_id,
|
"chat_id": chat_id,
|
||||||
"text": text,
|
"text": text,
|
||||||
@@ -111,20 +113,35 @@ class TelegramClient:
|
|||||||
disable_notification: bool = False,
|
disable_notification: bool = False,
|
||||||
chunk_len: int = DEFAULT_CHUNK_LEN,
|
chunk_len: int = DEFAULT_CHUNK_LEN,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
sent: List[Dict[str, Any]] = []
|
|
||||||
rendered_text, entities = render_markdown(text)
|
rendered_text, entities = render_markdown(text)
|
||||||
chunks = _chunk_text_with_indices(rendered_text, limit=chunk_len)
|
limit = min(chunk_len, TELEGRAM_HARD_LIMIT)
|
||||||
for i, (chunk, start, end) in enumerate(chunks):
|
if len(rendered_text) > limit:
|
||||||
chunk_entities = _slice_entities(entities, start, end) if entities else None
|
suffix = "\n" + ELLIPSIS
|
||||||
|
keep = max(0, limit - len(suffix))
|
||||||
|
rendered_text = rendered_text[:keep] + suffix
|
||||||
|
if entities:
|
||||||
|
trimmed: List[Dict[str, Any]] = []
|
||||||
|
for ent in entities:
|
||||||
|
start = int(ent["offset"])
|
||||||
|
length = int(ent["length"])
|
||||||
|
if start >= keep:
|
||||||
|
continue
|
||||||
|
end = min(start + length, keep)
|
||||||
|
if end <= start:
|
||||||
|
continue
|
||||||
|
d = dict(ent)
|
||||||
|
d["length"] = end - start
|
||||||
|
trimmed.append(d)
|
||||||
|
entities = trimmed
|
||||||
|
|
||||||
msg = self.send_message(
|
msg = self.send_message(
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
text=chunk,
|
text=rendered_text,
|
||||||
reply_to_message_id=(reply_to_message_id if i == 0 else None),
|
reply_to_message_id=reply_to_message_id,
|
||||||
disable_notification=disable_notification,
|
disable_notification=disable_notification,
|
||||||
entities=chunk_entities,
|
entities=entities or None,
|
||||||
)
|
)
|
||||||
sent.append(msg)
|
return [msg]
|
||||||
return sent
|
|
||||||
|
|
||||||
def send_chat_action(self, chat_id: int, action: str = "typing") -> Dict[str, Any]:
|
def send_chat_action(self, chat_id: int, action: str = "typing") -> Dict[str, Any]:
|
||||||
params: Dict[str, Any] = {
|
params: Dict[str, Any] = {
|
||||||
|
|||||||
Reference in New Issue
Block a user