# Link-entity sanitising helpers for src/takopi/telegram/render.py, from the
# change "fix: prevent Telegram 400 from local markdown links (#214)".
from typing import Any
from urllib.parse import urlparse


def _sanitize_entities(entities: list[Any]) -> list[dict[str, Any]]:
    """Return dict copies of *entities* without unusable ``text_link`` entries.

    Every entity is converted with ``dict(...)``, so the input objects are
    never mutated and the input order is preserved.  A ``text_link`` entity is
    omitted when its ``url`` is missing, is not a ``str``, or is rejected by
    ``_is_supported_text_link_url``; entities of any other type pass through.

    Args:
        entities: Mapping-like entity records (anything ``dict()`` accepts).

    Returns:
        A new list of plain ``dict`` entities.
    """
    sanitized: list[dict[str, Any]] = []
    for raw in entities:
        entity = dict(raw)
        if entity.get("type") == "text_link":
            url = entity.get("url")
            if not isinstance(url, str) or not _is_supported_text_link_url(url):
                # Only the link entity is discarded; the message text is
                # produced separately, so the link label stays visible.
                continue
        sanitized.append(entity)
    return sanitized


def _is_supported_text_link_url(url: str) -> bool:
    """Return whether *url* is acceptable as a ``text_link`` target.

    Accepted forms are ``http``/``https`` URLs that carry a host, and ``tg``
    URLs that carry either a host or a path.  Anything else, including bare
    local paths such as ``/tmp/file.py#L12``, is rejected.

    Args:
        url: The candidate link target.

    Returns:
        ``True`` when the URL is supported, ``False`` otherwise.  Malformed
        input never raises; it is reported as unsupported.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on a malformed authority (for example an unbalanced
        # "[" is reported as "Invalid IPv6 URL").  A URL that cannot even be
        # parsed is not a usable link target, so reject it instead of letting
        # the exception abort the whole render.
        return False
    if parsed.scheme in {"http", "https"}:
        return bool(parsed.netloc)
    return parsed.scheme == "tg" and bool(parsed.netloc or parsed.path)
# Tests from tests/test_rendering.py covering text_link handling in render_markdown.


def test_render_markdown_drops_local_text_links() -> None:
    """A link to a local path keeps its visible text but yields no text_link entity."""
    text, entities = render_markdown("[/tmp/file.py#L12](/tmp/file.py#L12)")

    # The label must survive in the rendered text ...
    assert "/tmp/file.py#L12" in text

    # ... while no link entity may remain for it.
    link_entities = [e for e in entities if e.get("type") == "text_link"]
    assert not link_entities


def test_render_markdown_keeps_https_text_links() -> None:
    """An https link is still emitted as a text_link entity with its URL intact."""
    _, entities = render_markdown("[docs](https://example.com/path)")

    link_urls = [e.get("url") for e in entities if e.get("type") == "text_link"]
    assert "https://example.com/path" in link_urls