fix: prevent Telegram 400 from local markdown links (#214)

This commit is contained in:
banteg
2026-02-24 17:15:44 +04:00
committed by GitHub
parent 3e85848292
commit ebc823f616
2 changed files with 37 additions and 1 deletions
+21 -1
View File
@@ -3,6 +3,7 @@ from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any
from urllib.parse import urlparse
from markdown_it import MarkdownIt
from sulguk import transform_html
@@ -78,10 +79,29 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
text = _BULLET_RE.sub(r"\1-", rendered.text)
entities = [dict(e) for e in rendered.entities]
entities = _sanitize_entities(rendered.entities)
return text, entities
def _sanitize_entities(entities: list[Any]) -> list[dict[str, Any]]:
    """Copy *entities* into plain dicts, dropping unsupported ``text_link`` items.

    Every input item is converted with ``dict(...)``, so the returned list
    never shares dict objects with the input. An entity whose ``"type"`` is
    ``"text_link"`` is kept only when its ``"url"`` is a string accepted by
    ``_is_supported_text_link_url``; entities of any other type pass through
    unchanged. Input order is preserved.
    """

    def _keep(entity: dict[str, Any]) -> bool:
        # Only text_link entities carry a URL that needs vetting.
        if entity.get("type") != "text_link":
            return True
        target = entity.get("url")
        # The isinstance check comes first so a missing or non-string URL is
        # rejected without ever reaching the URL parser.
        return isinstance(target, str) and _is_supported_text_link_url(target)

    copies = (dict(item) for item in entities)
    return [entity for entity in copies if _keep(entity)]
def _is_supported_text_link_url(url: str) -> bool:
    """Return True when *url* is an acceptable target for a ``text_link`` entity.

    Accepted forms:

    * ``http`` / ``https`` URLs that have a non-empty network location;
    * ``tg`` URLs that have a non-empty network location or a non-empty path
      (so both ``tg://resolve?...`` and ``tg:resolve`` qualify).

    Everything else is rejected, including scheme-less local paths such as
    ``/tmp/file.py#L12`` and URLs with any other scheme. A URL that
    ``urlparse`` cannot parse at all is also rejected rather than allowed to
    raise.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError on malformed input such as an unbalanced
        # IPv6 bracket ("http://[::1"). An unparseable URL is not a usable
        # link target, so report it as unsupported instead of crashing.
        return False
    if parsed.scheme in {"http", "https"}:
        return bool(parsed.netloc)
    if parsed.scheme == "tg":
        return bool(parsed.netloc or parsed.path)
    return False
def _split_line_ending(line: str) -> tuple[str, str]:
if line.endswith("\r\n"):
return line[:-2], "\r\n"
+16
View File
@@ -22,6 +22,22 @@ def test_render_markdown_code_fence_language_is_string() -> None:
assert any(e.get("type") == "code" for e in entities)
def test_render_markdown_drops_local_text_links() -> None:
    """A link to a local path keeps its label text but yields no text_link entity."""
    rendered_text, rendered_entities = render_markdown(
        "[/tmp/file.py#L12](/tmp/file.py#L12)"
    )
    # The visible label must survive even though the link itself is removed.
    assert "/tmp/file.py#L12" in rendered_text
    assert all(
        entity.get("type") != "text_link" for entity in rendered_entities
    )
def test_render_markdown_keeps_https_text_links() -> None:
    """An https link must still come back as a text_link entity with its URL."""
    _text, rendered_entities = render_markdown("[docs](https://example.com/path)")
    is_expected_link = (
        entity.get("type") == "text_link"
        and entity.get("url") == "https://example.com/path"
        for entity in rendered_entities
    )
    assert any(is_expected_link)
def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
md = (
"1. Tune maker\n"