fix: prevent Telegram 400 from local markdown links (#214)
This commit is contained in:
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from markdown_it import MarkdownIt
|
||||
from sulguk import transform_html
|
||||
@@ -78,10 +79,29 @@ def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
|
||||
|
||||
text = _BULLET_RE.sub(r"\1-", rendered.text)
|
||||
|
||||
entities = [dict(e) for e in rendered.entities]
|
||||
entities = _sanitize_entities(rendered.entities)
|
||||
return text, entities
|
||||
|
||||
|
||||
def _sanitize_entities(entities: list[Any]) -> list[dict[str, Any]]:
|
||||
sanitized: list[dict[str, Any]] = []
|
||||
for raw in entities:
|
||||
entity = dict(raw)
|
||||
if entity.get("type") == "text_link":
|
||||
url = entity.get("url")
|
||||
if not isinstance(url, str) or not _is_supported_text_link_url(url):
|
||||
continue
|
||||
sanitized.append(entity)
|
||||
return sanitized
|
||||
|
||||
|
||||
def _is_supported_text_link_url(url: str) -> bool:
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme in {"http", "https"} and bool(parsed.netloc):
|
||||
return True
|
||||
return parsed.scheme == "tg" and (bool(parsed.netloc) or bool(parsed.path))
|
||||
|
||||
|
||||
def _split_line_ending(line: str) -> tuple[str, str]:
|
||||
if line.endswith("\r\n"):
|
||||
return line[:-2], "\r\n"
|
||||
|
||||
@@ -22,6 +22,22 @@ def test_render_markdown_code_fence_language_is_string() -> None:
|
||||
assert any(e.get("type") == "code" for e in entities)
|
||||
|
||||
|
||||
def test_render_markdown_drops_local_text_links() -> None:
    """A link pointing at a local path keeps its label text but yields no text_link."""
    text, entities = render_markdown("[/tmp/file.py#L12](/tmp/file.py#L12)")

    # The visible label must survive even though the link itself is dropped.
    assert "/tmp/file.py#L12" in text

    entity_types = [entity.get("type") for entity in entities]
    assert "text_link" not in entity_types
|
||||
|
||||
|
||||
def test_render_markdown_keeps_https_text_links() -> None:
    """An https link must still be reported as a text_link with its URL intact."""
    _, entities = render_markdown("[docs](https://example.com/path)")

    # Collect the targets of every text_link entity, then look for ours.
    link_urls = [
        entity.get("url")
        for entity in entities
        if entity.get("type") == "text_link"
    ]
    assert "https://example.com/path" in link_urls
|
||||
|
||||
|
||||
def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
|
||||
md = (
|
||||
"1. Tune maker\n"
|
||||
|
||||
Reference in New Issue
Block a user