fix(telegram): preserve numbering with malformed nested lists (#202)

This commit is contained in:
banteg
2026-02-11 01:47:43 +04:00
committed by GitHub
parent 13ea8298d9
commit 56bc1681c6
2 changed files with 80 additions and 1 deletions
+50 -1
View File
@@ -14,6 +14,8 @@ MAX_BODY_CHARS = 3500
# Shared markdown-it parser built from the "commonmark" preset with the
# "html" option turned off.
_MD_RENDERER = MarkdownIt("commonmark", {"html": False})
# A "•" preceded only by whitespace at the start of any line (multiline mode);
# group 1 captures that leading whitespace so it can be kept on substitution.
_BULLET_RE = re.compile(r"(?m)^(\s*)•")
# A code-fence line: optional space/tab indent, a run of three or more
# backtick/tilde characters, then the rest of the line as the info string.
_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
# Start of an ordered list item: at most three spaces/tabs of indent, digits
# followed by "." or ")", then at least one whitespace character.
_ORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})(?P<marker>\d+[.)])\s+")
# Start of a bullet list item: at most three spaces/tabs of indent, one of
# "-", "+" or "*", then at least one whitespace character.
_UNORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})[-+*]\s+")
@dataclass(frozen=True, slots=True)
@@ -23,8 +25,55 @@ class _FenceState:
header: str
def _normalize_nested_list_markers(md: str) -> str:
    """Nest bullets that follow an ordered item at the same indentation.

    Input such as ``"1. Step\\n- detail\\n1. Next"`` puts the bullet at the
    same indentation as the numbered item. A CommonMark parser then ends the
    ordered list at the bullet, so every numbered item starts a fresh list.
    This pass pushes such bullets to the content column of the preceding
    ordered item so they become children of it and the ordered list is not
    interrupted.

    Lines inside fenced code blocks are passed through untouched, and a blank
    line or a fence ends the "inside an ordered item" context.

    Args:
        md: Markdown source text; may be empty.

    Returns:
        The markdown with qualifying bullet lines indented. Line endings of
        every line are preserved.
    """
    if not md:
        return md

    lines: list[str] = []
    # Indent of the ordered item we are currently "inside", or None.
    ordered_indent: str | None = None
    # Spaces that move a same-indent bullet to that item's content column.
    child_pad = ""
    fence_state: _FenceState | None = None

    for raw_line in md.splitlines(keepends=True):
        line, ending = _split_line_ending(raw_line)
        fence_state = _update_fence_state(line, fence_state)
        if fence_state is not None:
            # Never rewrite code; a fence also ends the ordered-item context.
            ordered_indent = None
            lines.append(raw_line)
            continue
        if not line.strip():
            ordered_indent = None
            lines.append(raw_line)
            continue

        ordered_match = _ORDERED_ITEM_RE.match(line)
        if ordered_match is not None:
            ordered_indent = ordered_match.group("indent")
            child_pad = " " * _ordered_content_offset(line, ordered_match)
            lines.append(raw_line)
            continue

        if ordered_indent is not None:
            unordered_match = _UNORDERED_ITEM_RE.match(line)
            if (
                unordered_match is not None
                and unordered_match.group("indent") == ordered_indent
            ):
                # ``line`` already carries the shared indent, so prefixing the
                # pad lands the bullet exactly on the item's content column.
                # A fixed pad would fall short for markers such as "10.".
                lines.append(f"{child_pad}{line}{ending}")
                continue
            if line.startswith(ordered_indent) and len(line) > len(ordered_indent):
                # Continuation text of the current item: keep the context.
                lines.append(raw_line)
                continue
            ordered_indent = None

        lines.append(raw_line)

    return "".join(lines)


def _ordered_content_offset(line: str, match: re.Match[str]) -> int:
    """Return the column distance from an ordered marker to its content.

    Follows the CommonMark list-item rule: the content column is the marker
    width plus the one to four columns of whitespace after it. A wider gap,
    or an item with nothing after the marker, counts as a single space.
    """
    marker_end = match.end("marker")
    marker_width = marker_end - match.start("marker")
    # Measure the gap in columns so a tab after the marker is sized by its
    # tab stop (4) rather than as one character.
    gap_cols = len(line[: match.end()].expandtabs(4)) - len(
        line[:marker_end].expandtabs(4)
    )
    if match.end() == len(line) or gap_cols > 4:
        gap_cols = 1
    return marker_width + gap_cols
def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
html = _MD_RENDERER.render(md or "")
html = _MD_RENDERER.render(_normalize_nested_list_markers(md or ""))
rendered = transform_html(html)
text = _BULLET_RE.sub(r"\1-", rendered.text)
+30
View File
@@ -1,3 +1,5 @@
import re
from takopi.telegram.render import render_markdown, split_markdown_body
@@ -20,6 +22,34 @@ def test_render_markdown_code_fence_language_is_string() -> None:
assert any(e.get("type") == "code" for e in entities)
def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
    """Numbered items keep counting up when their bullets are not indented."""
    source_lines = [
        "1. Tune maker\n",
        "- Sweep\n",
        "- Keep data\n",
        "1. Increase\n",
        "- Raise target\n",
        "- Keep\n",
        "1. Train\n",
        "- Start\n",
        "1. Add\n",
        "- Keep exposure\n",
        "1. Run\n",
        "- Target pnl\n",
    ]
    expected_numbered = [
        "1. Tune maker",
        "2. Increase",
        "3. Train",
        "4. Add",
        "5. Run",
    ]

    rendered_text, _entities = render_markdown("".join(source_lines))

    # Only lines that start with "<digits>. " are numbered list items.
    numbered_pattern = re.compile(r"^\d+\.\s")
    numbered = [
        rendered_line
        for rendered_line in rendered_text.splitlines()
        if numbered_pattern.match(rendered_line)
    ]

    assert numbered == expected_numbered
def test_split_markdown_body_closes_and_reopens_fence() -> None:
body = "```py\n" + ("line\n" * 10) + "```\n\npost"