fix(telegram): preserve numbering with malformed nested lists (#202)

2026-02-11 01:47:43 +04:00
parent 13ea8298d9
commit 56bc1681c6
2 changed files with 80 additions and 1 deletions
@@ -14,6 +14,8 @@ MAX_BODY_CHARS = 3500
 _MD_RENDERER = MarkdownIt("commonmark", {"html": False})
 _BULLET_RE = re.compile(r"(?m)^(\s*)•")
 _FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
+# Start of an ordered list item: up to three leading spaces/tabs ("indent"),
+# digits followed by "." or ")" ("marker"), then at least one whitespace char.
+_ORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})(?P<marker>\d+[.)])\s+")
+# Start of a bullet list item: up to three leading spaces/tabs ("indent"),
+# one of "-", "+" or "*", then at least one whitespace char.
+_UNORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})[-+*]\s+")


@dataclass(frozen=True, slots=True)
@@ -23,8 +25,55 @@ class _FenceState:
    header: str


+def _normalize_nested_list_markers(md: str) -> str:
+    if not md:
+        return md
+
+    lines: list[str] = []
+    ordered_indent: str | None = None
+    fence_state: _FenceState | None = None
+
+    for raw_line in md.splitlines(keepends=True):
+        line, ending = _split_line_ending(raw_line)
+        fence_state = _update_fence_state(line, fence_state)
+        if fence_state is not None:
+            ordered_indent = None
+            lines.append(raw_line)
+            continue
+
+        if not line.strip():
+            ordered_indent = None
+            lines.append(raw_line)
+            continue
+
+        ordered_match = _ORDERED_ITEM_RE.match(line)
+        if ordered_match is not None:
+            ordered_indent = ordered_match.group("indent")
+            lines.append(raw_line)
+            continue
+
+        if ordered_indent is not None:
+            unordered_match = _UNORDERED_ITEM_RE.match(line)
+            if (
+                unordered_match is not None
+                and unordered_match.group("indent") == ordered_indent
+            ):
+                lines.append(f"{ordered_indent}   {line}{ending}")
+                continue
+
+            if line.startswith(ordered_indent) and len(line) > len(ordered_indent):
+                lines.append(raw_line)
+                continue
+
+            ordered_indent = None
+
+        lines.append(raw_line)
+
+    return "".join(lines)
+
+
 def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
-    html = _MD_RENDERER.render(md or "")
+    html = _MD_RENDERER.render(_normalize_nested_list_markers(md or ""))
    rendered = transform_html(html)

    text = _BULLET_RE.sub(r"\1-", rendered.text)
@@ -1,3 +1,5 @@
+import re
+
 from takopi.telegram.render import render_markdown, split_markdown_body


@@ -20,6 +22,34 @@ def test_render_markdown_code_fence_language_is_string() -> None:
    assert any(e.get("type") == "code" for e in entities)


+def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
+    md = (
+        "1. Tune maker\n"
+        "- Sweep\n"
+        "- Keep data\n"
+        "1. Increase\n"
+        "- Raise target\n"
+        "- Keep\n"
+        "1. Train\n"
+        "- Start\n"
+        "1. Add\n"
+        "- Keep exposure\n"
+        "1. Run\n"
+        "- Target pnl\n"
+    )
+
+    text, _ = render_markdown(md)
+    numbered = [line for line in text.splitlines() if re.match(r"^\d+\.\s", line)]
+
+    assert numbered == [
+        "1. Tune maker",
+        "2. Increase",
+        "3. Train",
+        "4. Add",
+        "5. Run",
+    ]
+
+
 def test_split_markdown_body_closes_and_reopens_fence() -> None:
    body = "```py\n" + ("line\n" * 10) + "```\n\npost"