# Ordered list item: up to three columns of indentation, digits followed by
# "." or ")", then at least one whitespace character.
_ORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})(?P<marker>\d+[.)])\s+")
# Bullet list item: up to three columns of indentation, one of "-", "+", "*",
# then at least one whitespace character.
_UNORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})[-+*]\s+")


def _nested_bullet_pad(ordered_match: re.Match[str]) -> str:
    """Return the spaces needed to nest a bullet under an ordered item.

    A child block only belongs to a list item when it is indented at least
    as far as the item's content, i.e. marker width plus the gap that
    follows the marker. A fixed pad is too short for wide markers such as
    ``10.`` and makes the bullet start a new top-level list instead.

    Args:
        ordered_match: A successful ``_ORDERED_ITEM_RE`` match.

    Returns:
        A run of spaces as wide as the marker plus the gap after it.
    """
    marker = ordered_match.group("marker")
    lead = ordered_match.group("indent") + marker
    # Expand tabs over the whole prefix so the gap is measured in columns.
    gap = len(ordered_match.group(0).expandtabs(4)) - len(lead.expandtabs(4))
    # Five or more columns of gap (or an item with no content on its first
    # line) means the content offset is just one column past the marker.
    if gap > 4 or ordered_match.end() == len(ordered_match.string):
        gap = 1
    return " " * (len(marker) + gap)


def _normalize_nested_list_markers(md: str) -> str:
    """Indent bullets written at the same level as their ordered parent.

    Input such as ``"1. Step\\n- detail\\n1. Next\\n"`` puts the bullets at
    the same indentation as the numbered items, which splits the numbered
    list into several lists. This pass indents each such bullet so it nests
    inside the preceding ordered item and the numbering stays continuous.

    Lines inside fenced code blocks are passed through untouched, and a
    blank line or a fence ends the current ordered-item context.

    Args:
        md: Markdown source; may be empty.

    Returns:
        The markdown with qualifying bullet lines indented. All other lines,
        including their original line endings, are unchanged.
    """
    if not md:
        return md

    lines: list[str] = []
    # Indentation of the most recent ordered item, or None outside a list.
    ordered_indent: str | None = None
    # Padding that nests a bullet under that ordered item.
    nested_pad = ""
    fence_state: _FenceState | None = None

    for raw_line in md.splitlines(keepends=True):
        line, ending = _split_line_ending(raw_line)
        fence_state = _update_fence_state(line, fence_state)

        # Fenced code and blank lines are copied verbatim and end the
        # current ordered-item context.
        if fence_state is not None or not line.strip():
            ordered_indent = None
            lines.append(raw_line)
            continue

        ordered_match = _ORDERED_ITEM_RE.match(line)
        if ordered_match is not None:
            ordered_indent = ordered_match.group("indent")
            nested_pad = _nested_bullet_pad(ordered_match)
            lines.append(raw_line)
            continue

        if ordered_indent is not None:
            unordered_match = _UNORDERED_ITEM_RE.match(line)
            if (
                unordered_match is not None
                and unordered_match.group("indent") == ordered_indent
            ):
                # Push the bullet to the ordered item's content column so
                # it is parsed as a child of that item.
                lines.append(f"{nested_pad}{line}{ending}")
                continue

            # Any other non-empty line sharing the item's indentation keeps
            # the ordered context alive (e.g. continuation text).
            if line.startswith(ordered_indent) and len(line) > len(ordered_indent):
                lines.append(raw_line)
                continue

            ordered_indent = None

        lines.append(raw_line)

    return "".join(lines)
def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
    """Numbered items keep counting up when sub-bullets are not indented."""
    source_lines = [
        "1. Tune maker\n",
        "- Sweep\n",
        "- Keep data\n",
        "1. Increase\n",
        "- Raise target\n",
        "- Keep\n",
        "1. Train\n",
        "- Start\n",
        "1. Add\n",
        "- Keep exposure\n",
        "1. Run\n",
        "- Target pnl\n",
    ]
    rendered_text, _entities = render_markdown("".join(source_lines))

    # Collect only the rendered lines that start with "<number>. ".
    numbered_prefix = re.compile(r"^\d+\.\s")
    numbered = []
    for rendered_line in rendered_text.splitlines():
        if numbered_prefix.match(rendered_line):
            numbered.append(rendered_line)

    expected = [
        "1. Tune maker",
        "2. Increase",
        "3. Train",
        "4. Add",
        "5. Run",
    ]
    assert numbered == expected