fix(telegram): preserve numbering with malformed nested lists (#202)
This commit is contained in:
@@ -14,6 +14,8 @@ MAX_BODY_CHARS = 3500
|
|||||||
_MD_RENDERER = MarkdownIt("commonmark", {"html": False})
|
_MD_RENDERER = MarkdownIt("commonmark", {"html": False})
|
||||||
_BULLET_RE = re.compile(r"(?m)^(\s*)•")
|
_BULLET_RE = re.compile(r"(?m)^(\s*)•")
|
||||||
_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
|
_FENCE_RE = re.compile(r"^(?P<indent>[ \t]*)(?P<fence>[`~]{3,})(?P<info>.*)$")
|
||||||
|
# Start of an ordered list item: up to three leading spaces/tabs ("indent"),
# then digits followed by "." or ")" ("marker"), then at least one whitespace.
_ORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})(?P<marker>\d+[.)])\s+")
|
||||||
|
# Start of a bullet list item: up to three leading spaces/tabs ("indent"),
# then one of "-", "+" or "*", then at least one whitespace.
_UNORDERED_ITEM_RE = re.compile(r"^(?P<indent>[ \t]{0,3})[-+*]\s+")
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, slots=True)
|
@dataclass(frozen=True, slots=True)
|
||||||
@@ -23,8 +25,55 @@ class _FenceState:
|
|||||||
header: str
|
header: str
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_nested_list_markers(md: str) -> str:
    """Indent bullets that follow an ordered item so they nest beneath it.

    Markdown such as ``"1. A\n- x\n1. B\n- y\n"`` puts the sub-bullets at the
    same indent as the numbered items.  A CommonMark parser then closes the
    ordered list at every bullet, so each numbered item starts a new list and
    the numbering restarts.  This pass rewrites every bullet whose indent
    equals that of the most recent ordered item so that it starts at that
    item's content column, which makes it a child of the item.

    Lines for which ``_update_fence_state`` reports an active fence, and blank
    lines, are copied verbatim and end the current ordered-item context.

    Args:
        md: Markdown source text; may be empty.

    Returns:
        The Markdown text with qualifying bullet lines indented.  All other
        lines, including their original line endings, are unchanged.
    """
    if not md:
        return md

    lines: list[str] = []
    # Indent of the ordered item we are currently "inside", or None.
    ordered_indent: str | None = None
    # Spaces that move a sibling-indented bullet to the item's content column.
    nest_pad = ""
    fence_state: _FenceState | None = None

    for raw_line in md.splitlines(keepends=True):
        # presumably returns (text without terminator, terminator) -- the
        # helper is defined elsewhere in this module.
        line, ending = _split_line_ending(raw_line)
        fence_state = _update_fence_state(line, fence_state)
        if fence_state is not None:
            ordered_indent = None
            lines.append(raw_line)
            continue

        if not line.strip():
            ordered_indent = None
            lines.append(raw_line)
            continue

        ordered_match = _ORDERED_ITEM_RE.match(line)
        if ordered_match is not None:
            ordered_indent = ordered_match.group("indent")
            # CommonMark: an item's content column is the marker width plus
            # the 1-4 spaces after it; five or more spaces count as one
            # (the rest is an indented code block).  Tabs are counted as a
            # single column here.
            gap = ordered_match.end() - ordered_match.end("marker")
            if gap > 4:
                gap = 1
            nest_pad = " " * (len(ordered_match.group("marker")) + gap)
            lines.append(raw_line)
            continue

        if ordered_indent is not None:
            unordered_match = _UNORDERED_ITEM_RE.match(line)
            if (
                unordered_match is not None
                and unordered_match.group("indent") == ordered_indent
            ):
                # `line` already carries `ordered_indent`, so adding the pad
                # lands the bullet exactly on the item's content column.
                lines.append(f"{nest_pad}{line}{ending}")
                continue

            # Any other non-blank line at or beyond the item's indent keeps
            # the ordered-item context alive (continuation text).
            if line.startswith(ordered_indent) and len(line) > len(ordered_indent):
                lines.append(raw_line)
                continue

            ordered_indent = None

        lines.append(raw_line)

    return "".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
|
def render_markdown(md: str) -> tuple[str, list[dict[str, Any]]]:
|
||||||
html = _MD_RENDERER.render(md or "")
|
html = _MD_RENDERER.render(_normalize_nested_list_markers(md or ""))
|
||||||
rendered = transform_html(html)
|
rendered = transform_html(html)
|
||||||
|
|
||||||
text = _BULLET_RE.sub(r"\1-", rendered.text)
|
text = _BULLET_RE.sub(r"\1-", rendered.text)
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
from takopi.telegram.render import render_markdown, split_markdown_body
|
from takopi.telegram.render import render_markdown, split_markdown_body
|
||||||
|
|
||||||
|
|
||||||
@@ -20,6 +22,34 @@ def test_render_markdown_code_fence_language_is_string() -> None:
|
|||||||
assert any(e.get("type") == "code" for e in entities)
|
assert any(e.get("type") == "code" for e in entities)
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_markdown_keeps_ordered_numbering_with_unindented_sub_bullets() -> None:
    """Numbered items keep counting up when their bullets are not indented.

    Every ordered item in the input is written as ``1.`` and is followed by
    bullets at the same indent; the rendered text must number them 1 to 5.
    """
    source_lines = (
        "1. Tune maker\n",
        "- Sweep\n",
        "- Keep data\n",
        "1. Increase\n",
        "- Raise target\n",
        "- Keep\n",
        "1. Train\n",
        "- Start\n",
        "1. Add\n",
        "- Keep exposure\n",
        "1. Run\n",
        "- Target pnl\n",
    )
    expected_numbered = [
        "1. Tune maker",
        "2. Increase",
        "3. Train",
        "4. Add",
        "5. Run",
    ]

    rendered_text, _entities = render_markdown("".join(source_lines))

    # Keep only the rendered lines that begin with "<digits>. ".
    numbered = []
    for rendered_line in rendered_text.splitlines():
        if re.match(r"^\d+\.\s", rendered_line) is not None:
            numbered.append(rendered_line)

    assert numbered == expected_numbered
|
||||||
|
|
||||||
|
|
||||||
def test_split_markdown_body_closes_and_reopens_fence() -> None:
|
def test_split_markdown_body_closes_and_reopens_fence() -> None:
|
||||||
body = "```py\n" + ("line\n" * 10) + "```\n\npost"
|
body = "```py\n" + ("line\n" * 10) + "```\n\npost"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user