Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions src/pymax/formatting/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@


class Formatter:
# Highest code point in the Basic Multilingual Plane (BMP). Code points above
# this value are encoded as surrogate pairs in UTF-16, occupying 2 code units
# instead of 1.
BMP_MAX = 0xFFFF

MARKERS = {
"```": "CODE",
"**": "STRONG",
Expand All @@ -14,6 +18,10 @@ class Formatter:

MARKER_ORDER = ["```", "**", "__", "~~", "`", "_", "*"]

@staticmethod
def _code_units_len(text: str) -> int:
    """Return the length of *text* measured in UTF-16 code units.

    Code points above U+FFFF are encoded as a surrogate pair and count as
    two units; every other code point counts as one.
    """
    # "surrogatepass" makes a lone surrogate count as one unit instead of
    # raising UnicodeEncodeError, which agrees with the per-character
    # `ord(ch) > BMP_MAX` counting used by the rest of the formatter.
    # UTF-16-LE writes no BOM, so bytes // 2 is exactly the unit count.
    return len(text.encode("utf-16-le", errors="surrogatepass")) // 2

@staticmethod
def _parse_link(
text: str,
Expand Down Expand Up @@ -64,15 +72,16 @@ def format_markdown(text: str) -> tuple[str, list[Element]]:
label, url, next_i = parsed_link

start = clean_pos
utf16_label_len = Formatter._code_units_len(label)

clean_text += label
clean_pos += len(label)
clean_pos += utf16_label_len

entities.append(
Element(
type="LINK",
from_=start,
length=len(label),
length=utf16_label_len,
attributes=ElementAttributes(url=url),
)
)
Expand All @@ -93,9 +102,10 @@ def format_markdown(text: str) -> tuple[str, list[Element]]:
start = clean_pos

while i < len(text) and text[i] != "\n":
clean_text += text[i]
ch = text[i]
clean_text += ch
i += 1
clean_pos += 1
clean_pos += 2 if ord(ch) > Formatter.BMP_MAX else 1

length = clean_pos - start

Expand Down Expand Up @@ -123,9 +133,10 @@ def format_markdown(text: str) -> tuple[str, list[Element]]:
start = clean_pos

while i < len(text) and text[i] != "\n":
clean_text += text[i]
ch = text[i]
clean_text += ch
i += 1
clean_pos += 1
clean_pos += 2 if ord(ch) > Formatter.BMP_MAX else 1

length = clean_pos - start

Expand Down Expand Up @@ -211,10 +222,11 @@ def format_markdown(text: str) -> tuple[str, list[Element]]:
line_start = False
continue

clean_text += text[i]
line_start = text[i] == "\n"
ch = text[i]
clean_text += ch
line_start = ch == "\n"

i += 1
clean_pos += 1
clean_pos += 2 if ord(ch) > Formatter.BMP_MAX else 1

return clean_text, entities