Coverage for yuio / md.py: 91%
547 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-05 11:41 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-05 11:41 +0000
1# Yuio project, MIT license.
2#
3# https://github.com/taminomara/yuio/
4#
5# You're free to copy this file to your project and edit it for your needs,
6# just keep this copyright line please :3
8"""
9Yuio's primary format for higher-level io is Markdown (well, a reasonably rich subset
10of it).
13Formatting markdown
14-------------------
16.. autoclass:: MdFormatter
17 :members:
20.. _highlighting-code:
22Highlighting code
23-----------------
25Yuio supports basic code highlighting; it is just enough to format help messages
26for CLI, and color tracebacks when an error occurs.
28.. autoclass:: SyntaxHighlighter
29 :members:
32Markdown AST
33------------
35.. warning::
37 This is experimental API which can change within a minor release.
39.. autofunction:: parse
41.. autoclass:: AstBase
42 :members:
44.. autoclass:: Text
45 :members:
47.. autoclass:: Container
48 :members:
50.. autoclass:: Document
51 :members:
53.. autoclass:: ThematicBreak
54 :members:
56.. autoclass:: Heading
57 :members:
59.. autoclass:: Paragraph
60 :members:
62.. autoclass:: Quote
63 :members:
65.. autoclass:: Code
66 :members:
68.. autoclass:: ListItem
69 :members:
71.. autoclass:: List
72 :members:
75"""
77from __future__ import annotations
79import abc
80import contextlib
81import dataclasses
82import math
83import os
84import re
85from dataclasses import dataclass
87import yuio.color
88import yuio.string
89import yuio.theme
90from yuio.util import dedent as _dedent
92import yuio._typing_ext as _tx
93from typing import TYPE_CHECKING
95if TYPE_CHECKING:
96 import typing_extensions as _t
97else:
98 from yuio import _typing as _t
# Public API of this module. Every name is listed exactly once, sorted.
__all__ = [
    "AstBase",
    "Code",
    "Container",
    "Document",
    "Heading",
    "List",
    "ListItem",
    "MdFormatter",
    "Paragraph",
    "Quote",
    "Raw",
    "SyntaxHighlighter",
    "Text",
    "ThematicBreak",
    "parse",
]

# Generic helpers: ``TAst`` is the item type of a ``Container``.
T = _t.TypeVar("T")
TAst = _t.TypeVar("TAst", bound="AstBase")
@_t.final
class MdFormatter:
    """
    A simple markdown formatter suitable for displaying rich text in the terminal.

    :param ctx:
        a :class:`~yuio.string.ReprContext` that's used to colorize or wrap
        rendered markdown.
    :param allow_headings:
        if set to :data:`False`, headings are rendered as paragraphs.

    All CommonMark block markup except tables is supported:

    - headings:

      .. code-block:: markdown

         # Heading 1
         ## Heading 2

      Yuio has only two levels of headings. Headings past level two will look the same
      as level two headings (you can adjust theme to change this).

      If `allow_headings` is set to :data:`False`, headings look like paragraphs.

    - lists, numbered lists, quotes:

      .. code-block:: markdown

         - List item 1,
         - list item 2.

         1. Numbered list item 1,
         1. numbered list item 2.

         > Quoted text.

    - fenced code blocks with minimal syntax highlighting
      (see :class:`SyntaxHighlighter`):

      .. code-block:: markdown

         ```python
         for i in range(5, 8):
             print(f"Hello, world! This is {{i}}th day past the apocalypse.")
         ```

      Yuio supports ``python``, ``traceback``, ``bash``, ``diff``,
      and ``json`` syntaxes.

    Inline markdown only handles inline code blocks:

    .. code-block:: markdown

       This is `code`. It will be rendered as code.
       Other inline styles, such as _italic_, are not supported!

    However, color tags are supported, so you can highlight text as follows:

    .. code-block:: markdown

       This is <c b>bold text</c>. It will be rendered bold.

    """

    def __init__(
        self,
        ctx: yuio.string.ReprContext,
        *,
        allow_headings: bool = True,
    ):
        self._ctx = ctx
        self.allow_headings: bool = allow_headings

        # Rendering state; (re)initialized at the start of every `format_node` call.
        self._is_first_line: bool
        self._separate_paragraphs: bool
        self._out: list[yuio.string.ColorizedString]
        self._indent: yuio.string.ColorizedString
        self._continuation_indent: yuio.string.ColorizedString

    @property
    def ctx(self):
        """
        Context that was passed to the constructor.

        """

        return self._ctx

    @property
    def width(self):
        """
        Width of the context; used as the wrapping width for rendered text.

        """

        return self._ctx.width

    def format(
        self, md: str, *, dedent: bool = True
    ) -> list[yuio.string.ColorizedString]:
        """
        Format a markdown document.

        :param md:
            markdown to format. Common indentation will be removed from this string,
            making it suitable to use with triple quote literals.
        :param dedent:
            remove leading indent from markdown.
        :returns:
            rendered markdown as a list of individual lines without newline
            characters at the end.

        """

        return self.format_node(self.parse(md, dedent=dedent))

    def parse(self, md: str, /, *, dedent: bool = True) -> Document:
        """
        Parse a markdown document and return an AST node.

        .. warning::

           This is experimental API which can change within a minor release.

        :param md:
            markdown to parse. Common indentation will be removed from this string,
            making it suitable to use with triple quote literals.
        :param dedent:
            remove leading indent from markdown.
        :returns:
            parsed AST node.

        """

        if dedent:
            md = _dedent(md)

        return _MdParser(self.allow_headings).parse(md)

    def format_node(self, node: AstBase, /) -> list[yuio.string.ColorizedString]:
        """
        Format a parsed markdown document.

        .. warning::

           This is an experimental API which can change within a minor release.

        :param node:
            AST node to format.
        :returns:
            rendered markdown as a list of individual lines without newline
            characters at the end.

        """

        self._is_first_line = True
        self._separate_paragraphs = True
        self._out = []
        self._indent = yuio.string.ColorizedString()
        self._continuation_indent = yuio.string.ColorizedString()

        self._format(node)

        return self._out

    def colorize(
        self,
        text: str,
        /,
        *,
        default_color: yuio.color.Color | str = yuio.color.Color.NONE,
    ):
        """
        Parse and colorize contents of a paragraph.

        This is a shortcut for calling :func:`yuio.string.colorize` with this
        formatter's context.

        :param text:
            text to colorize.
        :param default_color:
            color or color tag to apply to the entire text.
        :returns:
            a colorized string.

        """

        return yuio.string.colorize(text, default_color=default_color, ctx=self.ctx)

    @contextlib.contextmanager
    def _with_indent(
        self,
        color: yuio.color.Color | str | None,
        s: yuio.string.AnyString,
        /,
        *,
        continue_with_spaces: bool = True,
    ):
        """
        Temporarily append ``s`` (painted with ``color``) to the current indent.

        The first line rendered inside the ``with`` block is prefixed with ``s``.
        Following lines are prefixed with spaces of the same width, or with ``s``
        itself when ``continue_with_spaces`` is false. Both indents are restored
        on exit.

        """

        color = self.ctx.to_color(color)
        indent = yuio.string.ColorizedString(color)
        indent += s

        old_indent = self._indent
        old_continuation_indent = self._continuation_indent

        if continue_with_spaces:
            continuation_indent = yuio.string.ColorizedString(" " * indent.width)
        else:
            continuation_indent = indent

        self._indent = self._indent + indent
        self._continuation_indent = self._continuation_indent + continuation_indent

        try:
            yield
        finally:
            self._indent = old_indent
            self._continuation_indent = old_continuation_indent

    def _line(self, line: yuio.string.ColorizedString, /):
        """
        Emit one output line and switch to the continuation indent.

        """

        self._out.append(line)

        self._is_first_line = False
        self._indent = self._continuation_indent

    def _format(self, node: AstBase, /):
        # Dispatch on the node's class name: `Quote` -> `_format_Quote`, etc.
        getattr(self, f"_format_{node.__class__.__name__.lstrip('_')}")(node)

    def _format_Raw(self, node: Raw, /):
        for line in node.raw.wrap(
            self.width,
            indent=self._indent,
            continuation_indent=self._continuation_indent,
            break_long_nowrap_words=True,
        ):
            self._line(line)

    def _format_Text(self, node: Text, /, *, default_color: yuio.color.Color):
        s = self.colorize(
            "\n".join(node.lines).strip(),
            default_color=default_color,
        )

        for line in s.wrap(
            self.width,
            indent=self._indent,
            continuation_indent=self._continuation_indent,
            preserve_newlines=False,
            break_long_nowrap_words=True,
        ):
            self._line(line)

    def _format_Container(self, node: Container[TAst], /):
        self._is_first_line = True
        for item in node.items:
            # Separate consecutive items with a line that holds only the indent.
            if not self._is_first_line and self._separate_paragraphs:
                self._line(self._indent)
            self._format(item)

    def _format_Document(self, node: Document, /):
        self._format_Container(node)

    def _format_ThematicBreak(self, _: ThematicBreak):
        decoration = self.ctx.get_msg_decoration("thematic_break")
        self._line(self._indent + decoration)

    def _format_Heading(self, node: Heading, /):
        if not self._is_first_line:
            self._line(self._indent)

        decoration = self.ctx.get_msg_decoration(f"heading/{node.level}")
        with self._with_indent(f"msg/decoration:heading/{node.level}", decoration):
            self._format_Text(
                node,
                default_color=self.ctx.get_color(f"msg/text:heading/{node.level}"),
            )

        self._line(self._indent)
        # The separator after the heading is already emitted, so the next item
        # in the enclosing container must not add another one.
        self._is_first_line = True

    def _format_Paragraph(self, node: Paragraph, /):
        self._format_Text(node, default_color=self.ctx.get_color("msg/text:paragraph"))

    def _format_ListItem(self, node: ListItem, /, *, min_width: int = 0):
        decoration = self.ctx.get_msg_decoration("list")
        if node.number is not None:
            # Right-align the number to `min_width` digits, then pad with spaces
            # up to the width of the regular list decoration.
            decoration = f"{node.number:>{min_width}}." + " " * (
                yuio.string.line_width(decoration) - min_width - 1
            )
        with self._with_indent("msg/decoration:list", decoration):
            self._format_Container(node)

    def _format_Quote(self, node: Quote, /):
        decoration = self.ctx.get_msg_decoration("quote")
        with self._with_indent(
            "msg/decoration:quote", decoration, continue_with_spaces=False
        ):
            self._format_Container(node)

    def _format_Code(self, node: Code, /):
        s = SyntaxHighlighter.get_highlighter(node.syntax).highlight(
            self.ctx.theme,
            "\n".join(node.lines),
        )

        decoration = self.ctx.get_msg_decoration("code")
        with self._with_indent("msg/decoration:code", decoration):
            self._line(
                s.indent(
                    indent=self._indent,
                    continuation_indent=self._continuation_indent,
                )
            )

    def _format_List(self, node: List, /):
        # Number of digits in the largest item number, so that all numbers in
        # the list are right-aligned to the same column. Counting digits via
        # `str` is exact; `ceil(log10(n))` is off by one for 1, 10, 100, ...
        # `default=0` keeps an empty list node from raising.
        max_number = max((item.number or 0 for item in node.items), default=0)
        min_width = len(str(max_number)) if max_number > 0 else 1
        self._is_first_line = True
        for item in node.items:
            if not self._is_first_line:
                self._line(self._indent)
            self._format_ListItem(item, min_width=min_width)
446@dataclass(kw_only=True, slots=True)
447class AstBase(abc.ABC):
448 """
449 Base class for all AST nodes that represent parsed markdown document.
451 """
453 def _dump_params(self) -> str:
454 s = self.__class__.__name__.lstrip("_")
455 for field in dataclasses.fields(self):
456 if field.repr:
457 s += f" {getattr(self, field.name)!r}"
458 return s
460 def dump(self, indent: str = "") -> str:
461 """
462 Dump an AST node into a lisp-like text representation.
464 """
466 return f"{indent}({self._dump_params()})"
@dataclass(kw_only=True, slots=True)
class Raw(AstBase):
    """
    A paragraph that is already formatted and is inserted into the document as is.

    """

    raw: yuio.string.ColorizedString
    """
    Pre-colorized string that makes up the paragraph.

    """
@dataclass(kw_only=True, slots=True)
class Text(AstBase):
    """
    Common ancestor of nodes that hold text: paragraphs, headings, and so on.

    """

    lines: list[str] = dataclasses.field(repr=False)
    """
    Lines of text, as they appeared in the source document.

    """

    def dump(self, indent: str = "") -> str:
        # Header first, then every line on its own row, one level deeper.
        nested = indent + " "
        chunks = [f"{indent}({self._dump_params()}"]
        chunks.extend(f"\n{nested}{line!r}" for line in self.lines)
        chunks.append(")")
        return "".join(chunks)
@dataclass(kw_only=True, slots=True)
class Container(AstBase, _t.Generic[TAst]):
    """
    Common ancestor of nodes that hold other nodes: list items, quotes, and so on.

    A container is a list of child nodes. Children are usually arbitrary AST nodes,
    but the generic parameter can narrow them down to a specific node type.

    """

    items: list[TAst] = dataclasses.field(repr=False)
    """
    Child AST nodes of this container.

    """

    def dump(self, indent: str = "") -> str:
        # Header first, then every child dumped one level deeper.
        head = f"{indent or ''}({self._dump_params()}"
        indent += " "
        body = "".join("\n" + child.dump(indent) for child in self.items)
        return head + body + ")"
@dataclass(kw_only=True, slots=True)
class Document(Container[AstBase]):
    """
    Top-level node; holds all blocks of a markdown document.

    """
@dataclass(kw_only=True, slots=True)
class ThematicBreak(AstBase):
    """
    A visual separator between blocks of text (an asterism).

    """
@dataclass(kw_only=True, slots=True)
class Heading(Text):
    """
    A heading.

    """

    level: int
    """
    Heading level, counted from `1`.

    """
@dataclass(kw_only=True, slots=True)
class Paragraph(Text):
    """
    A plain paragraph of text.

    """
@dataclass(kw_only=True, slots=True)
class Quote(Container[AstBase]):
    """
    A block quote.

    """
@dataclass(kw_only=True, slots=True)
class Code(Text):
    """
    A block of code that is rendered with syntax highlighting.

    """

    syntax: str
    """
    Syntax name, as parsed from the original document.

    """
@dataclass(kw_only=True, slots=True)
class ListItem(Container[AstBase]):
    """
    A single element of a list; may carry a number.

    """

    number: int | None
    """
    Number of the item in a numbered list, or :data:`None` for other lists.

    """
@dataclass(kw_only=True, slots=True)
class List(Container[ListItem]):
    """
    A list; its children are :class:`ListItem` nodes.

    """
# Line-level patterns used by the block parser. Each one is matched against
# a single line with tabs already expanded and without a line terminator.

# ATX heading: `# Title`, `## Title ##`. The `text` group is absent (None)
# when nothing follows the marker.
_HEADING_RE = re.compile(
    r"""
    ^
    \s{0,3}                 # - Initial indent.
    (?P<marker>\#{1,6})     # - Heading marker.
    (?P<text>\s.*?)?        # - Heading text. Unless empty, text must be separated
                            #   from the heading marker by a space.
    (?:(?<=\s)\#+)?         # - Optional closing hashes. Must be separated from
                            #   the previous content by a space. We use lookbehind
                            #   here, because if the text is empty, the space
                            #   between heading marker and closing hashes will be
                            #   matched by the `text` group.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# Setext heading underline: a run of `=` or a run of `-`.
_SETEXT_HEADING_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<level>-|=)          # - Heading underline.
    \2*                     # - More heading underline.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# Bullet list item. `marker` spans everything up to the item's text.
_LIST_RE = re.compile(
    r"""
    ^
    (?P<marker>
        \s{0,3}             # - Initial indent.
        (?P<type>[-*+])     # - List marker.
        (?:
            \s(?:\s{0,3}(?=\S))?    # - One mandatory and up to three optional spaces;
                                    #   When there are more than three optional spaces,
                                    #   we treat them as a list marker followed
                                    #   by a single space, followed by a code block.
            | $))           # - For cases when a list starts with an empty line.
    (?P<text>.*)            # - Text of the first line in the list.
    $
    """,
    re.VERBOSE,
)
# Numbered list item. `marker` spans everything up to the item's text.
_NUMBERED_LIST_RE = re.compile(
    r"""
    ^
    (?P<marker>
        \s{0,3}             # - Initial indent.
        (?P<number>\d{1,9}) # - Number.
        (?P<type>[.:)])     # - Numbered list marker.
        (?:
            \s(?:\s{0,3}(?=\S))?    # - One mandatory and up to three optional spaces;
                                    #   When there are more than three optional spaces,
                                    #   we treat them as a list marker followed
                                    #   by a single space, followed by a code block.
            | $))           # - For cases when a list starts with an empty line.
    (?P<text>.*)            # - Text of the first line in the list.
    $
    """,
    re.VERBOSE,
)
# Opening fence made of backticks.
_CODE_BACKTICK_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>```+)         # - Backtick fence.
    (?P<syntax>[^`]*)       # - Syntax, can't contain backtick.
    $
    """,
    re.VERBOSE,
)
# Opening fence made of tildes.
_CODE_TILDE_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>~~~+)         # - Tilde fence.
    (?P<syntax>.*)          # - Syntax, can be anything.
    $
    """,
    re.VERBOSE,
)
# Closing fence of either kind; nothing but spaces may follow it.
_CODE_FENCE_END_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>~~~+|```+)    # - Fence.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# First line of an indented code block.
_CODE_RE = re.compile(
    r"""
    ^
    \s{4}                   # - Initial code indent.
    (?P<text>.*)            # - First code line.
    $
    """,
    re.VERBOSE,
)
# Block quote line.
_QUOTE_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial quote indent.
    >                       # - Quote marker.
    \s?                     # - Optional space after the marker.
    (?P<text>.*)            # - Text of the first line in the quote.
    $
    """,
    re.VERBOSE,
)
# Thematic break: `---`, `* * *`, `___`, ...
_THEMATIC_BREAK_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    ([-*_])\s*(\2\s*){2,}   # - At least three break characters separated by spaces.
    $
    """,
    re.VERBOSE,
)
# Any of the three line terminator styles.
_LINE_FEED_RE = re.compile(r"\r\n|\r|\n")
class _MdParser:
    """
    Line-by-line parser for block-level markdown.

    The parser is a state machine: ``self._state`` holds the block that is
    currently open, and every ``_handle_line_<State>``, ``_handle_lazy_line_<State>``
    and ``_flush_<State>`` method is looked up by the state's class name.
    Lists and quotes delegate their content to a nested parser instance.

    """

    # Nothing is open; the next line starts a new block.
    @dataclass(kw_only=True, slots=True)
    class Default:
        pass

    # An open list. `parser` collects content of the current item.
    @dataclass(kw_only=True, slots=True)
    class List:
        type: str
        marker_len: int
        list: List
        parser: _MdParser
        number: int | None = None

    # An open block quote. `parser` collects content of the quote.
    @dataclass(kw_only=True, slots=True)
    class Quote:
        parser: _MdParser

    # An open indented code block.
    @dataclass(kw_only=True, slots=True)
    class Code:
        lines: list[str]

    # An open fenced code block.
    @dataclass(kw_only=True, slots=True)
    class FencedCode:
        indent: int
        fence_symbol: str
        fence_length: int
        syntax: str
        lines: list[str]

    # An open paragraph.
    @dataclass(kw_only=True, slots=True)
    class Paragraph:
        lines: list[str]

    State: _t.TypeAlias = Default | List | Quote | Code | FencedCode | Paragraph

    def __init__(self, allow_headings: bool = True):
        self._allow_headings = allow_headings
        self._nodes: list[AstBase] = []
        self._state: _MdParser.State = self.Default()

    def _parser(self) -> _MdParser:
        # Nested parser for list items and quotes; inherits heading settings.
        return _MdParser(self._allow_headings)

    @staticmethod
    def _is_blank(s: str) -> bool:
        return not s or s.isspace()

    def parse(self, s: str) -> Document:
        """
        Parse the given text and return the resulting document node.

        Tabs are expanded to four-column tab stops before parsing.

        """

        s = s.expandtabs(tabsize=4)
        for line in _LINE_FEED_RE.split(s):
            self._handle_line(line)
        return Document(items=self._finalize())

    def _handle_line(self, line: str):
        # Feed a line to the handler of the currently open block.
        getattr(self, f"_handle_line_{self._state.__class__.__name__}")(line)

    def _handle_lazy_line(self, line: str) -> bool:
        # Offer a line as a lazy continuation of the currently open block;
        # returns `True` if the block has consumed the line.
        return getattr(self, f"_handle_lazy_line_{self._state.__class__.__name__}")(
            line
        )

    def _flush(self):
        # Close the currently open block and move its node to `self._nodes`.
        getattr(self, f"_flush_{self._state.__class__.__name__}")()

    def _handle_line_List(self, line: str):
        assert type(self._state) is self.List
        if not line or line[: self._state.marker_len].isspace():
            # Empty line, or a line indented past the marker: item content.
            self._state.parser._handle_line(line[self._state.marker_len :])
        elif (
            (match := _LIST_RE.match(line)) or (match := _NUMBERED_LIST_RE.match(line))
        ) and match.group("type") == self._state.type:
            # Marker of the same type: close the current item, start a new one.
            item = ListItem(
                items=self._state.parser._finalize(),
                number=self._state.number,
            )
            self._state.list.items.append(item)
            self._state.marker_len = len(match.group("marker"))
            self._state.parser._handle_line(match.group("text"))
            if self._state.number is not None:
                self._state.number += 1
        elif not self._state.parser._handle_lazy_line(line):
            self._flush_List()
            self._handle_line_Default(line)

    def _handle_lazy_line_List(self, line: str) -> bool:
        assert type(self._state) is self.List
        return bool(self._state.parser._handle_lazy_line(line))

    def _flush_List(self):
        assert type(self._state) is self.List
        item = ListItem(
            items=self._state.parser._finalize(),
            number=self._state.number,
        )
        self._state.list.items.append(item)
        self._nodes.append(self._state.list)
        self._state = self.Default()

    def _handle_line_Quote(self, line: str):
        assert type(self._state) is self.Quote
        if match := _QUOTE_RE.match(line):
            self._state.parser._handle_line(match.group("text"))
        elif self._is_blank(line) or not self._state.parser._handle_lazy_line(line):
            self._flush_Quote()
            self._handle_line_Default(line)

    def _handle_lazy_line_Quote(self, line: str) -> bool:
        assert type(self._state) is self.Quote
        return bool(self._state.parser._handle_lazy_line(line))

    def _flush_Quote(self):
        assert type(self._state) is self.Quote
        self._nodes.append(Quote(items=self._state.parser._finalize()))
        self._state = self.Default()

    def _handle_line_Code(self, line: str):
        assert type(self._state) is self.Code
        # A code line carries the same four-column indent that `_CODE_RE`
        # requires; that indent is stripped from the stored line.
        if self._is_blank(line) or line.startswith("    "):
            self._state.lines.append(line[4:])
        else:
            self._flush_Code()
            self._handle_line_Default(line)

    def _handle_lazy_line_Code(self, line: str) -> bool:
        assert type(self._state) is self.Code
        return False  # No lazy continuations for code!

    def _flush_Code(self):
        assert type(self._state) is self.Code
        # Blank lines at the end of an indented block are not part of the code.
        while self._state.lines and self._is_blank(self._state.lines[-1]):
            self._state.lines.pop()
        self._nodes.append(
            Code(
                lines=self._state.lines,
                syntax="",
            )
        )
        self._state = self.Default()

    def _handle_line_FencedCode(self, line: str):
        assert type(self._state) is self.FencedCode
        # A closing fence uses the same symbol as the opening one and is
        # at least as long (CommonMark, "Fenced code blocks").
        if (
            (match := _CODE_FENCE_END_RE.match(line))
            and match.group("fence")[0] == self._state.fence_symbol
            and len(match.group("fence")) >= self._state.fence_length
        ):
            self._flush_FencedCode()
        else:
            # Remove the indent of the opening fence from content lines;
            # if a line is indented less than that, remove what there is.
            if self._state.indent == 0:
                pass
            elif line[: self._state.indent].isspace():
                line = line[self._state.indent :]
            else:
                line = line.lstrip()
            self._state.lines.append(line)

    def _handle_lazy_line_FencedCode(self, line: str) -> bool:
        assert type(self._state) is self.FencedCode
        return False

    def _flush_FencedCode(self):
        assert type(self._state) is self.FencedCode
        self._nodes.append(
            Code(
                lines=self._state.lines,
                syntax=self._state.syntax,
            )
        )
        self._state = self.Default()

    def _starts_new_block(self, line: str) -> bool:
        # Whether `line` interrupts an open paragraph.
        return bool(
            self._is_blank(line)
            or _THEMATIC_BREAK_RE.match(line)
            or (self._allow_headings and _HEADING_RE.match(line))
            or _CODE_BACKTICK_RE.match(line)
            or _CODE_TILDE_RE.match(line)
            or _LIST_RE.match(line)
            or _NUMBERED_LIST_RE.match(line)
            or _QUOTE_RE.match(line)
        )

    def _handle_line_Paragraph(self, line: str):
        assert type(self._state) is self.Paragraph
        # Setext underline turns the paragraph into a heading, but only when
        # headings are allowed; otherwise the line is handled like any other.
        if self._allow_headings and (match := _SETEXT_HEADING_RE.match(line)):
            level = 1 if match.group("level") == "=" else 2
            self._nodes.append(
                Heading(
                    lines=self._state.lines,
                    level=level,
                )
            )
            self._state = self.Default()
        elif self._starts_new_block(line):
            self._flush_Paragraph()
            self._handle_line_Default(line)
        else:
            self._state.lines.append(line)

    def _handle_lazy_line_Paragraph(self, line: str) -> bool:
        assert type(self._state) is self.Paragraph
        if self._starts_new_block(line):
            self._flush_Paragraph()
            return False
        else:
            self._state.lines.append(line)
            return True

    def _flush_Paragraph(self):
        assert type(self._state) is self.Paragraph
        self._nodes.append(Paragraph(lines=self._state.lines))
        self._state = self.Default()

    def _handle_line_Default(self, line: str):
        assert type(self._state) is self.Default
        if self._is_blank(line):
            pass  # do nothing
        elif _THEMATIC_BREAK_RE.match(line):
            self._nodes.append(ThematicBreak())
        elif self._allow_headings and (match := _HEADING_RE.match(line)):
            level = len(match.group("marker"))
            # The `text` group is `None` for a bare marker (e.g. `#`),
            # which is an empty heading.
            text = match.group("text") or ""
            self._nodes.append(
                Heading(
                    lines=[text.strip()],
                    level=level,
                )
            )
        elif (match := _CODE_BACKTICK_RE.match(line)) or (
            match := _CODE_TILDE_RE.match(line)
        ):
            indent = len(match.group("indent"))
            syntax = match.group("syntax").strip()
            fence_symbol = match.group("fence")[0]
            fence_length = len(match.group("fence"))
            self._state = self.FencedCode(
                indent=indent,
                fence_symbol=fence_symbol,
                fence_length=fence_length,
                syntax=syntax,
                lines=[],
            )
        elif match := _CODE_RE.match(line):
            self._state = self.Code(lines=[match.group("text")])
        elif (match := _LIST_RE.match(line)) or (
            match := _NUMBERED_LIST_RE.match(line)
        ):
            indent = len(match.group("marker"))
            list_type = match.group("type")
            # Only the numbered list pattern defines the `number` group.
            number_str = match.groupdict().get("number", None)
            number = int(number_str) if number_str else None
            self._state = self.List(
                type=list_type,
                marker_len=indent,
                list=List(items=[]),
                parser=self._parser(),
                number=number,
            )
            self._state.parser._handle_line(match.group("text"))
        elif match := _QUOTE_RE.match(line):
            self._state = self.Quote(parser=self._parser())
            self._state.parser._handle_line(match.group("text"))
        else:
            self._state = self.Paragraph(lines=[line])

    def _handle_lazy_line_Default(self, line: str) -> bool:
        assert type(self._state) is self.Default
        return False

    def _flush_Default(self):
        assert type(self._state) is self.Default

    def _finalize(self) -> list[AstBase]:
        # Close whatever is open, hand over collected nodes, and start afresh.
        self._flush()
        result = self._nodes
        self._nodes = []
        return result
def parse(md: str, /, *, dedent: bool = True, allow_headings: bool = True) -> Document:
    """
    Build an AST from a markdown document.

    :param md:
        markdown source. Common indentation will be stripped from it, so that
        triple quote literals can be passed as is.
    :param dedent:
        strip leading indent from markdown.
    :param allow_headings:
        if set to :data:`False`, headings are rendered as paragraphs.
    :returns:
        root node of the parsed document.

    """

    source = _dedent(md) if dedent else md
    return _MdParser(allow_headings).parse(source)
_SYNTAXES: dict[str, SyntaxHighlighter] = {}
"""
Module-wide registry that maps normalized syntax names to highlighters.

"""
class SyntaxHighlighter(abc.ABC):
    """
    Interface of a code highlighter, plus a global registry of highlighters.

    """

    @property
    @abc.abstractmethod
    def syntaxes(self) -> list[str]:
        """
        Names of all syntaxes that this highlighter handles.

        """

        return []

    @property
    def syntax(self) -> str:
        """
        Main name of this highlighter's syntax; by default it is the first
        element of the :attr:`~SyntaxHighlighter.syntaxes` list.

        Theme colors are looked up by this name.

        """

        return self.syntaxes[0] if self.syntaxes else "unknown"

    @classmethod
    def register_highlighter(cls, highlighter: SyntaxHighlighter):
        """
        Add a highlighter to the global registry, so that it can be found
        via the :meth:`~SyntaxHighlighter.get_highlighter` method.

        :param highlighter:
            a highlighter instance.

        """

        # Names are stored lowercased, with underscores turned into dashes.
        _SYNTAXES.update(
            {
                syntax.lower().replace("_", "-"): highlighter
                for syntax in highlighter.syntaxes
            }
        )

    @classmethod
    def get_highlighter(cls, syntax: str, /) -> SyntaxHighlighter:
        """
        Find a highlighter by the name of its syntax.

        :param syntax:
            name of the syntax highlighter.
        :returns:
            a highlighter instance.

        When there is no highlighter with the given name, a dummy highlighter
        that does nothing is returned.

        """

        key = syntax.lower().replace("_", "-")
        return _SYNTAXES.get(key, _DummySyntaxHighlighter())

    @abc.abstractmethod
    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        """
        Highlight a piece of code with colors from a theme.

        :param theme:
            theme that will be used to look up color tags.
        :param code:
            code to highlight.
        :param default_color:
            color or color tag to apply to the entire code.

        """

        raise NotImplementedError()

    def _get_default_color(
        self,
        theme: yuio.theme.Theme,
        default_color: yuio.color.Color | str | None,
    ) -> yuio.color.Color:
        # Caller's color combined with the theme's color for this syntax.
        base = theme.to_color(default_color)
        return base | theme.get_color(f"msg/text:code/{self.syntax}")
class _DummySyntaxHighlighter(SyntaxHighlighter):
    """
    Highlighter for plain text: paints the whole code with the default color.

    """

    @property
    def syntaxes(self) -> list[str]:
        return ["text", "plain-text"]

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        color = self._get_default_color(theme, default_color)
        # Code in the default color, followed by a color reset.
        return yuio.string.ColorizedString([color, code, yuio.color.Color.NONE])


SyntaxHighlighter.register_highlighter(_DummySyntaxHighlighter())
class _ReSyntaxHighlighter(SyntaxHighlighter):
    """
    Highlighter driven by a regular expression with named groups.

    Every non-empty named group of a match is painted with the color
    ``hl/<group>:<syntax>``. Anything before a ``__`` in the group name is
    dropped. When ``str_esc_pattern`` is given, matches of that pattern inside
    ``str`` groups are painted with ``hl/str/esc:<syntax>``.

    """

    def __init__(
        self,
        syntaxes: list[str],
        pattern: _tx.StrRePattern,
        str_esc_pattern: _tx.StrRePattern | None = None,
    ):
        self._syntaxes = syntaxes
        self._pattern = pattern
        self._str_esc_pattern = str_esc_pattern

    @property
    def syntaxes(self) -> list[str]:
        return self._syntaxes

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        base_color = self._get_default_color(theme, default_color)
        esc_pattern = self._str_esc_pattern

        result = yuio.string.ColorizedString()

        cursor = 0
        for token in self._pattern.finditer(code):
            # Text between matches goes out in the default color.
            token_start = token.start()
            if cursor < token_start:
                result += base_color
                result += code[cursor:token_start]
            cursor = token.end()

            # Groups are emitted in the order of their names.
            for group_name, group_text in sorted(token.groupdict().items()):
                if not group_text:
                    continue
                kind = group_name.split("__", maxsplit=1)[-1]

                if esc_pattern is None or kind != "str":
                    result += base_color | theme.get_color(f"hl/{kind}:{self.syntax}")
                    result += group_text
                    continue

                # String literal: paint escape sequences separately.
                str_color = base_color | theme.get_color(f"hl/str:{self.syntax}")
                esc_color = base_color | theme.get_color(
                    f"hl/str/esc:{self.syntax}"
                )
                tail = 0
                for esc in esc_pattern.finditer(group_text):
                    esc_start, esc_end = esc.start(), esc.end()
                    if tail < esc_start:
                        result += str_color
                        result += group_text[tail:esc_start]
                    tail = esc_end
                    if escape := group_text[esc_start:esc_end]:
                        result += esc_color
                        result += escape
                if tail < len(group_text):
                    result += str_color
                    result += group_text[tail:]

        if cursor < len(code):
            result += base_color
            result += code[cursor:]

        return result
# Tokens of Python code. Names of the groups are used to look up colors.
_PY_SYNTAX = re.compile(
    r"""
        (?P<kwd>
            \b(?:                                   # keyword
                and|as|assert|async|await|break|class|continue|def|del|elif|else|
                except|finally|for|from|global|if|import|in|is|lambda|
                nonlocal|not|or|pass|raise|return|try|while|with|yield
            )\b)
        | (?P<str>
            [rfut]*(                                # string prefix
                \"""(\\.|[^\\]|\n)*?\"""            # long doubly-quoted string
                | '''(\\.|[^\\]|\n)*?'''            # long singly-quoted string
                | '(?:\\.|[^\\'])*(?:'|\n)          # singly-quoted string
                | "(?:\\.|[^\\"])*(?:"|\n)))        # doubly-quoted string
        | (?P<lit>
            (?<![\.\w])(
                [+-]?0x[0-9a-fA-F]+                 # hex; must be tried before
                | [+-]?0b[01]+                      # bin; decimals, or `0` wins
                | [+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?   # int or float
                | [+-]?\.\d+(?:[eE][+-]?\d+)?       # float that starts with dot
                | \b(?:None|True|False)\b))         # bool or none
        | (?P<type>
            \b(?:                                   # type
                str|int|float|complex|list|tuple|range|dict|set|frozenset|bool|
                bytes|bytearray|memoryview|(?:[A-Z](?:[A-Z0-9_]*?[a-z]\w*)?)
            )\b)
        | (?P<punct>[{}()\[\]\\;,])                 # punctuation
        | (?P<comment>\#.*$)                        # comment
    """,
    re.MULTILINE | re.VERBOSE,
)
# Escape sequences inside Python string literals.
# NOTE(review): the template (`[{}]`) and percent-formatting alternatives sit
# inside the group that follows the backslash, so they only match right after
# a backslash -- confirm that this is intended.
_PY_ESC_PATTERN = re.compile(
    r"""
        \\(
            \n                                      # escaped newline
            | [\\'"abfnrtv]                         # normal escape
            | [0-7]{3}                              # octal escape
            | x[0-9a-fA-F]{2}                       # hex escape
            | u[0-9a-fA-F]{4}                       # short unicode escape
            | U[0-9a-fA-F]{8}                       # long unicode escape
            | N\{[^}\n]+\}                          # unicode character names
            | [{}]                                  # template
            | %                                     # percent formatting
              (?:\([^)]*\))?                        # mapping key
              [#0\-+ ]*                             # conversion Flag
              (?:\*|\d+)?                           # field width
              (?:\.(?:\*|\d*))?                     # precision
              [hlL]?                                # unused length modifier
              .                                     # conversion type
        )
    """,
    re.VERBOSE,
)
# Register the regex-based Python highlighter under all of its aliases.
# String tokens are additionally scanned with `_PY_ESC_PATTERN`.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["py", "py3", "py-3", "python", "python3", "python-3"],
        _PY_SYNTAX,
        str_esc_pattern=_PY_ESC_PATTERN,
    )
)
# The `repr` syntax reuses the Python token and escape patterns, but is a separate
# highlighter instance registered under its own name.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["repr"],
        _PY_SYNTAX,
        str_esc_pattern=_PY_ESC_PATTERN,
    )
)
# Shell highlighter (`sh`, `bash`). No escape pattern is passed, so string tokens
# are not scanned for escapes.
#
# The second alternative holds three named groups at once: a chaining operator
# (or line start), the whitespace after it, and the program name.
# NOTE(review): the `a0__`/`a1__`/`a2__` group-name prefixes are presumably decoded
# by `_ReSyntaxHighlighter`, which is defined outside this chunk — confirm there.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["sh", "bash"],
        re.compile(
            r"""
                (?P<kwd>
                    \b(?:                                   # keyword
                        if|then|elif|else|fi|time|for|in|until|while|do|done|case|
                        esac|coproc|select|function
                    )\b
                    | \[\[                                  # `test` syntax: if [[ ... ]]
                    | \]\])
                | (?P<a0__punct>(?:^|\|\|?|&&|\$\())        # chaining operator: pipe or logic
                  (?P<a1__>\s*)
                  (?P<a2__prog>[\w./~]([\w.@/-]|\\.)+)      # prog
                | (?P<str>
                    '[^']*'                                 # singly-quoted string
                    | "(?:\\.|[^\\"])*")                    # doubly-quoted string
                | (?P<punct>
                    [{}()\[\]\\;!&|]                        # punctuation
                    | <{1,3}                                # input redirect
                    | [12]?>{1,2}(?:&[12])?)                # output redirect
                | (?P<comment>\#.*$)                        # comment
                | (?P<flag>(?<![\w-])-[a-zA-Z0-9_-]+\b)     # flag
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    ),
)
# Highlighter for CLI usage strings (`sh-usage`, `bash-usage`).
#
# Differences from the plain shell pattern visible here: the program name is the
# literal placeholder `%(prog)s`, `<options>` is colored as a flag, and any other
# `<...>` span is a metavar. `<options>` is listed before the metavar alternative
# so that it wins the match.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["sh-usage", "bash-usage"],
        re.compile(
            r"""
                (?P<kwd>
                    \b(?:                                   # keyword
                        if|then|elif|else|fi|time|for|in|until|while|do|done|case|
                        esac|coproc|select|function
                    )\b)
                | (?P<prog>%\(prog\)s)                      # prog
                | (?P<str>
                    '[^']*'                                 # singly-quoted string
                    | "(?:\\.|[^\\"])*")                    # doubly-quoted string
                | (?P<comment>\#.*$)                        # comment
                | (?P<flag>(?<![\w-])
                    -[-\w]+\b                               # flag
                    | <options>                             # options
                )
                | (?P<metavar><[^>]+>)                      # metavar
                | (?P<punct>[{}()\[\]\\;!&|])               # punctuation
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    )
)
# Highlighter for unified diffs. Each token is a whole line.
#
# The `meta` alternative (`---`, `+++`, `@@` lines) is listed first: those lines
# also start with `-` or `+`, so they would otherwise match `removed`/`added`.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["diff"],
        re.compile(
            r"""
                (?P<meta>^(\-\-\-|\+\+\+|\@\@)[^\r\n]*$)
                | (?P<added>^\+[^\r\n]*$)
                | (?P<removed>^\-[^\r\n]*$)
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    ),
)
# Highlighter for JSON. Only `true`/`false`/`null`, strings, and punctuation are
# tokenized; the pattern has no alternative for numbers.
#
# The escape pattern covers a backslash followed by a newline, by one of
# `\ / " b f n r t`, or by `u` and four hex digits.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["json"],
        re.compile(
            r"""
                (?P<lit>\b(?:true|false|null)\b)            # keyword
                | (?P<str>"(?:\\.|[^\\"])*(?:"|\n))         # doubly-quoted string
                | (?P<punct>[{}\[\],:])                     # punctuation
            """,
            re.MULTILINE | re.VERBOSE,
        ),
        str_esc_pattern=re.compile(
            r"""
                \\(
                    \n
                    | [\\/"bfnrt]
                    | u[0-9a-fA-F]{4}
                )
            """,
            re.VERBOSE,
        ),
    ),
)
class _TbHighlighter(SyntaxHighlighter):
    """
    Highlighter for Python tracebacks, including exception group tracebacks.

    Input is processed line by line by a small state machine with three states:
    plain text, stack frames, and exception message. Highlighting only adds colors;
    the characters of every input line are emitted unchanged.

    """

    @property
    def syntaxes(self) -> list[str]:
        return [
            "tb",
            "traceback",
            "py-tb",
            "py3-tb",
            "py-3-tb",
            "py-traceback",
            "py3-traceback",
            "py-3-traceback",
            "python-tb",
            "python3-tb",
            "python-3-tb",
            "python-traceback",
            "python3-traceback",
            "python-3-traceback",
        ]

    class _StackColors:
        """
        Colors for one kind of stack frame.

        `tag` selects the theme path segment (`tb/frame/<tag>/...`); every color
        is layered on top of `default_color`.

        """

        def __init__(
            self, theme: yuio.theme.Theme, default_color: yuio.color.Color, tag: str
        ):
            self.file_color = default_color | theme.get_color(f"tb/frame/{tag}/file")
            self.file_path_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/path"
            )
            self.file_line_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/line"
            )
            self.file_module_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/module"
            )
            self.code_color = default_color | theme.get_color(f"tb/frame/{tag}/code")
            self.highlight_color = default_color | theme.get_color(
                f"tb/frame/{tag}/highlight"
            )

    # Traceback header, optionally prefixed with exception group markers.
    _TB_RE = re.compile(
        r"^(?P<indent>[ |+]*)(Stack|Traceback|Exception Group Traceback) \(most recent call last\):$"
    )
    # First line of an exception message: `Name` or `Name: details`.
    _TB_MSG_RE = re.compile(r"^(?P<indent>[ |+]*)[A-Za-z_][A-Za-z0-9_]*($|:.*$)")
    # Frame location line: `File "path", line N[, in where]`.
    _TB_LINE_FILE = re.compile(
        r'^(?P<indent>[ |+]*)File (?P<file>"[^"]*"), line (?P<line>\d+)(?:, in (?P<loc>.*))?$'
    )
    # Line made only of indent and marker characters (`^`, `~`, `-`).
    _TB_LINE_HIGHLIGHT = re.compile(r"^[ |+^~-]*$")
    # Path fragments that mark a frame as library code rather than user code.
    _SITE_PACKAGES = os.sep + "lib" + os.sep + "site-packages" + os.sep
    _LIB_PYTHON = os.sep + "lib" + os.sep + "python"

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        """
        Colorize a traceback.

        :param theme:
            theme that supplies `tb/...` colors.
        :param code:
            traceback text; may contain surrounding plain text and several
            tracebacks.
        :param default_color:
            base color, resolved via `_get_default_color`.
        :returns:
            colorized string whose text is identical to `code`.

        """

        default_color = self._get_default_color(theme, default_color)

        py_highlighter = SyntaxHighlighter.get_highlighter("python")

        heading_color = default_color | theme.get_color("tb/heading")
        message_color = default_color | theme.get_color("tb/message")

        stack_normal_colors = self._StackColors(theme, default_color, "usr")
        stack_lib_colors = self._StackColors(theme, default_color, "lib")
        stack_colors = stack_normal_colors

        res = yuio.string.ColorizedString()

        PLAIN_TEXT, STACK, MESSAGE = 1, 2, 3
        state = PLAIN_TEXT
        stack_indent = ""
        message_indent = ""

        for line in code.splitlines(keepends=True):
            if state == STACK:
                if line.startswith(stack_indent):
                    # We're still in the stack.
                    if match := self._TB_LINE_FILE.match(line):
                        file, lineno, loc = match.group("file", "line", "loc")

                        if self._SITE_PACKAGES in file or self._LIB_PYTHON in file:
                            stack_colors = stack_lib_colors
                        else:
                            stack_colors = stack_normal_colors

                        # Emit the pieces exactly as they appear in the line so
                        # that the original indent and terminator are preserved.
                        res += yuio.color.Color.NONE
                        res += match.group("indent")
                        res += stack_colors.file_color
                        res += "File "
                        res += stack_colors.file_path_color
                        res += file
                        res += stack_colors.file_color
                        res += ", line "
                        res += stack_colors.file_line_color
                        res += lineno
                        res += stack_colors.file_color

                        if loc is not None:
                            res += ", in "
                            res += stack_colors.file_module_color
                            res += loc
                            res += stack_colors.file_color

                        # `$` matches before a trailing newline, so whatever
                        # follows the match is the line terminator (if any).
                        res += line[match.end() :]
                    elif self._TB_LINE_HIGHLIGHT.match(line):
                        res += yuio.color.Color.NONE
                        res += stack_indent
                        res += stack_colors.highlight_color
                        res += line[len(stack_indent) :]
                    else:
                        # Source line of the frame; reuse colors of the last
                        # seen `File` line.
                        res += yuio.color.Color.NONE
                        res += stack_indent
                        res += py_highlighter.highlight(
                            theme,
                            line[len(stack_indent) :],
                            stack_colors.code_color,
                        )
                    continue

                # Stack has ended, this line is actually a message.
                state = MESSAGE

            if state == MESSAGE:
                if line != "\n" and line.startswith(message_indent):
                    # We're still in the message.
                    res += yuio.color.Color.NONE
                    res += message_indent
                    res += message_color
                    res += line[len(message_indent) :]
                    continue

                # Message has ended, this line is actually a plain text.
                state = PLAIN_TEXT

            # Plain text: look for a traceback heading first, then for a bare
            # error message (one without a traceback).
            if match := self._TB_RE.match(line):
                next_state, start_color = STACK, heading_color
            elif match := self._TB_MSG_RE.match(line):
                next_state, start_color = MESSAGE, message_color
            else:
                # We're still in plain text.
                res += yuio.color.Color.NONE
                res += line
                continue

            raw_indent = match.group("indent")
            # Following lines of an exception group continue a `+` marker
            # with `|`, so prefixes are compared against the normalized form.
            message_indent = raw_indent.replace("+", "|")
            # CPython indents frame lines by two spaces relative to the header.
            stack_indent = message_indent + "  "

            # Emit the prefix as written in the input (it may contain `+`),
            # not the normalized one, to keep the text intact.
            res += yuio.color.Color.NONE
            res += raw_indent
            res += start_color
            res += line[len(raw_indent) :]

            state = next_state

        return res
# Register the traceback highlighter under every name listed in its `syntaxes`.
SyntaxHighlighter.register_highlighter(_TbHighlighter())