Coverage for yuio / md.py: 90%
537 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-04 10:05 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-04 10:05 +0000
1# Yuio project, MIT license.
2#
3# https://github.com/taminomara/yuio/
4#
5# You're free to copy this file to your project and edit it for your needs,
6# just keep this copyright line please :3
8"""
9Yuio's primary format for higher-level io is Markdown (well, a reasonably rich subset
10of it).
13Formatting markdown
14-------------------
16.. autoclass:: MdFormatter
17 :members:
20.. _highlighting-code:
22Highlighting code
23-----------------
25Yuio supports basic code highlighting; it is just enough to format help messages
26for CLI, and color tracebacks when an error occurs.
28.. autoclass:: SyntaxHighlighter
29 :members:
32Markdown AST
33------------
35.. warning::
37 This is an experimental API which can change within a minor release.
39.. autoclass:: AstBase
40 :members:
42.. autoclass:: Text
43 :members:
45.. autoclass:: Container
46 :members:
48.. autoclass:: Document
49 :members:
51.. autoclass:: ThematicBreak
52 :members:
54.. autoclass:: Heading
55 :members:
57.. autoclass:: Paragraph
58 :members:
60.. autoclass:: Quote
61 :members:
63.. autoclass:: Code
64 :members:
66.. autoclass:: ListItem
67 :members:
69.. autoclass:: List
70 :members:
73"""
75from __future__ import annotations
77import abc
78import contextlib
79import dataclasses
80import math
81import os
82import re
83import shutil
84from dataclasses import dataclass
86import yuio.color
87import yuio.string
88import yuio.theme
89from yuio import _typing as _t
90from yuio.util import dedent as _dedent
# Public API of this module. Every exported name is listed exactly once,
# in alphabetical order.
__all__ = [
    "AstBase",
    "Code",
    "Container",
    "Document",
    "Heading",
    "List",
    "ListItem",
    "MdFormatter",
    "Paragraph",
    "Quote",
    "SyntaxHighlighter",
    "Text",
    "ThematicBreak",
]

T = _t.TypeVar("T")
# Type of the nodes stored in a `Container`; always an `AstBase` subclass.
TAst = _t.TypeVar("TAst", bound="AstBase")
@_t.final
class MdFormatter:
    """
    A simple markdown formatter suitable for displaying rich text in the terminal.

    :param theme:
        a theme that's used to colorize rendered markdown.
    :param width:
        maximum width for wrapping long paragraphs. If not given, it is inferred
        via :func:`shutil.get_terminal_size`.
    :param allow_headings:
        if set to :data:`False`, headings are rendered as paragraphs.

    All CommonMark block markup except tables is supported:

    - headings:

      .. code-block:: markdown

          # Heading 1
          ## Heading 2

      Yuio has only two levels of headings. Headings past level two will look the same
      as level two headings (you can adjust theme to change this).

      If ``allow_headings`` is set to :data:`False`, headings look like paragraphs.

    - lists, numbered lists, quotes:

      .. code-block:: markdown

          - List item 1,
          - list item 2.

          1. Numbered list item 1,
          1. numbered list item 2.

          > Quoted text.

    - fenced code blocks with minimal syntax highlighting
      (see :class:`SyntaxHighlighter`):

      .. code-block:: markdown

          ```python
          for i in range(5, 8):
              print(f"Hello, world! This is {i}th day past the apocalypse.")
          ```

      Yuio supports ``python``, ``traceback``, ``bash``, ``diff``,
      and ``json`` syntaxes.

    Inline markdown only handles inline code blocks:

    .. code-block:: markdown

        This is `code`. It will be rendered as code.
        Other inline styles, such as _italic_, are not supported!

    However, color tags are supported, so you can highlight text as follows:

    .. code-block:: markdown

        This is <c b>bold text</c>. It will be rendered bold.

    """

    def __init__(
        self,
        theme: yuio.theme.Theme,
        *,
        width: int | None = None,
        allow_headings: bool = True,
    ):
        self.width = width
        self.theme: yuio.theme.Theme = theme
        self.allow_headings: bool = allow_headings

        # Rendering state. These are declared here and assigned
        # at the start of every `format_node` call.
        self._is_first_line: bool
        self._out: list[yuio.string.ColorizedString]
        self._indent: yuio.string.ColorizedString
        self._continuation_indent: yuio.string.ColorizedString

    @property
    def width(self) -> int:
        """
        Target width for soft-wrapping text.

        Assigning :data:`None` resets the width to the current terminal width;
        negative values are clamped to zero.

        """

        return self.__width

    @width.setter
    def width(self, width: int | None):
        if width is None:
            width = shutil.get_terminal_size().columns
        self.__width = max(width, 0)

    def format(
        self, md: str, *, dedent: bool = True
    ) -> list[yuio.string.ColorizedString]:
        """
        Format a markdown document.

        :param md:
            markdown to format. Common indentation will be removed from this string,
            making it suitable to use with triple quote literals.
        :param dedent:
            remove leading indent from markdown.
        :returns:
            rendered markdown as a list of individual lines without newline
            characters at the end.

        """

        return self.format_node(self.parse(md, dedent=dedent))

    def parse(self, md: str, /, *, dedent: bool = True) -> Document:
        """
        Parse a markdown document and return an AST node.

        .. warning::

            This is an experimental API which can change within a minor release.

        :param md:
            markdown to parse. Common indentation will be removed from this string,
            making it suitable to use with triple quote literals.
        :param dedent:
            remove leading indent from markdown.
        :returns:
            parsed AST node.

        """

        if dedent:
            md = _dedent(md)

        return _MdParser(self.allow_headings).parse(md)

    def format_node(self, node: AstBase, /) -> list[yuio.string.ColorizedString]:
        """
        Format a parsed markdown document.

        .. warning::

            This is an experimental API which can change within a minor release.

        :param node:
            AST node to format.
        :returns:
            rendered markdown as a list of individual lines without newline
            characters at the end.

        """

        # Reset rendering state so that a formatter can be reused.
        self._is_first_line = True
        self._out = []
        self._indent = yuio.string.ColorizedString()
        self._continuation_indent = yuio.string.ColorizedString()

        self._format(node)

        return self._out

    def colorize(
        self,
        text: str,
        /,
        *,
        default_color: yuio.color.Color | str = yuio.color.Color.NONE,
    ):
        """
        Parse and colorize contents of a paragraph.

        This is a shortcut for calling :func:`yuio.string.colorize`
        with this formatter's theme.

        :param text:
            text to colorize.
        :param default_color:
            color or color tag to apply to the entire text.
        :returns:
            a colorized string.

        """

        return yuio.string.colorize(text, default_color=default_color, ctx=self.theme)

    @contextlib.contextmanager
    def _with_indent(
        self,
        color: yuio.color.Color | str | None,
        s: yuio.string.AnyString,
        /,
        *,
        continue_with_spaces: bool = True,
    ):
        # Temporarily extend the current indent with decoration `s` painted
        # in `color`. Continuation lines get either the same decoration or,
        # when `continue_with_spaces` is set, spaces of the same width.
        # Previous indents are restored on exit, even if formatting fails.
        color = self.theme.to_color(color)
        indent = yuio.string.ColorizedString(color)
        indent += s

        old_indent = self._indent
        old_continuation_indent = self._continuation_indent

        if continue_with_spaces:
            continuation_indent = yuio.string.ColorizedString(" " * indent.width)
        else:
            continuation_indent = indent

        self._indent = self._indent + indent
        self._continuation_indent = self._continuation_indent + continuation_indent

        try:
            yield
        finally:
            self._indent = old_indent
            self._continuation_indent = old_continuation_indent

    def _line(self, line: yuio.string.ColorizedString, /):
        # Emit one output line. After the first line is out, every following
        # line uses the continuation indent.
        self._out.append(line)

        self._is_first_line = False
        self._indent = self._continuation_indent

    def _format(self, node: AstBase, /):
        # Dispatch to `_format_<ClassName>` based on the node's class.
        getattr(self, f"_format_{node.__class__.__name__.lstrip('_')}")(node)

    def _format_Text(self, node: Text, /, *, default_color: yuio.color.Color):
        s = self.colorize(
            "\n".join(node.lines).strip(),
            default_color=default_color,
        )

        for line in s.wrap(
            self.width,
            indent=self._indent,
            continuation_indent=self._continuation_indent,
            preserve_newlines=False,
        ):
            self._line(line)

    def _format_Container(self, node: Container[TAst], /):
        # Items are separated by a line that contains only the indent.
        self._is_first_line = True
        for item in node.items:
            if not self._is_first_line:
                self._line(self._indent)
            self._format(item)

    def _format_Document(self, node: Document, /):
        self._format_Container(node)

    def _format_ThematicBreak(self, _: ThematicBreak):
        decoration = self.theme.msg_decorations.get("thematic_break", "")
        self._line(self._indent + decoration)

    def _format_Heading(self, node: Heading, /):
        if not self._is_first_line:
            self._line(self._indent)

        decoration = self.theme.msg_decorations.get(f"heading/{node.level}", "")
        with self._with_indent(f"msg/decoration:heading/{node.level}", decoration):
            self._format_Text(
                node,
                default_color=self.theme.get_color(f"msg/text:heading/{node.level}"),
            )

        self._line(self._indent)
        # The heading already emitted its own trailing separator line,
        # so the enclosing container must not add another one.
        self._is_first_line = True

    def _format_Paragraph(self, node: Paragraph, /):
        self._format_Text(
            node, default_color=self.theme.get_color("msg/text:paragraph")
        )

    def _format_ListItem(self, node: ListItem, /, *, min_width: int = 0):
        decoration = self.theme.msg_decorations.get("list", "")
        if node.number is not None:
            # Right-align the number in `min_width` columns and pad the marker
            # with spaces up to the width of the bullet decoration.
            decoration = f"{node.number:>{min_width}}." + " " * (
                yuio.string.line_width(decoration) - min_width - 1
            )
        with self._with_indent("msg/decoration:list", decoration):
            self._format_Container(node)

    def _format_Quote(self, node: Quote, /):
        decoration = self.theme.msg_decorations.get("quote", "")
        with self._with_indent(
            "msg/decoration:quote", decoration, continue_with_spaces=False
        ):
            self._format_Container(node)

    def _format_Code(self, node: Code, /):
        s = SyntaxHighlighter.get_highlighter(node.syntax).highlight(
            self.theme,
            "\n".join(node.lines),
        )

        decoration = self.theme.msg_decorations.get("code", "")
        with self._with_indent("msg/decoration:code", decoration):
            self._line(
                s.indent(
                    indent=self._indent,
                    continuation_indent=self._continuation_indent,
                )
            )

    def _format_List(self, node: List, /):
        # Number of digits in the largest item number; all item numbers are
        # right-aligned to this width. Counting digits directly is exact for
        # every value, including powers of ten, and `default=0` keeps an empty
        # list from raising. Bullet lists (all numbers are `None`) get width 1.
        max_number = max((item.number or 0 for item in node.items), default=0)
        min_width = len(str(max_number))
        self._is_first_line = True
        for item in node.items:
            if not self._is_first_line:
                self._line(self._indent)
            self._format_ListItem(item, min_width=min_width)
438@dataclass(kw_only=True, slots=True)
439class AstBase(abc.ABC):
440 """
441 Base class for all AST nodes that represent parsed markdown document.
443 """
445 def _dump_params(self) -> str:
446 s = self.__class__.__name__.lstrip("_")
447 for field in dataclasses.fields(self):
448 if field.repr:
449 s += f" {getattr(self, field.name)!r}"
450 return s
452 def dump(self, indent: str = "") -> str:
453 """
454 Dump an AST node into a lisp-like text representation.
456 """
458 return f"{indent}({self._dump_params()})"
@dataclass(kw_only=True, slots=True)
class Text(AstBase):
    """
    Common ancestor of nodes that carry raw text lines, e.g. paragraphs.

    """

    lines: list[str] = dataclasses.field(repr=False)
    """
    Lines of text exactly as they were parsed from the source document.

    """

    def dump(self, indent: str = "") -> str:
        # Header on the first line, then one `repr` of each text line,
        # indented one extra step, and the closing parenthesis at the end.
        nested = indent + " "
        chunks = [f"{indent}({self._dump_params()}"]
        chunks.extend(f"\n{nested}{line!r}" for line in self.lines)
        chunks.append(")")
        return "".join(chunks)
@dataclass(kw_only=True, slots=True)
class Container(AstBase, _t.Generic[TAst]):
    """
    Common ancestor of nodes that hold other nodes, e.g. list items or quotes.

    A container behaves as a list of child nodes. Children are usually arbitrary
    AST nodes, but the generic parameter can narrow them to a specific node type.

    """

    items: list[TAst] = dataclasses.field(repr=False)
    """
    Child AST nodes held by this container.

    """

    def dump(self, indent: str = "") -> str:
        # Header on the first line, then every child dumped on its own line
        # with one extra step of indentation.
        header = f"{indent or ''}({self._dump_params()}"
        nested = indent + " "
        children = "".join("\n" + child.dump(nested) for child in self.items)
        return header + children + ")"
@dataclass(kw_only=True, slots=True)
class Document(Container[AstBase]):
    """
    Root node that contains the entire markdown document.

    Its items are the top-level blocks of the document.

    """
@dataclass(kw_only=True, slots=True)
class ThematicBreak(AstBase):
    """
    Represents a visual break in text, a.k.a. an asterism.

    This node carries no data of its own.

    """
@dataclass(kw_only=True, slots=True)
class Heading(Text):
    """
    Represents a heading; heading text is stored in the inherited ``lines``.

    """

    level: int
    """
    Level of the heading, `1`-based.

    """
@dataclass(kw_only=True, slots=True)
class Paragraph(Text):
    """
    Represents a regular paragraph; its text is stored in the inherited ``lines``.

    """
@dataclass(kw_only=True, slots=True)
class Quote(Container[AstBase]):
    """
    Represents a quotation block; quoted blocks are stored in the inherited ``items``.

    """
@dataclass(kw_only=True, slots=True)
class Code(Text):
    """
    Represents a highlighted block of code; code lines are stored
    in the inherited ``lines``.

    """

    syntax: str
    """
    Syntax indicator as parsed from the original document.

    """
@dataclass(kw_only=True, slots=True)
class ListItem(Container[AstBase]):
    """
    A possibly numbered element of a list; its content is stored
    in the inherited ``items``.

    """

    number: int | None
    """
    If present, this is the item's number in a numbered list.

    """
@dataclass(kw_only=True, slots=True)
class List(Container[ListItem]):
    """
    A collection of list items; unlike other containers, it holds
    only :class:`ListItem` nodes.

    """
# Block-level markdown patterns. Each one is matched against a single line
# with line terminators already removed.

# ATX heading, i.e. `# Heading`. The `text` group does not participate
# in the match when the heading is empty (a bare `#`).
_HEADING_RE = re.compile(
    r"""
    ^
    \s{0,3}                 # - Initial indent.
    (?P<marker>\#{1,6})     # - Heading marker.
    (?P<text>\s.*?)?        # - Heading text. Unless empty, text must be separated
                            #   from the heading marker by a space.
    (?:(?<=\s)\#+)?         # - Optional closing hashes. Must be separated from
                            #   the previous content by a space. We use lookbehind
                            #   here, because if the text is empty, the space
                            #   between heading marker and closing hashes will be
                            #   matched by the `text` group.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# Underline of a setext heading, i.e. a line of `=` or `-` under a paragraph.
_SETEXT_HEADING_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<level>-|=)          # - Heading underline.
    \2*                     # - More heading underline.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# First line of a bullet list item.
_LIST_RE = re.compile(
    r"""
    ^
    (?P<marker>
        \s{0,3}                     # - Initial indent.
        (?P<type>[-*+])             # - List marker.
        (?:
            \s(?:\s{0,3}(?=\S))?    # - One mandatory and up to three optional spaces;
                                    #   When there are more than three optional spaces,
                                    #   we treat them as a list marker followed
                                    #   by a single space, followed by a code block.
          | $))                     # - For cases when a list starts with an empty line.
    (?P<text>.*)                    # - Text of the first line in the list.
    $
    """,
    re.VERBOSE,
)
# First line of a numbered list item.
_NUMBERED_LIST_RE = re.compile(
    r"""
    ^
    (?P<marker>
        \s{0,3}                     # - Initial indent.
        (?P<number>\d{1,9})         # - Number.
        (?P<type>[.:)])             # - Numbered list marker.
        (?:
            \s(?:\s{0,3}(?=\S))?    # - One mandatory and up to three optional spaces;
                                    #   When there are more than three optional spaces,
                                    #   we treat them as a list marker followed
                                    #   by a single space, followed by a code block.
          | $))                     # - For cases when a list starts with an empty line.
    (?P<text>.*)                    # - Text of the first line in the list.
    $
    """,
    re.VERBOSE,
)
# Opening line of a code block fenced with backticks.
_CODE_BACKTICK_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>```+)         # - Backtick fence.
    (?P<syntax>[^`]*)       # - Syntax, can't contain backtick.
    $
    """,
    re.VERBOSE,
)
# Opening line of a code block fenced with tildes.
_CODE_TILDE_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>~~~+)         # - Tilde fence.
    (?P<syntax>.*)          # - Syntax, can be anything.
    $
    """,
    re.VERBOSE,
)
# Closing line of a fenced code block (either fence kind).
_CODE_FENCE_END_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    (?P<fence>~~~+|```+)    # - Fence.
    \s*                     # - Closing spaces.
    $
    """,
    re.VERBOSE,
)
# First line of an indented code block.
_CODE_RE = re.compile(
    r"""
    ^
    \s{4}                   # - Initial code indent.
    (?P<text>.*)            # - First code line.
    $
    """,
    re.VERBOSE,
)
# Line of a block quote.
_QUOTE_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial quote indent.
    >                       # - Quote marker.
    \s?                     # - Optional space after the marker.
    (?P<text>.*)            # - Text of the first line in the quote.
    $
    """,
    re.VERBOSE,
)
# Thematic break, i.e. `---`, `* * *` or `___`.
_THEMATIC_BREAK_RE = re.compile(
    r"""
    ^
    (?P<indent>\s{0,3})     # - Initial indent.
    ([-*_])\s*(\2\s*){2,}   # - At least three break characters separated by spaces.
    $
    """,
    re.VERBOSE,
)
# Any of the three common line terminators.
_LINE_FEED_RE = re.compile(r"\r\n|\r|\n")
class _MdParser:
    """
    Line-by-line parser that turns markdown text into AST nodes.

    The parser is a state machine. Its current state is an instance of one of
    the nested state classes, and every input line is dispatched to
    ``_handle_line_<StateName>``. Container states (lists and quotes) own
    a nested parser that handles the container's content.

    :param allow_headings:
        if :data:`False`, neither ATX (``# Heading``) nor setext (underlined)
        headings are recognized.

    """

    @dataclass(kw_only=True, slots=True)
    class Default:
        # Between blocks; the next non-blank line starts a new block.
        pass

    @dataclass(kw_only=True, slots=True)
    class List:
        # Inside a list. `type` is the marker character of the list,
        # `marker_len` is the width of the current item's marker, `list` is
        # the node under construction, `parser` handles the current item's
        # content, and `number` is the current item's number (if numbered).
        type: str
        marker_len: int
        list: List
        parser: _MdParser
        number: int | None = None

    @dataclass(kw_only=True, slots=True)
    class Quote:
        # Inside a block quote; `parser` handles the quoted content.
        parser: _MdParser

    @dataclass(kw_only=True, slots=True)
    class Code:
        # Inside an indented code block.
        lines: list[str]

    @dataclass(kw_only=True, slots=True)
    class FencedCode:
        # Inside a fenced code block. `indent` is the width of the opening
        # fence's indent, which is removed from every code line.
        indent: int
        fence_symbol: str
        fence_length: int
        syntax: str
        lines: list[str]

    @dataclass(kw_only=True, slots=True)
    class Paragraph:
        # Inside a paragraph.
        lines: list[str]

    State: _t.TypeAlias = Default | List | Quote | Code | FencedCode | Paragraph

    def __init__(self, allow_headings: bool = True):
        self._allow_headings = allow_headings
        self._nodes: list[AstBase] = []
        self._state: _MdParser.State = self.Default()

    def _parser(self) -> _MdParser:
        # Create a nested parser with the same settings.
        return _MdParser(self._allow_headings)

    @staticmethod
    def _is_blank(s: str) -> bool:
        return not s or s.isspace()

    def parse(self, s: str) -> Document:
        """
        Parse the given text and return the resulting document node.

        Tabs are expanded to four-column tab stops before parsing.

        """

        s = s.expandtabs(tabsize=4)
        for line in _LINE_FEED_RE.split(s):
            self._handle_line(line)
        return Document(items=self._finalize())

    def _handle_line(self, line: str):
        # Dispatch a line to the handler of the current state.
        getattr(self, f"_handle_line_{self._state.__class__.__name__}")(line)

    def _handle_lazy_line(self, line: str) -> bool:
        # Offer a line that doesn't carry the enclosing container's marker
        # to the current state; returns `True` if the line was consumed.
        return getattr(self, f"_handle_lazy_line_{self._state.__class__.__name__}")(
            line
        )

    def _flush(self):
        # Finish the block that's currently being parsed.
        getattr(self, f"_flush_{self._state.__class__.__name__}")()

    def _handle_line_List(self, line: str):
        assert type(self._state) is self.List
        if not line or line[: self._state.marker_len].isspace():
            # Line is indented past the marker: it belongs to the current item.
            self._state.parser._handle_line(line[self._state.marker_len :])
        elif (
            (match := _LIST_RE.match(line)) or (match := _NUMBERED_LIST_RE.match(line))
        ) and match.group("type") == self._state.type:
            # Same kind of marker: finish the current item and start a new one.
            item = ListItem(
                items=self._state.parser._finalize(),
                number=self._state.number,
            )
            self._state.list.items.append(item)
            self._state.marker_len = len(match.group("marker"))
            self._state.parser._handle_line(match.group("text"))
            if self._state.number is not None:
                self._state.number += 1
        elif not self._state.parser._handle_lazy_line(line):
            self._flush_List()
            self._handle_line_Default(line)

    def _handle_lazy_line_List(self, line: str) -> bool:
        assert type(self._state) is self.List
        return bool(self._state.parser._handle_lazy_line(line))

    def _flush_List(self):
        assert type(self._state) is self.List
        item = ListItem(
            items=self._state.parser._finalize(),
            number=self._state.number,
        )
        self._state.list.items.append(item)
        self._nodes.append(self._state.list)
        self._state = self.Default()

    def _handle_line_Quote(self, line: str):
        assert type(self._state) is self.Quote
        if match := _QUOTE_RE.match(line):
            self._state.parser._handle_line(match.group("text"))
        elif self._is_blank(line) or not self._state.parser._handle_lazy_line(line):
            self._flush_Quote()
            self._handle_line_Default(line)

    def _handle_lazy_line_Quote(self, line: str) -> bool:
        assert type(self._state) is self.Quote
        return bool(self._state.parser._handle_lazy_line(line))

    def _flush_Quote(self):
        assert type(self._state) is self.Quote
        self._nodes.append(Quote(items=self._state.parser._finalize()))
        self._state = self.Default()

    def _handle_line_Code(self, line: str):
        assert type(self._state) is self.Code
        # A code line must carry the full four-space indent, because exactly
        # four characters are stripped from it below.
        if self._is_blank(line) or line.startswith("    "):
            self._state.lines.append(line[4:])
        else:
            self._flush_Code()
            self._handle_line_Default(line)

    def _handle_lazy_line_Code(self, line: str) -> bool:
        assert type(self._state) is self.Code
        return False  # No lazy continuations for code!

    def _flush_Code(self):
        assert type(self._state) is self.Code
        # Blank lines at the end of an indented code block are dropped.
        while self._state.lines and self._is_blank(self._state.lines[-1]):
            self._state.lines.pop()
        self._nodes.append(
            Code(
                lines=self._state.lines,
                syntax="",
            )
        )
        self._state = self.Default()

    def _handle_line_FencedCode(self, line: str):
        assert type(self._state) is self.FencedCode
        if (
            (match := _CODE_FENCE_END_RE.match(line))
            and match.group("fence")[0] == self._state.fence_symbol
            # CommonMark: a closing fence must be at least as long
            # as the opening one; it may be longer.
            and len(match.group("fence")) >= self._state.fence_length
        ):
            self._flush_FencedCode()
        else:
            # Remove the opening fence's indent from the code line; if the
            # line is indented less than that, remove whatever indent it has.
            if self._state.indent == 0:
                pass
            elif line[: self._state.indent].isspace():
                line = line[self._state.indent :]
            else:
                line = line.lstrip()
            self._state.lines.append(line)

    def _handle_lazy_line_FencedCode(self, line: str) -> bool:
        assert type(self._state) is self.FencedCode
        return False

    def _flush_FencedCode(self):
        assert type(self._state) is self.FencedCode
        self._nodes.append(
            Code(
                lines=self._state.lines,
                syntax=self._state.syntax,
            )
        )
        self._state = self.Default()

    def _handle_line_Paragraph(self, line: str):
        assert type(self._state) is self.Paragraph
        # Setext underlines turn the paragraph into a heading, but only when
        # headings are allowed; otherwise the line goes through the regular
        # checks below, same as an ATX heading marker would.
        if self._allow_headings and (match := _SETEXT_HEADING_RE.match(line)):
            level = 1 if match.group("level") == "=" else 2
            self._nodes.append(
                Heading(
                    lines=self._state.lines,
                    level=level,
                )
            )
            self._state = self.Default()
        elif (
            self._is_blank(line)
            or _THEMATIC_BREAK_RE.match(line)
            or (self._allow_headings and _HEADING_RE.match(line))
            or _CODE_BACKTICK_RE.match(line)
            or _CODE_TILDE_RE.match(line)
            or _LIST_RE.match(line)
            or _NUMBERED_LIST_RE.match(line)
            or _QUOTE_RE.match(line)
        ):
            # Anything that can start a new block interrupts the paragraph.
            self._flush_Paragraph()
            self._handle_line_Default(line)
        else:
            self._state.lines.append(line)

    def _handle_lazy_line_Paragraph(self, line: str) -> bool:
        assert type(self._state) is self.Paragraph
        if (
            self._is_blank(line)
            or _THEMATIC_BREAK_RE.match(line)
            or (self._allow_headings and _HEADING_RE.match(line))
            or _CODE_BACKTICK_RE.match(line)
            or _CODE_TILDE_RE.match(line)
            or _LIST_RE.match(line)
            or _NUMBERED_LIST_RE.match(line)
            or _QUOTE_RE.match(line)
        ):
            self._flush_Paragraph()
            return False
        else:
            self._state.lines.append(line)
            return True

    def _flush_Paragraph(self):
        assert type(self._state) is self.Paragraph
        self._nodes.append(Paragraph(lines=self._state.lines))
        self._state = self.Default()

    def _handle_line_Default(self, line: str):
        assert type(self._state) is self.Default
        if self._is_blank(line):
            pass  # do nothing
        elif _THEMATIC_BREAK_RE.match(line):
            self._nodes.append(ThematicBreak())
        elif self._allow_headings and (match := _HEADING_RE.match(line)):
            level = len(match.group("marker"))
            # The `text` group is `None` for an empty heading (a bare `#`).
            text = match.group("text") or ""
            self._nodes.append(
                Heading(
                    lines=[text.strip()],
                    level=level,
                )
            )
        elif (match := _CODE_BACKTICK_RE.match(line)) or (
            match := _CODE_TILDE_RE.match(line)
        ):
            indent = len(match.group("indent"))
            syntax = match.group("syntax").strip()
            fence_symbol = match.group("fence")[0]
            fence_length = len(match.group("fence"))
            self._state = self.FencedCode(
                indent=indent,
                fence_symbol=fence_symbol,
                fence_length=fence_length,
                syntax=syntax,
                lines=[],
            )
        elif match := _CODE_RE.match(line):
            self._state = self.Code(lines=[match.group("text")])
        elif (match := _LIST_RE.match(line)) or (
            match := _NUMBERED_LIST_RE.match(line)
        ):
            indent = len(match.group("marker"))
            list_type = match.group("type")
            # Only the numbered list pattern has a `number` group.
            number_str = match.groupdict().get("number", None)
            number = int(number_str) if number_str else None
            self._state = self.List(
                type=list_type,
                marker_len=indent,
                list=List(items=[]),
                parser=self._parser(),
                number=number,
            )
            self._state.parser._handle_line(match.group("text"))
        elif match := _QUOTE_RE.match(line):
            self._state = self.Quote(parser=self._parser())
            self._state.parser._handle_line(match.group("text"))
        else:
            self._state = self.Paragraph(lines=[line])

    def _handle_lazy_line_Default(self, line: str) -> bool:
        assert type(self._state) is self.Default
        return False

    def _flush_Default(self):
        assert type(self._state) is self.Default

    def _finalize(self) -> list[AstBase]:
        # Finish the current block and hand over all nodes collected so far,
        # leaving the parser empty and ready for reuse.
        self._flush()
        result = self._nodes
        self._nodes = []
        return result
_SYNTAXES: dict[str, SyntaxHighlighter] = {}
"""
Global syntax registry.

Keys are normalized syntax names: lowercase, with underscores replaced by dashes.

"""


class SyntaxHighlighter(abc.ABC):
    """
    Base class for syntax highlighters.

    Subclasses provide the list of syntax names they handle and implement
    :meth:`~SyntaxHighlighter.highlight`. Instances are made available
    to markdown code blocks via :meth:`~SyntaxHighlighter.register_highlighter`.

    """

    @property
    @abc.abstractmethod
    def syntaxes(self) -> list[str]:
        """
        List of syntax names that should be associated with this highlighter.

        """

        return []

    @property
    def syntax(self) -> str:
        """
        The primary syntax name for this highlighter, defaults to the first element
        of the :attr:`~SyntaxHighlighter.syntaxes` list.

        This name is used to look up colors in a theme.

        """

        return self.syntaxes[0] if self.syntaxes else "unknown"

    @classmethod
    def register_highlighter(cls, highlighter: SyntaxHighlighter):
        """
        Register a highlighter in a global registry, and allow looking it up
        via the :meth:`~SyntaxHighlighter.get_highlighter` method.

        Syntax names are case-insensitive, and underscores in them are
        equivalent to dashes. Registering a name again replaces
        the previous highlighter.

        :param highlighter:
            a highlighter instance.

        """

        for syntax in highlighter.syntaxes:
            _SYNTAXES[syntax.lower().replace("_", "-")] = highlighter

    @classmethod
    def get_highlighter(cls, syntax: str, /) -> SyntaxHighlighter:
        """
        Look up highlighter by a syntax name.

        :param syntax:
            name of the syntax highlighter.
        :returns:
            a highlighter instance.

        If highlighter with the given name can't be found, returns a dummy
        highlighter that does nothing.

        """

        highlighter = _SYNTAXES.get(syntax.lower().replace("_", "-"))
        if highlighter is None:
            # Build the fallback only when the lookup actually misses.
            highlighter = _DummySyntaxHighlighter()
        return highlighter

    @abc.abstractmethod
    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        """
        Highlight the given code using the given theme.

        :param theme:
            theme that will be used to look up color tags.
        :param code:
            code to highlight.
        :param default_color:
            color or color tag to apply to the entire code.
        :returns:
            highlighted code as a colorized string.

        """

        raise NotImplementedError()

    def _get_default_color(
        self,
        theme: yuio.theme.Theme,
        default_color: yuio.color.Color | str | None,
    ) -> yuio.color.Color:
        # Combine the caller's color with the theme's base color
        # for code in this highlighter's primary syntax.
        return theme.to_color(default_color) | theme.get_color(
            f"msg/text:code/{self.syntax}"
        )
class _DummySyntaxHighlighter(SyntaxHighlighter):
    """
    Highlighter that paints the whole code in the default code color
    without looking at its content.

    """

    @property
    def syntaxes(self) -> list[str]:
        return ["text", "plain-text"]

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        # Default color, then the code as is, then a color reset.
        base_color = self._get_default_color(theme, default_color)
        parts = [base_color, code, yuio.color.Color.NONE]
        return yuio.string.ColorizedString(parts)


SyntaxHighlighter.register_highlighter(_DummySyntaxHighlighter())
class _ReSyntaxHighlighter(SyntaxHighlighter):
    """
    Highlighter driven by a single regular expression.

    Every named group of `pattern` is a token kind; text matched by a group
    is painted with the theme color ``hl/<kind>:<syntax>``. A group name may
    carry a ``<prefix>__`` part, which is dropped from the kind and only
    affects the order in which groups of one match are emitted (groups are
    emitted sorted by their full name). If `str_esc_pattern` is given,
    it is used to find escape sequences inside of ``str`` tokens.

    """

    def __init__(
        self,
        syntaxes: list[str],
        pattern: _t.StrRePattern,
        str_esc_pattern: _t.StrRePattern | None = None,
    ):
        self._syntaxes = syntaxes
        self._pattern = pattern
        self._str_esc_pattern = str_esc_pattern

    @property
    def syntaxes(self) -> list[str]:
        return self._syntaxes

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        base_color = self._get_default_color(theme, default_color)

        result = yuio.string.ColorizedString()

        cursor = 0
        for token in self._pattern.finditer(code):
            token_start, token_end = token.span()
            if token_start > cursor:
                # Text between tokens is painted with the base color.
                result += base_color
                result += code[cursor:token_start]
            cursor = token_end

            for group_name, group_text in sorted(token.groupdict().items()):
                if not group_text:
                    continue
                kind = group_name.split("__", maxsplit=1)[-1]
                if kind != "str" or self._str_esc_pattern is None:
                    result += base_color | theme.get_color(
                        f"hl/{kind}:{self.syntax}"
                    )
                    result += group_text
                    continue
                for color, piece in self._split_escapes(
                    self._str_esc_pattern, theme, base_color, group_text
                ):
                    result += color
                    result += piece

        if cursor < len(code):
            result += base_color
            result += code[cursor:]

        return result

    def _split_escapes(self, esc_pattern, theme, base_color, text):
        # Split a string token into `(color, text)` pieces, alternating
        # between plain string content and escape sequences.
        plain_color = base_color | theme.get_color(f"hl/str:{self.syntax}")
        escape_color = base_color | theme.get_color(f"hl/str/esc:{self.syntax}")

        pos = 0
        for escape in esc_pattern.finditer(text):
            if pos < escape.start():
                yield plain_color, text[pos : escape.start()]
            pos = escape.end()
            if escape.group():
                yield escape_color, escape.group()
        if pos < len(text):
            yield plain_color, text[pos:]
# Tokens of python code. Order of alternatives matters: the first alternative
# that matches at a given position wins, so longer forms (long strings, hex
# and binary literals) are listed before the shorter forms that they start with.
_PY_SYNTAX = re.compile(
    r"""
        (?P<kwd>
            \b(?:                                   # keyword
                and|as|assert|async|await|break|class|continue|def|del|elif|else|
                except|finally|for|from|global|if|import|in|is|lambda|
                nonlocal|not|or|pass|raise|return|try|while|with|yield
            )\b)
      | (?P<str>
            [rfut]*(                                # string prefix
                \"""(\\.|[^\\]|\n)*?\"""            # long doubly-quoted string
              | '''(\\.|[^\\]|\n)*?'''              # long singly-quoted string
              | '(?:\\.|[^\\'])*(?:'|\n)            # singly-quoted string
              | "(?:\\.|[^\\"])*(?:"|\n)))          # doubly-quoted string
      | (?P<lit>
            0x[0-9a-fA-F]+                          # hex
          | 0b[01]+                                 # bin
          | \d+(?:\.\d*)?(?:e[+-]?\d+)?             # int or float
          | \.\d+(?:e[+-]?\d+)?                     # float that starts with dot
          | \b(?<!\.)(?:None|True|False)\b)         # bool or none, unless it's
                                                    # an attribute access
      | (?P<type>
            \b(?:                                   # type
                str|int|float|complex|list|tuple|range|dict|set|frozenset|bool|
                bytes|bytearray|memoryview|(?:[A-Z](?:[a-z]\w*)?)
            )\b)
      | (?P<punct>[{}()\[\]\\;|!&,])                # punctuation
      | (?P<comment>\#.*$)                          # comment
    """,
    re.MULTILINE | re.VERBOSE,
)
# Escape sequences inside of python string literals.
_PY_ESC_PATTERN = re.compile(
    r"""
        \\(
            \n                                      # escaped newline
          | [\\'"abfnrtv]                           # normal escape
          | [0-7]{3}                                # octal escape
          | x[0-9a-fA-F]{2}                         # hex escape
          | u[0-9a-fA-F]{4}                         # short unicode escape
          | U[0-9a-fA-F]{8}                         # long unicode escape
          | N\{[^}\n]+\}                            # unicode character names
          | [{}]                                    # template
          | %                                       # percent formatting
            (?:\([^)]*\))?                          # mapping key
            [#0\-+ ]*                               # conversion Flag
            (?:\*|\d+)?                             # field width
            (?:\.(?:\*|\d*))?                       # precision
            [hlL]?                                  # unused length modifier
            .                                       # conversion type
        )
    """,
    re.VERBOSE,
)
1242SyntaxHighlighter.register_highlighter(
1243 _ReSyntaxHighlighter(
1244 ["py", "py3", "py-3", "python", "python3", "python-3"],
1245 _PY_SYNTAX,
1246 str_esc_pattern=_PY_ESC_PATTERN,
1247 )
1248)
1249SyntaxHighlighter.register_highlighter(
1250 _ReSyntaxHighlighter(
1251 ["repr"],
1252 _PY_SYNTAX,
1253 str_esc_pattern=_PY_ESC_PATTERN,
1254 )
1255)
# Shell highlighter: keywords, program names that follow a chaining operator,
# strings, punctuation/redirects, comments and flags.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["sh", "bash"],
        re.compile(
            r"""
                (?P<kwd>
                    \b(?:                               # keyword
                        if|then|elif|else|fi|time|for|in|until|while|do|done|case|
                        esac|coproc|select|function
                    )\b
                  | \[\[                                # `test` syntax: if [[ ... ]]
                  | \]\])
                | (?P<a0__punct>(?:^|\|\|?|&&|\$\())    # chaining operator: pipe or logic
                  (?P<a1__>\s*)
                  (?P<a2__prog>([\w.@/-]|\\.)+)         # prog
                | (?P<str>
                    '[^']*'                             # singly-quoted string (no escapes, may span lines)
                    | "(?:\\.|[^\\"])*")                # doubly-quoted string
                | (?P<punct>
                      [{}()\[\]\\;!&|]                  # punctuation
                    | <{1,3}                            # input redirect
                    | [12]?>{1,2}(?:&[12])?)            # output redirect
                | (?P<comment>\#.*$)                    # comment
                | (?P<flag>(?<![\w-])-[a-zA-Z0-9_-]+\b) # flag
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    ),
)
# Highlighter for CLI usage lines: shell keywords plus the `%(prog)s`
# placeholder, `<metavar>` markers, strings, comments, flags and punctuation.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["sh-usage", "bash-usage"],
        re.compile(
            r"""
                (?P<kwd>
                    \b(?:                               # keyword
                        if|then|elif|else|fi|time|for|in|until|while|do|done|case|
                        esac|coproc|select|function
                    )\b)
                | (?P<prog>%\(prog\)s)                  # prog
                | (?P<metavar><(?!options>)[^>]+>)      # metavar; `<options>` is left for the flag group
                | (?P<str>
                    '[^']*'                             # singly-quoted string (no escapes, may span lines)
                    | "(?:\\.|[^\\"])*")                # doubly-quoted string
                | (?P<comment>\#.*$)                    # comment
                | (?P<flag>(?<![\w-])
                    -[-a-zA-Z0-9_]+\b                   # flag
                  | <options>                           # options
                  )
                | (?P<punct>[{}()\[\]\\;!&|])           # punctuation
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    )
)
# Unified-diff highlighter: file/hunk headers, added lines and removed lines.
# Each branch ends with a lookahead for an optional `\r` before the line end,
# so CRLF-terminated lines are recognized too; the `\r` itself is not part of
# the match.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["diff"],
        re.compile(
            r"""
                (?P<meta>^(\-\-\-|\+\+\+|\@\@)[^\r\n]*(?=\r?$))
                | (?P<added>^\+[^\r\n]*(?=\r?$))
                | (?P<removed>^\-[^\r\n]*(?=\r?$))
            """,
            re.MULTILINE | re.VERBOSE,
        ),
    ),
)
# JSON highlighter: literals (keywords and numbers), strings and punctuation.
# Numbers share the `lit` group with `true`/`false`/`null`, the same way the
# Python highlighter treats numeric literals.
SyntaxHighlighter.register_highlighter(
    _ReSyntaxHighlighter(
        ["json"],
        re.compile(
            r"""
                (?P<lit>
                    \b(?:true|false|null)\b             # keyword
                  | -?\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b)  # number
                | (?P<str>"(?:\\.|[^\\"])*(?:"|\n))     # doubly-quoted string
                | (?P<punct>[{}\[\],:])                 # punctuation
            """,
            re.MULTILINE | re.VERBOSE,
        ),
        # Escape sequences that are valid inside a JSON string.
        str_esc_pattern=re.compile(
            r"""
                \\(
                    \n
                    | [\\/"bfnrt]
                    | u[0-9a-fA-F]{4}
                )
            """,
            re.VERBOSE,
        ),
    ),
)
class _TbHighlighter(SyntaxHighlighter):
    """
    Highlighter for Python tracebacks.

    The text is processed line by line by a three-state machine (plain text,
    stack frames, exception message). Headings, frame locations, highlight
    markers and messages are colored with the ``tb/...`` theme paths; all other
    lines inside a stack are passed to the ``python`` highlighter.

    """

    @property
    def syntaxes(self) -> list[str]:
        """Syntax names under which this highlighter is registered."""
        return [
            "tb",
            "traceback",
            "py-tb",
            "py3-tb",
            "py-3-tb",
            "py-traceback",
            "py3-traceback",
            "py-3-traceback",
            "python-tb",
            "python3-tb",
            "python-3-tb",
            "python-traceback",
            "python3-traceback",
            "python-3-traceback",
        ]

    class _StackColors:
        """
        Colors for one kind of stack frame.

        ``tag`` selects the theme sub-path ``tb/frame/<tag>/...``; every color
        is layered on top of ``default_color``.

        """

        def __init__(
            self, theme: yuio.theme.Theme, default_color: yuio.color.Color, tag: str
        ):
            self.file_color = default_color | theme.get_color(f"tb/frame/{tag}/file")
            self.file_path_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/path"
            )
            self.file_line_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/line"
            )
            self.file_module_color = default_color | theme.get_color(
                f"tb/frame/{tag}/file/module"
            )
            self.code_color = default_color | theme.get_color(f"tb/frame/{tag}/code")
            self.highlight_color = default_color | theme.get_color(
                f"tb/frame/{tag}/highlight"
            )

    # Traceback heading, possibly prefixed with exception-group gutter characters.
    _TB_RE = re.compile(
        r"^(?P<indent>[ |+]*)(Stack|Traceback|Exception Group Traceback) \(most recent call last\):$"
    )
    # A bare error message: an identifier, optionally followed by `: text`.
    # NOTE(review): dotted names such as `pkg.Error: ...` do not match here --
    # confirm whether that is intended.
    _TB_MSG_RE = re.compile(r"^(?P<indent>[ |+]*)[A-Za-z_][A-Za-z0-9_]*($|:.*$)")
    # Frame location line: `File "<path>", line <n>[, in <location>]`.
    _TB_LINE_FILE = re.compile(
        r'^[ |+]*File (?P<file>"[^"]*"), line (?P<line>\d+)(?:, in (?P<loc>.*))?$'
    )
    # Line consisting only of gutter characters and `^`/`~`/`-` markers.
    _TB_LINE_HIGHLIGHT = re.compile(r"^[ |+^~-]*$")
    # Path fragments that mark a frame as library code (colored with the "lib" tag).
    _SITE_PACKAGES = os.sep + "lib" + os.sep + "site-packages" + os.sep
    _LIB_PYTHON = os.sep + "lib" + os.sep + "python"

    def highlight(
        self,
        theme: yuio.theme.Theme,
        code: str,
        default_color: yuio.color.Color | str | None = None,
    ) -> yuio.string.ColorizedString:
        """
        Colorize traceback text.

        :param theme:
            theme that supplies the ``tb/...`` colors.
        :param code:
            traceback text; it may be surrounded by unrelated plain text.
        :param default_color:
            base color, resolved through ``_get_default_color`` and combined
            with every theme color used here.
        :returns:
            the colorized text.

        """

        default_color = self._get_default_color(theme, default_color)

        py_highlighter = SyntaxHighlighter.get_highlighter("python")

        heading_color = default_color | theme.get_color("tb/heading")
        message_color = default_color | theme.get_color("tb/message")

        stack_normal_colors = self._StackColors(theme, default_color, "usr")
        stack_lib_colors = self._StackColors(theme, default_color, "lib")
        stack_colors = stack_normal_colors

        res = yuio.string.ColorizedString()

        PLAIN_TEXT, STACK, MESSAGE = 1, 2, 3
        state = PLAIN_TEXT
        stack_indent = ""
        message_indent = ""

        for line in code.splitlines(keepends=True):
            # The three checks below are deliberately not an `elif` chain:
            # a line that ends one state falls through to the next check.
            if state == STACK:
                if line.startswith(stack_indent):
                    # We're still in the stack.
                    if match := self._TB_LINE_FILE.match(line):
                        file_path, line_no, loc = match.group("file", "line", "loc")

                        # The location line decides the palette for the whole
                        # frame, including the code lines that follow it.
                        if (
                            self._SITE_PACKAGES in file_path
                            or self._LIB_PYTHON in file_path
                        ):
                            stack_colors = stack_lib_colors
                        else:
                            stack_colors = stack_normal_colors

                        res += yuio.color.Color.NONE
                        res += stack_indent
                        res += stack_colors.file_color
                        res += "File "
                        res += stack_colors.file_path_color
                        res += file_path
                        res += stack_colors.file_color
                        res += ", line "
                        res += stack_colors.file_line_color
                        res += line_no
                        res += stack_colors.file_color

                        if loc:
                            res += ", in "
                            res += stack_colors.file_module_color
                            res += loc
                            res += stack_colors.file_color

                        # Re-emit the line's own terminator (if any) instead of
                        # unconditionally adding a newline the input didn't have.
                        line_ending = line[match.end() :]
                        if line_ending:
                            res += line_ending
                    elif self._TB_LINE_HIGHLIGHT.match(line):
                        res += yuio.color.Color.NONE
                        res += stack_indent
                        res += stack_colors.highlight_color
                        res += line[len(stack_indent) :]
                    else:
                        # Source line of the frame: delegate to the Python highlighter.
                        res += yuio.color.Color.NONE
                        res += stack_indent
                        res += py_highlighter.highlight(
                            theme,
                            line[len(stack_indent) :],
                            stack_colors.code_color,
                        )
                    continue
                else:
                    # Stack has ended, this line is actually a message.
                    state = MESSAGE

            if state == MESSAGE:
                if line and line != "\n" and line.startswith(message_indent):
                    # We're still in the message.
                    res += yuio.color.Color.NONE
                    res += message_indent
                    res += message_color
                    res += line[len(message_indent) :]
                    continue
                else:
                    # Message has ended, this line is actually a plain text.
                    state = PLAIN_TEXT

            if state == PLAIN_TEXT:
                if match := self._TB_RE.match(line):
                    # Plain text has ended, this is actually a heading.
                    message_indent = match.group("indent").replace("+", "|")
                    # Frame lines sit two columns deeper than the heading.
                    stack_indent = message_indent + "  "

                    res += yuio.color.Color.NONE
                    res += message_indent
                    res += heading_color
                    res += line[len(message_indent) :]

                    state = STACK
                    continue
                elif match := self._TB_MSG_RE.match(line):
                    # Plain text has ended, this is an error message (without a traceback).
                    message_indent = match.group("indent").replace("+", "|")
                    stack_indent = message_indent + "  "

                    res += yuio.color.Color.NONE
                    res += message_indent
                    res += message_color
                    res += line[len(message_indent) :]

                    state = MESSAGE
                    continue
                else:
                    # We're still in plain text.
                    res += yuio.color.Color.NONE
                    res += line
                    continue

        return res


SyntaxHighlighter.register_highlighter(_TbHighlighter())