Coverage for yuio/md.py: 91%

1# Yuio project, MIT license.

3# https://github.com/taminomara/yuio/

5# You're free to copy this file to your project and edit it for your needs,

6# just keep this copyright line please :3

8"""

9Yuio's primary format for higher-level io is Markdown (well, a reasonably rich subset

10of it).

13Formatting markdown

14-------------------

16.. autoclass:: MdFormatter

17 :members:

20.. _highlighting-code:

22Highlighting code

23-----------------

25Yuio supports basic code highlighting; it is just enough to format help messages

26for CLI, and color tracebacks when an error occurs.

28.. autoclass:: SyntaxHighlighter

29 :members:

32Markdown AST

33------------

35.. warning::

37 This is experimental API which can change within a minor release.

39.. autofunction:: parse

41.. autoclass:: AstBase

42 :members:

44.. autoclass:: Text

45 :members:

47.. autoclass:: Container

48 :members:

50.. autoclass:: Document

51 :members:

53.. autoclass:: ThematicBreak

54 :members:

56.. autoclass:: Heading

57 :members:

59.. autoclass:: Paragraph

60 :members:

62.. autoclass:: Quote

63 :members:

65.. autoclass:: Code

66 :members:

68.. autoclass:: ListItem

69 :members:

71.. autoclass:: List

72 :members:

75"""

77from __future__ import annotations

79import abc

80import contextlib

81import dataclasses

82import math

83import os

84import re

85from dataclasses import dataclass

87import yuio.color

88import yuio.string

89import yuio.theme

90from yuio.util import dedent as _dedent

92import yuio._typing_ext as _tx

93from typing import TYPE_CHECKING

95if TYPE_CHECKING:

96 import typing_extensions as _t

97else:

98 from yuio import _typing as _t

100__all__ = [

101 "AstBase",

102 "AstBase",

103 "Code",

104 "Code",

105 "Container",

106 "Container",

107 "Document",

108 "Document",

109 "Heading",

110 "Heading",

111 "List",

112 "List",

113 "ListItem",

114 "ListItem",

115 "MdFormatter",

116 "Paragraph",

117 "Paragraph",

118 "Quote",

119 "Quote",

120 "Raw",

121 "SyntaxHighlighter",

122 "Text",

123 "Text",

124 "ThematicBreak",

125 "ThematicBreak",

126 "parse",

127]

128

129T = _t.TypeVar("T")

130TAst = _t.TypeVar("TAst", bound="AstBase")

131

132

133@_t.final

134class MdFormatter:

135 """

136 A simple markdown formatter suitable for displaying rich text in the terminal.

137

138 :param ctx:

139 a :class:`~yuio.string.ReprContext` that's used to colorize or wrap

140 rendered markdown.

141 :param allow_headings:

142 if set to :data:`False`, headings are rendered as paragraphs.

143

144 All CommonMark block markup except tables is supported:

145

146 - headings:

147

148 .. code-block:: markdown

149

150 # Heading 1

151 ## Heading 2

152

153 Yuio has only two levels of headings. Headings past level two will look the same

154 as level two headings (you can adjust theme to change this).

155

156 If `allow_headings` is set to :data:`False`, headings look like paragraphs.

157

158 - lists, numbered lists, quotes:

159

160 .. code-block:: markdown

161

162 - List item 1,

163 - list item 2.

164

165 1. Numbered list item 1,

166 1. numbered list item 2.

167

168 > Quoted text.

169

170 - fenced code blocks with minimal syntax highlighting

171 (see :class:`SyntaxHighlighter`):

172

173 .. code-block:: markdown

174

175 ```python

176 for i in range(5, 8):

177 print(f"Hello, world! This is {{i}}th day past the apocalypse.")

178 ```

179

180 Yuio supports ``python``, ``traceback``, ``bash``, ``diff``,

181 and ``json`` syntaxes.

182

183 Inline markdown only handles inline code blocks:

184

185 .. code-block:: markdown

186

187 This is `code`. It will be rendered as code.

188 Other inline styles, such as _italic_, are not supported!

189

190 However, color tags are supported, so you can highlight text as follows:

191

192 .. code-block:: markdown

193

194 This is <c b>bold text</c>. It will be rendered bold.

195

196 """

197

198 def __init__(

199 self,

200 ctx: yuio.string.ReprContext,

201 *,

202 allow_headings: bool = True,

203 ):

204 self._ctx = ctx

205 self.allow_headings: bool = allow_headings

206

207 self._is_first_line: bool

208 self._out: list[yuio.string.ColorizedString]

209 self._indent: yuio.string.ColorizedString

210 self._continuation_indent: yuio.string.ColorizedString

211

212 @property

213 def ctx(self):

214 return self._ctx

215

216 @property

217 def width(self):

218 return self._ctx.width

219

220 def format(

221 self, md: str, *, dedent: bool = True

222 ) -> list[yuio.string.ColorizedString]:

223 """

224 Format a markdown document.

225

226 :param md:

227 markdown to format. Common indentation will be removed from this string,

228 making it suitable to use with triple quote literals.

229 :param dedent:

230 remove lading indent from markdown.

231 :returns:

232 rendered markdown as a list of individual lines without newline

233 characters at the end.

234

235 """

236

237 return self.format_node(self.parse(md, dedent=dedent))

238

239 def parse(self, md: str, /, *, dedent: bool = True) -> Document:

240 """

241 Parse a markdown document and return an AST node.

242

243 .. warning::

244

245 This is experimental API which can change within a minor release.

246

247 :param md:

248 markdown to parse. Common indentation will be removed from this string,

249 making it suitable to use with triple quote literals.

250 :param dedent:

251 remove lading indent from markdown.

252 :returns:

253 parsed AST node.

254

255 """

256

257 if dedent:

258 md = _dedent(md)

259

260 return _MdParser(self.allow_headings).parse(md)

261

262 def format_node(self, node: AstBase, /) -> list[yuio.string.ColorizedString]:

263 """

264 Format a parsed markdown document.

265

266 .. warning::

267

268 This is an experimental API which can change within a minor release.

269

270 :param md:

271 AST node to format.

272 :returns:

273 rendered markdown as a list of individual lines without newline

274 characters at the end.

275

276 """

277

278 self._is_first_line = True

279 self._separate_paragraphs = True

280 self._out = []

281 self._indent = yuio.string.ColorizedString()

282 self._continuation_indent = yuio.string.ColorizedString()

283

284 self._format(node)

285

286 return self._out

287

288 def colorize(

289 self,

290 text: str,

291 /,

292 *,

293 default_color: yuio.color.Color | str = yuio.color.Color.NONE,

294 ):

295 """

296 Parse and colorize contents of a paragraph.

297

298 This is a shortcut for calling :func:`colorize` with this formatter's theme.

299

300 :param line:

301 text to colorize.

302 :param default_color:

303 color or color tag to apply to the entire text.

304 :returns:

305 a colorized string.

306

307 """

308

309 return yuio.string.colorize(text, default_color=default_color, ctx=self.ctx)

310

311 @contextlib.contextmanager

312 def _with_indent(

313 self,

314 color: yuio.color.Color | str | None,

315 s: yuio.string.AnyString,

316 /,

317 *,

318 continue_with_spaces: bool = True,

319 ):

320 color = self.ctx.to_color(color)

321 indent = yuio.string.ColorizedString(color)

322 indent += s

323

324 old_indent = self._indent

325 old_continuation_indent = self._continuation_indent

326

327 if continue_with_spaces:

328 continuation_indent = yuio.string.ColorizedString(" " * indent.width)

329 else:

330 continuation_indent = indent

331

332 self._indent = self._indent + indent

333 self._continuation_indent = self._continuation_indent + continuation_indent

334

335 try:

336 yield

337 finally:

338 self._indent = old_indent

339 self._continuation_indent = old_continuation_indent

340

341 def _line(self, line: yuio.string.ColorizedString, /):

342 self._out.append(line)

343

344 self._is_first_line = False

345 self._indent = self._continuation_indent

346

347 def _format(self, node: AstBase, /):

348 getattr(self, f"_format_{node.__class__.__name__.lstrip('_')}")(node)

349

350 def _format_Raw(self, node: Raw, /):

351 for line in node.raw.wrap(

352 self.width,

353 indent=self._indent,

354 continuation_indent=self._continuation_indent,

355 break_long_nowrap_words=True,

356 ):

357 self._line(line)

358

359 def _format_Text(self, node: Text, /, *, default_color: yuio.color.Color):

360 s = self.colorize(

361 "\n".join(node.lines).strip(),

362 default_color=default_color,

363 )

364

365 for line in s.wrap(

366 self.width,

367 indent=self._indent,

368 continuation_indent=self._continuation_indent,

369 preserve_newlines=False,

370 break_long_nowrap_words=True,

371 ):

372 self._line(line)

373

374 def _format_Container(self, node: Container[TAst], /):

375 self._is_first_line = True

376 for item in node.items:

377 if not self._is_first_line and self._separate_paragraphs:

378 self._line(self._indent)

379 self._format(item)

380

381 def _format_Document(self, node: Document, /):

382 self._format_Container(node)

383

384 def _format_ThematicBreak(self, _: ThematicBreak):

385 decoration = self.ctx.get_msg_decoration("thematic_break")

386 self._line(self._indent + decoration)

387

388 def _format_Heading(self, node: Heading, /):

389 if not self._is_first_line:

390 self._line(self._indent)

391

392 decoration = self.ctx.get_msg_decoration(f"heading/{node.level}")

393 with self._with_indent(f"msg/decoration:heading/{node.level}", decoration):

394 self._format_Text(

395 node,

396 default_color=self.ctx.get_color(f"msg/text:heading/{node.level}"),

397 )

398

399 self._line(self._indent)

400 self._is_first_line = True

401

402 def _format_Paragraph(self, node: Paragraph, /):

403 self._format_Text(node, default_color=self.ctx.get_color("msg/text:paragraph"))

404

405 def _format_ListItem(self, node: ListItem, /, *, min_width: int = 0):

406 decoration = self.ctx.get_msg_decoration("list")

407 if node.number is not None:

408 decoration = f"{node.number:>{min_width}}." + " " * (

409 yuio.string.line_width(decoration) - min_width - 1

410 )

411 with self._with_indent("msg/decoration:list", decoration):

412 self._format_Container(node)

413

414 def _format_Quote(self, node: Quote, /):

415 decoration = self.ctx.get_msg_decoration("quote")

416 with self._with_indent(

417 "msg/decoration:quote", decoration, continue_with_spaces=False

418 ):

419 self._format_Container(node)

420

421 def _format_Code(self, node: Code, /):

422 s = SyntaxHighlighter.get_highlighter(node.syntax).highlight(

423 self.ctx.theme,

424 "\n".join(node.lines),

425 )

426

427 decoration = self.ctx.get_msg_decoration("code")

428 with self._with_indent("msg/decoration:code", decoration):

429 self._line(

430 s.indent(

431 indent=self._indent,

432 continuation_indent=self._continuation_indent,

433 )

434 )

435

436 def _format_List(self, node: List, /):

437 max_number = max(item.number or 0 for item in node.items)

438 min_width = math.ceil(math.log10(max_number)) if max_number > 0 else 1

439 self._is_first_line = True

440 for item in node.items:

441 if not self._is_first_line:

442 self._line(self._indent)

443 self._format_ListItem(item, min_width=min_width)

444

445

446@dataclass(kw_only=True, slots=True)

447class AstBase(abc.ABC):

448 """

449 Base class for all AST nodes that represent parsed markdown document.

450

451 """

452

453 def _dump_params(self) -> str:

454 s = self.__class__.__name__.lstrip("_")

455 for field in dataclasses.fields(self):

456 if field.repr:

457 s += f" {getattr(self, field.name)!r}"

458 return s

459

460 def dump(self, indent: str = "") -> str:

461 """

462 Dump an AST node into a lisp-like text representation.

463

464 """

465

466 return f"{indent}({self._dump_params()})"

467

468

469@dataclass(kw_only=True, slots=True)

470class Raw(AstBase):

471 """

472 Embeds already formatted paragraph into the document.

473

474 """

475

476 raw: yuio.string.ColorizedString

477 """

478 Raw colorized string to add to the document.

479

480 """

481

482

483@dataclass(kw_only=True, slots=True)

484class Text(AstBase):

485 """

486 Base class for all text-based AST nodes, i.e. paragraphs, headings, etc.

487

488 """

489

490 lines: list[str] = dataclasses.field(repr=False)

491 """

492 Text lines as parsed from the original document.

493

494 """

495

496 def dump(self, indent: str = "") -> str:

497 s = f"{indent}({self._dump_params()}"

498 indent += " "

499 for line in self.lines:

500 s += "\n" + indent

501 s += repr(line)

502 s += ")"

503 return s

504

505

506@dataclass(kw_only=True, slots=True)

507class Container(AstBase, _t.Generic[TAst]):

508 """

509 Base class for all container-based AST nodes, i.e. list items or quotes.

510

511 This class works as a list of items. Usually it contains arbitrary AST nodes,

512 but it can also be limited to specific kinds of nodes via its generic variable.

513

514 """

515

516 items: list[TAst] = dataclasses.field(repr=False)

517 """

518 Inner AST nodes in the container.

519

520 """

521

522 def dump(self, indent: str = "") -> str:

523 s = f"{indent or ''}({self._dump_params()}"

524 indent += " "

525 for items in self.items:

526 s += "\n"

527 s += items.dump(indent)

528 s += ")"

529 return s

530

531

532@dataclass(kw_only=True, slots=True)

533class Document(Container[AstBase]):

534 """

535 Root node that contains the entire markdown document.

536

537 """

538

539

540@dataclass(kw_only=True, slots=True)

541class ThematicBreak(AstBase):

542 """

543 Represents a visual break in text, a.k.a. an asterism.

544

545 """

546

547

548@dataclass(kw_only=True, slots=True)

549class Heading(Text):

550 """

551 Represents a heading.

552

553 """

554

555 level: int

556 """

557 Level of the heading, `1`-based.

558

559 """

560

561

562@dataclass(kw_only=True, slots=True)

563class Paragraph(Text):

564 """

565 Represents a regular paragraph.

566

567 """

568

569

570@dataclass(kw_only=True, slots=True)

571class Quote(Container[AstBase]):

572 """

573 Represents a quotation block.

574

575 """

576

577

578@dataclass(kw_only=True, slots=True)

579class Code(Text):

580 """

581 Represents a highlighted block of code.

582

583 """

584

585 syntax: str

586 """

587 Syntax indicator as parsed form the original document.

588

589 """

590

591

592@dataclass(kw_only=True, slots=True)

593class ListItem(Container[AstBase]):

594 """

595 A possibly numbered element of a list.

596

597 """

598

599 number: int | None

600 """

601 If present, this is the item's number in a numbered list.

602

603 """

604

605

606@dataclass(kw_only=True, slots=True)

607class List(Container[ListItem]):

608 """

609 A collection of list items.

610

611 """

612

613

614_HEADING_RE = re.compile(

615 r"""

616 ^

617 \s{0,3} # - Initial indent.

618 (?P<marker>\#{1,6}) # - Heading marker.

619 (?P<text>\s.*?)? # - Heading text. Unless empty, text must be separated

620 # from the heading marker by a space.

621 (?:(?<=\s)\#+)? # - Optional closing hashes. Must be separated from

622 # the previous content by a space. We use lookbehind

623 # here, because if the text is empty, the space

624 # between heading marker and closing hashes will be

625 # matched by the `text` group.

626 \s* # - Closing spaces.

627 $

628 """,

629 re.VERBOSE,

630)

631_SETEXT_HEADING_RE = re.compile(

632 r"""

633 ^

634 (?P<indent>\s{0,3}) # - Initial indent.

635 (?P<level>-|=) # - Heading underline.

636 \2* # - More heading underline.

637 \s* # - Closing spaces.

638 $

639 """,

640 re.VERBOSE,

641)

642_LIST_RE = re.compile(

643 r"""

644 ^

645 (?P<marker>

646 \s{0,3} # - Initial indent.

647 (?P<type>[-*+]) # - List marker.

648 (?:

649 \s(?:\s{0,3}(?=\S))? # - One mandatory and up to three optional spaces;

650 # When there are more than three optional spaces,

651 # we treat then as a list marker followed

652 # by a single space, followed by a code block.

653 | $)) # - For cases when a list starts with an empty line.

654 (?P<text>.*) # - Text of the first line in the list.

655 $

656 """,

657 re.VERBOSE,

658)

659_NUMBERED_LIST_RE = re.compile(

660 r"""

661 ^

662 (?P<marker>

663 \s{0,3} # - Initial indent.

664 (?P<number>\d{1,9}) # - Number.

665 (?P<type>[.:)]) # - Numbered list marker.

666 (?:

667 \s(?:\s{0,3}(?=\S))? # - One mandatory and up to three optional spaces;

668 # When there are more than three optional spaces,

669 # we treat then as a list marker followed

670 # by a single space, followed by a code block.

671 | $)) # - For cases when a list starts with an empty line.

672 (?P<text>.*) # - Text of the first line in the list.

673 $

674 """,

675 re.VERBOSE,

676)

677_CODE_BACKTICK_RE = re.compile(

678 r"""

679 ^

680 (?P<indent>\s{0,3}) # - Initial indent.

681 (?P<fence>```+) # - Backtick fence.

682 (?P<syntax>[^`]*) # - Syntax, can't contain backtick.

683 $

684 """,

685 re.VERBOSE,

686)

687_CODE_TILDE_RE = re.compile(

688 r"""

689 ^

690 (?P<indent>\s{0,3}) # - Initial indent.

691 (?P<fence>~~~+) # - Backtick fence.

692 (?P<syntax>.*) # - Syntax, can be anything.

693 $

694 """,

695 re.VERBOSE,

696)

697_CODE_FENCE_END_RE = re.compile(

698 r"""

699 ^

700 (?P<indent>\s{0,3}) # - Initial indent.

701 (?P<fence>~~~+|```+) # - Fence.

702 \s* # - Closing spaces.

703 $

704 """,

705 re.VERBOSE,

706)

707_CODE_RE = re.compile(

708 r"""

709 ^

710 \s{4} # - Initial code indent.

711 (?P<text>.*) # - First code line.

712 $

713 """,

714 re.VERBOSE,

715)

716_QUOTE_RE = re.compile(

717 r"""

718 ^

719 (?P<indent>\s{0,3}) # - Initial quote indent.

720 > # - Quote marker.

721 \s? # - Optional space after the marker.

722 (?P<text>.*) # - Text of the first line in the quote.

723 $

724 """,

725 re.VERBOSE,

726)

727_THEMATIC_BREAK_RE = re.compile(

728 r"""

729 ^

730 (?P<indent>\s{0,3}) # - Initial quote indent.

731 ([-*_])\s*(\2\s*){2,} # - At least three break characters separated by spaces.

732 $

733 """,

734 re.VERBOSE,

735)

736_LINE_FEED_RE = re.compile(r"\r\n|\r|\n")

737

738

739class _MdParser:

740 @dataclass(kw_only=True, slots=True)

741 class Default:

742 pass

743

744 @dataclass(kw_only=True, slots=True)

745 class List:

746 type: str

747 marker_len: int

748 list: List

749 parser: _MdParser

750 number: int | None = None

751

752 @dataclass(kw_only=True, slots=True)

753 class Quote:

754 parser: _MdParser

755

756 @dataclass(kw_only=True, slots=True)

757 class Code:

758 lines: list[str]

759

760 @dataclass(kw_only=True, slots=True)

761 class FencedCode:

762 indent: int

763 fence_symbol: str

764 fence_length: int

765 syntax: str

766 lines: list[str]

767

768 @dataclass(kw_only=True, slots=True)

769 class Paragraph:

770 lines: list[str]

771

773

774 def __init__(self, allow_headings: bool = True):

775 self._allow_headings = allow_headings

776 self._nodes: list[AstBase] = []

777 self._state: _MdParser.State = self.Default()

778

779 def _parser(self) -> _MdParser:

780 return _MdParser(self._allow_headings)

781

782 @staticmethod

783 def _is_blank(s: str) -> bool:

784 return not s or s.isspace()

785

786 def parse(self, s: str) -> Document:

787 s = s.expandtabs(tabsize=4)

788 for line in _LINE_FEED_RE.split(s):

789 self._handle_line(line)

790 return Document(items=self._finalize())

791

792 def _handle_line(self, line: str):

793 getattr(self, f"_handle_line_{self._state.__class__.__name__}")(line)

794

795 def _handle_lazy_line(self, line: str) -> bool:

796 return getattr(self, f"_handle_lazy_line_{self._state.__class__.__name__}")(

797 line

798 )

799

800 def _flush(self):

801 getattr(self, f"_flush_{self._state.__class__.__name__}")()

802

803 def _handle_line_List(self, line: str):

804 assert type(self._state) is self.List

805 if not line or line[: self._state.marker_len].isspace():

806 self._state.parser._handle_line(line[self._state.marker_len :])

807 elif (

808 (match := _LIST_RE.match(line)) or (match := _NUMBERED_LIST_RE.match(line))

809 ) and match.group("type") == self._state.type:

810 item = ListItem(

811 items=self._state.parser._finalize(),

812 number=self._state.number,

813 )

814 self._state.list.items.append(item)

815 self._state.marker_len = len(match.group("marker"))

816 self._state.parser._handle_line(match.group("text"))

817 if self._state.number is not None:

818 self._state.number += 1

819 elif not self._state.parser._handle_lazy_line(line):

820 self._flush_List()

821 self._handle_line_Default(line)

822

823 def _handle_lazy_line_List(self, line: str) -> bool:

824 assert type(self._state) is self.List

825 if self._state.parser._handle_lazy_line(line):

826 return True

827 return False

828

829 def _flush_List(self):

830 assert type(self._state) is self.List

831 item = ListItem(

832 items=self._state.parser._finalize(),

833 number=self._state.number,

834 )

835 self._state.list.items.append(item)

836 self._nodes.append(self._state.list)

837 self._state = self.Default()

838

839 def _handle_line_Quote(self, line: str):

840 assert type(self._state) is self.Quote

841 if match := _QUOTE_RE.match(line):

842 self._state.parser._handle_line(match.group("text"))

843 elif self._is_blank(line) or not self._state.parser._handle_lazy_line(line):

844 self._flush_Quote()

845 self._handle_line_Default(line)

846

847 def _handle_lazy_line_Quote(self, line: str) -> bool:

848 assert type(self._state) is self.Quote

849 if self._state.parser._handle_lazy_line(line):

850 return True

851 else:

852 return False

853

854 def _flush_Quote(self):

855 assert type(self._state) is self.Quote

856 self._nodes.append(Quote(items=self._state.parser._finalize()))

857 self._state = self.Default()

858

859 def _handle_line_Code(self, line: str):

860 assert type(self._state) is self.Code

861 if self._is_blank(line) or line.startswith(" "):

862 self._state.lines.append(line[4:])

863 else:

864 self._flush_Code()

865 self._handle_line_Default(line)

866

867 def _handle_lazy_line_Code(self, line: str) -> bool:

868 assert type(self._state) is self.Code

869 return False # No lazy continuations for code!

870

871 def _flush_Code(self):

872 assert type(self._state) is self.Code

873 while self._state.lines and self._is_blank(self._state.lines[-1]):

874 self._state.lines.pop()

875 self._nodes.append(

876 Code(

877 lines=self._state.lines,

878 syntax="",

879 )

880 )

881 self._state = self.Default()

882

883 def _handle_line_FencedCode(self, line: str):

884 assert type(self._state) is self.FencedCode

885 if (

886 (match := _CODE_FENCE_END_RE.match(line))

887 and match.group("fence")[0] == self._state.fence_symbol

888 and len(match.group("fence")) == self._state.fence_length

889 ):

890 self._flush_FencedCode()

891 else:

892 if self._state.indent == 0:

893 pass

894 elif line[: self._state.indent].isspace():

895 line = line[self._state.indent :]

896 else:

897 line = line.lstrip()

898 self._state.lines.append(line)

899

900 def _handle_lazy_line_FencedCode(self, line: str) -> bool:

901 assert type(self._state) is self.FencedCode

902 return False

903

904 def _flush_FencedCode(self):

905 assert type(self._state) is self.FencedCode

906 self._nodes.append(

907 Code(

908 lines=self._state.lines,

909 syntax=self._state.syntax,

910 )

911 )

912 self._state = self.Default()

913

914 def _handle_line_Paragraph(self, line: str):

915 assert type(self._state) is self.Paragraph

916 if match := _SETEXT_HEADING_RE.match(line):

917 level = 1 if match.group("level") == "=" else 2

918 self._nodes.append(

919 Heading(

920 lines=self._state.lines,

921 level=level,

922 )

923 )

924 self._state = self.Default()

925 elif (

926 self._is_blank(line)

927 or _THEMATIC_BREAK_RE.match(line)

928 or (self._allow_headings and _HEADING_RE.match(line))

929 or _CODE_BACKTICK_RE.match(line)

930 or _CODE_TILDE_RE.match(line)

931 or _LIST_RE.match(line)

932 or _NUMBERED_LIST_RE.match(line)

933 or _QUOTE_RE.match(line)

934 ):

935 self._flush_Paragraph()

936 self._handle_line_Default(line)

937 else:

938 self._state.lines.append(line)

939

940 def _handle_lazy_line_Paragraph(self, line: str) -> bool:

941 assert type(self._state) is self.Paragraph

942 if (

943 self._is_blank(line)

944 or _THEMATIC_BREAK_RE.match(line)

945 or (self._allow_headings and _HEADING_RE.match(line))

946 or _CODE_BACKTICK_RE.match(line)

947 or _CODE_TILDE_RE.match(line)

948 or _LIST_RE.match(line)

949 or _NUMBERED_LIST_RE.match(line)

950 or _QUOTE_RE.match(line)

951 ):

952 self._flush_Paragraph()

953 return False

954 else:

955 self._state.lines.append(line)

956 return True

957

958 def _flush_Paragraph(self):

959 assert type(self._state) is self.Paragraph

960 self._nodes.append(Paragraph(lines=self._state.lines))

961 self._state = self.Default()

962

963 def _handle_line_Default(self, line: str):

964 assert type(self._state) is self.Default

965 if self._is_blank(line):

966 pass # do nothing

967 elif _THEMATIC_BREAK_RE.match(line):

968 self._nodes.append(ThematicBreak())

969 elif self._allow_headings and (match := _HEADING_RE.match(line)):

970 level = len(match.group("marker"))

971 self._nodes.append(

972 Heading(

973 lines=[match.group("text").strip()],

974 level=level,

975 )

976 )

977 elif (match := _CODE_BACKTICK_RE.match(line)) or (

978 match := _CODE_TILDE_RE.match(line)

979 ):

980 indent = len(match.group("indent"))

981 syntax = match.group("syntax").strip()

982 fence_symbol = match.group("fence")[0]

983 fence_length = len(match.group("fence"))

984 self._state = self.FencedCode(

985 indent=indent,

986 fence_symbol=fence_symbol,

987 fence_length=fence_length,

988 syntax=syntax,

989 lines=[],

990 )

991 elif match := _CODE_RE.match(line):

992 self._state = self.Code(lines=[match.group("text")])

993 elif (match := _LIST_RE.match(line)) or (

994 match := _NUMBERED_LIST_RE.match(line)

995 ):

996 indent = len(match.group("marker"))

997 list_type = match.group("type")

998 number_str = match.groupdict().get("number", None)

999 number = int(number_str) if number_str else None

1000 self._state = self.List(

1001 type=list_type,

1002 marker_len=indent,

1003 list=List(items=[]),

1004 parser=self._parser(),

1005 number=number,

1006 )

1007 self._state.parser._handle_line(match.group("text"))

1008 elif match := _QUOTE_RE.match(line):

1009 self._state = self.Quote(parser=self._parser())

1010 self._state.parser._handle_line(match.group("text"))

1011 else:

1012 self._state = self.Paragraph(lines=[line])

1013

1014 def _handle_lazy_line_Default(self, line: str) -> bool:

1015 assert type(self._state) is self.Default

1016 return False

1017

1018 def _flush_Default(self):

1019 assert type(self._state) is self.Default

1020

1021 def _finalize(self) -> list[AstBase]:

1022 self._flush()

1023 result = self._nodes

1024 self._nodes = []

1025 return result

1026

1027

1028def parse(md: str, /, *, dedent: bool = True, allow_headings: bool = True) -> Document:

1029 """

1030 Parse a markdown document and return an AST node.

1031

1032 :param md:

1033 markdown to parse. Common indentation will be removed from this string,

1034 making it suitable to use with triple quote literals.

1035 :param dedent:

1036 remove lading indent from markdown.

1037 :param allow_headings:

1038 if set to :data:`False`, headings are rendered as paragraphs.

1039 :returns:

1040 parsed AST node.

1041

1042 """

1043

1044 if dedent:

1045 md = _dedent(md)

1046

1047 return _MdParser(allow_headings).parse(md)

1048

1049

1050_SYNTAXES: dict[str, SyntaxHighlighter] = {}

1051"""

1052Global syntax registry.

1053

1054"""

1055

1056

1057class SyntaxHighlighter(abc.ABC):

1058 @property

1059 @abc.abstractmethod

1060 def syntaxes(self) -> list[str]:

1061 """

1062 List of syntax names that should be associated with this highlighter.

1063

1064 """

1065

1066 return []

1067

1068 @property

1069 def syntax(self) -> str:

1070 """

1071 The primary syntax name for this highlighter, defaults to the first element

1072 of the :attr:`~SyntaxHighlighter.syntaxes` list.

1073

1074 This name is used to look up colors in a theme.

1075

1076 """

1077

1078 return self.syntaxes[0] if self.syntaxes else "unknown"

1079

1080 @classmethod

1081 def register_highlighter(cls, highlighter: SyntaxHighlighter):

1082 """

1083 Register a highlighter in a global registry, and allow looking it up

1084 via the :meth:`~SyntaxHighlighter.get_highlighter` method.

1085

1086 :param highlighter:

1087 a highlighter instance.

1088

1089 """

1090

1091 for syntax in highlighter.syntaxes:

1092 _SYNTAXES[syntax.lower().replace("_", "-")] = highlighter

1093

1094 @classmethod

1095 def get_highlighter(cls, syntax: str, /) -> SyntaxHighlighter:

1096 """

1097 Look up highlighter by a syntax name.

1098

1099 :param syntax:

1100 name of the syntax highlighter.

1101 :returns:

1102 a highlighter instance.

1103

1104 If highlighter with the given name can't be found, returns a dummy

1105 highlighter that does nothing.

1106

1107 """

1108

1109 return _SYNTAXES.get(

1110 syntax.lower().replace("_", "-"),

1111 _DummySyntaxHighlighter(),

1112 )

1113

1114 @abc.abstractmethod

1115 def highlight(

1116 self,

1117 theme: yuio.theme.Theme,

1118 code: str,

1119 default_color: yuio.color.Color | str | None = None,

1120 ) -> yuio.string.ColorizedString:

1121 """

1122 Highlight the given code using the given theme.

1123

1124 :param theme:

1125 theme that will be used to look up color tags.

1126 :param code:

1127 code to highlight.

1128 :param default_color:

1129 color or color tag to apply to the entire code.

1130

1131 """

1132

1133 raise NotImplementedError()

1134

1135 def _get_default_color(

1136 self,

1137 theme: yuio.theme.Theme,

1138 default_color: yuio.color.Color | str | None,

1139 ) -> yuio.color.Color:

1140 return theme.to_color(default_color) | theme.get_color(

1141 f"msg/text:code/{self.syntax}"

1142 )

1143

1144

1145class _DummySyntaxHighlighter(SyntaxHighlighter):

1146 @property

1147 def syntaxes(self) -> list[str]:

1148 return ["text", "plain-text"]

1149

1150 def highlight(

1151 self,

1152 theme: yuio.theme.Theme,

1153 code: str,

1154 default_color: yuio.color.Color | str | None = None,

1155 ) -> yuio.string.ColorizedString:

1156 return yuio.string.ColorizedString(

1157 [

1158 self._get_default_color(theme, default_color),

1159 code,

1160 yuio.color.Color.NONE,

1161 ]

1162 )

1163

1164

1165SyntaxHighlighter.register_highlighter(_DummySyntaxHighlighter())

1166

1167

1168class _ReSyntaxHighlighter(SyntaxHighlighter):

1169 def __init__(

1170 self,

1171 syntaxes: list[str],

1172 pattern: _tx.StrRePattern,

1173 str_esc_pattern: _tx.StrRePattern | None = None,

1174 ):

1175 self._syntaxes = syntaxes

1176 self._pattern = pattern

1177 self._str_esc_pattern = str_esc_pattern

1178

1179 @property

1180 def syntaxes(self) -> list[str]:

1181 return self._syntaxes

1182

1183 def highlight(

1184 self,

1185 theme: yuio.theme.Theme,

1186 code: str,

1187 default_color: yuio.color.Color | str | None = None,

1188 ) -> yuio.string.ColorizedString:

1189 default_color = self._get_default_color(theme, default_color)

1190

1191 raw = yuio.string.ColorizedString()

1192

1193 last_pos = 0

1194 for code_unit in self._pattern.finditer(code):

1195 if last_pos < code_unit.start():

1196 raw += default_color

1197 raw += code[last_pos : code_unit.start()]

1198 last_pos = code_unit.end()

1199

1200 for name, text in sorted(code_unit.groupdict().items()):

1201 if not text:

1202 continue

1203 name = name.split("__", maxsplit=1)[-1]

1204 if self._str_esc_pattern is not None and name == "str":

1205 str_color = default_color | theme.get_color(f"hl/str:{self.syntax}")

1206 esc_color = default_color | theme.get_color(

1207 f"hl/str/esc:{self.syntax}"

1208 )

1209 last_escape_pos = 0

1210 for escape_unit in self._str_esc_pattern.finditer(text):

1211 if last_escape_pos < escape_unit.start():

1212 raw += str_color

1213 raw += text[last_escape_pos : escape_unit.start()]

1214 last_escape_pos = escape_unit.end()

1215 if escape := text[escape_unit.start() : escape_unit.end()]:

1216 raw += esc_color

1217 raw += escape

1218 if last_escape_pos < len(text):

1219 raw += str_color

1220 raw += text[last_escape_pos:]

1221 else:

1222 raw += default_color | theme.get_color(f"hl/{name}:{self.syntax}")

1223 raw += text

1224

1225 if last_pos < len(code):

1226 raw += default_color

1227 raw += code[last_pos:]

1228

1229 return raw

1230

1231

1232_PY_SYNTAX = re.compile(

1233 r"""

1234 (?P<kwd>

1235 \b(?: # keyword

1239 )\b)

1240 | (?P<str>

1241 [rfut]*( # string prefix

1242 \"""(\\.|[^\\]|\n)*?\""" # long singly-quoted string

1243 | '''(\\.|[^\\]|\n)*?''' # long doubly-quoted string

1244 | '(?:\\.|[^\\'])*(?:'|\n) # singly-quoted string

1245 | "(?:\\.|[^\\"])*(?:"|\n))) # doubly-quoted string

1246 | (?P<lit>

1247 (?<![\.\w])(

1248 [+-]?\d+(?:\.\d*(?:e[+-]?\d+)?)? # int or float

1249 | [+-]?\.\d+(?:e[+-]?\d+)? # float that starts with dot

1250 | [+-]?0x[0-9a-fA-F]+ # hex

1251 | [+-]?0b[01]+ # bin

1252 | \b(?:None|True|False)\b)) # bool or none

1253 | (?P<type>

1254 \b(?: # type

1256 bytes|bytearray|memoryview|(?:[A-Z](?:[A-Z0-9_]*?[a-z]\w*)?)

1257 )\b)

1258 | (?P<punct>[{}()\[\]\\;,]) # punctuation

1259 | (?P<comment>\#.*$) # comment

1260 """,

1261 re.MULTILINE | re.VERBOSE,

1262)

1263_PY_ESC_PATTERN = re.compile(

1264 r"""

1265 \\(

1266 \n # escaped newline

1267 | [\\'"abfnrtv] # normal escape

1268 | [0-7]{3} # octal escape

1269 | x[0-9a-fA-F]{2} # hex escape

1270 | u[0-9a-fA-F]{4} # short unicode escape

1271 | U[0-9a-fA-F]{8} # long unicode escape

1272 | N\{[^}\n]+\} # unicode character names

1273 | [{}] # template

1274 | % # percent formatting

1275 (?:$[^)]*$)? # mapping key

1276 [#0\-+ ]* # conversion Flag

1277 (?:\*|\d+)? # field width

1278 (?:\.(?:\*|\d*))? # precision

1279 [hlL]? # unused length modifier

1280 . # conversion type

1281 )

1282 """,

1283 re.VERBOSE,

1284)

1285

1286

1287SyntaxHighlighter.register_highlighter(

1288 _ReSyntaxHighlighter(

1289 ["py", "py3", "py-3", "python", "python3", "python-3"],

1290 _PY_SYNTAX,

1291 str_esc_pattern=_PY_ESC_PATTERN,

1292 )

1293)

1294SyntaxHighlighter.register_highlighter(

1295 _ReSyntaxHighlighter(

1296 ["repr"],

1297 _PY_SYNTAX,

1298 str_esc_pattern=_PY_ESC_PATTERN,

1299 )

1300)

1301SyntaxHighlighter.register_highlighter(

1302 _ReSyntaxHighlighter(

1303 ["sh", "bash"],

1304 re.compile(

1305 r"""

1306 (?P<kwd>

1307 \b(?: # keyword

1309 esac|coproc|select|function

1310 )\b

1311 | \[\[ # `test` syntax: if [[ ... ]]

1312 | \]\])

1313 | (?P<a0__punct>(?:^|\|\|?|&&|\$\()) # chaining operator: pipe or logic

1314 (?P<a1__>\s*)

1315 (?P<a2__prog>[\w./~]([\w.@/-]|\\.)+) # prog

1316 | (?P<str>

1317 '[^']*' # singly-quoted string

1318 | "(?:\\.|[^\\"])*") # doubly-quoted string

1319 | (?P<punct>

1320 [{}()\[\]\\;!&|] # punctuation

1321 | <{1,3} # input redirect

1322 | [12]?>{1,2}(?:&[12])?) # output redirect

1323 | (?P<comment>\#.*$) # comment

1324 | (?P<flag>(?<![\w-])-[a-zA-Z0-9_-]+\b) # flag

1325 """,

1326 re.MULTILINE | re.VERBOSE,

1327 ),

1328 ),

1329)

1330SyntaxHighlighter.register_highlighter(

1331 _ReSyntaxHighlighter(

1332 ["sh-usage", "bash-usage"],

1333 re.compile(

1334 r"""

1335 (?P<kwd>

1336 \b(?: # keyword

1338 esac|coproc|select|function

1339 )\b)

1340 | (?P<prog>%$prog$s) # prog

1341 | (?P<str>

1342 '[^']*' # singly-quoted string

1343 | "(?:\\.|[^\\"])*") # doubly-quoted string

1344 | (?P<comment>\#.*$) # comment

1345 | (?P<flag>(?<![\w-])

1346 -[-\w]+\b # flag

1347 | <options> # options

1348 )

1349 | (?P<metavar><[^>]+>) # metavar

1350 | (?P<punct>[{}()\[\]\\;!&|]) # punctuation

1351 """,

1352 re.MULTILINE | re.VERBOSE,

1353 ),

1354 )

1355)

1356SyntaxHighlighter.register_highlighter(

1357 _ReSyntaxHighlighter(

1358 ["diff"],

1359 re.compile(

1360 r"""

1361 (?P<meta>^(\-\-\-|\+\+\+|\@\@)[^\r\n]*$)

1362 | (?P<added>^\+[^\r\n]*$)

1363 | (?P<removed>^\-[^\r\n]*$)

1364 """,

1365 re.MULTILINE | re.VERBOSE,

1366 ),

1367 ),

1368)

1369SyntaxHighlighter.register_highlighter(

1370 _ReSyntaxHighlighter(

1371 ["json"],

1372 re.compile(

1373 r"""

1374 (?P<lit>\b(?:true|false|null)\b) # keyword

1375 | (?P<str>"(?:\\.|[^\\"])*(?:"|\n)) # doubly-quoted string

1376 | (?P<punct>[{}\[\],:]) # punctuation

1377 """,

1378 re.MULTILINE | re.VERBOSE,

1379 ),

1380 str_esc_pattern=re.compile(

1381 r"""

1382 \\(

1383 \n

1384 | [\\/"bfnrt]

1385 | u[0-9a-fA-F]{4}

1386 )

1387 """,

1388 re.VERBOSE,

1389 ),

1390 ),

1391)

1392

1393

1394class _TbHighlighter(SyntaxHighlighter):

1395 @property

1396 def syntaxes(self) -> list[str]:

1397 return [

1398 "tb",

1399 "traceback",

1400 "py-tb",

1401 "py3-tb",

1402 "py-3-tb",

1403 "py-traceback",

1404 "py3-traceback",

1405 "py-3-traceback",

1406 "python-tb",

1407 "python3-tb",

1408 "python-3-tb",

1409 "python-traceback",

1410 "python3-traceback",

1411 "python-3-traceback",

1412 ]

1413

1414 class _StackColors:

1415 def __init__(

1416 self, theme: yuio.theme.Theme, default_color: yuio.color.Color, tag: str

1417 ):

1418 self.file_color = default_color | theme.get_color(f"tb/frame/{tag}/file")

1419 self.file_path_color = default_color | theme.get_color(

1420 f"tb/frame/{tag}/file/path"

1421 )

1422 self.file_line_color = default_color | theme.get_color(

1423 f"tb/frame/{tag}/file/line"

1424 )

1425 self.file_module_color = default_color | theme.get_color(

1426 f"tb/frame/{tag}/file/module"

1427 )

1428 self.code_color = default_color | theme.get_color(f"tb/frame/{tag}/code")

1429 self.highlight_color = default_color | theme.get_color(

1430 f"tb/frame/{tag}/highlight"

1431 )

1432

1433 _TB_RE = re.compile(

1434 r"^(?P<indent>[ |+]*)(Stack|Traceback|Exception Group Traceback) $most recent call last$:$"

1435 )

1436 _TB_MSG_RE = re.compile(r"^(?P<indent>[ |+]*)[A-Za-z_][A-Za-z0-9_]*($|:.*$)")

1437 _TB_LINE_FILE = re.compile(

1438 r'^[ |+]*File (?P<file>"[^"]*"), line (?P<line>\d+)(?:, in (?P<loc>.*))?$'

1439 )

1440 _TB_LINE_HIGHLIGHT = re.compile(r"^[ |+^~-]*$")

1441 _SITE_PACKAGES = os.sep + "lib" + os.sep + "site-packages" + os.sep

1442 _LIB_PYTHON = os.sep + "lib" + os.sep + "python"

1443

1444 def highlight(

1445 self,

1446 theme: yuio.theme.Theme,

1447 code: str,

1448 default_color: yuio.color.Color | str | None = None,

1449 ) -> yuio.string.ColorizedString:

1450 default_color = self._get_default_color(theme, default_color)

1451

1452 py_highlighter = SyntaxHighlighter.get_highlighter("python")

1453

1454 heading_color = default_color | theme.get_color("tb/heading")

1455 message_color = default_color | theme.get_color("tb/message")

1456

1457 stack_normal_colors = self._StackColors(theme, default_color, "usr")

1458 stack_lib_colors = self._StackColors(theme, default_color, "lib")

1459 stack_colors = stack_normal_colors

1460

1461 res = yuio.string.ColorizedString()

1462

1463 PLAIN_TEXT, STACK, MESSAGE = 1, 2, 3

1464 state = PLAIN_TEXT

1465 stack_indent = ""

1466 message_indent = ""

1467

1468 for line in code.splitlines(keepends=True):

1469 if state is STACK:

1470 if line.startswith(stack_indent):

1471 # We're still in the stack.

1472 if match := self._TB_LINE_FILE.match(line):

1473 file, line, loc = match.group("file", "line", "loc")

1474

1475 if self._SITE_PACKAGES in file or self._LIB_PYTHON in file:

1476 stack_colors = stack_lib_colors

1477 else:

1478 stack_colors = stack_normal_colors

1479

1480 res += yuio.color.Color.NONE

1481 res += stack_indent

1482 res += stack_colors.file_color

1483 res += "File "

1484 res += stack_colors.file_path_color

1485 res += file

1486 res += stack_colors.file_color

1487 res += ", line "

1488 res += stack_colors.file_line_color

1489 res += line

1490 res += stack_colors.file_color

1491

1492 if loc:

1493 res += ", in "

1494 res += stack_colors.file_module_color

1495 res += loc

1496 res += stack_colors.file_color

1497

1498 res += "\n"

1499 elif match := self._TB_LINE_HIGHLIGHT.match(line):

1500 res += yuio.color.Color.NONE

1501 res += stack_indent

1502 res += stack_colors.highlight_color

1503 res += line[len(stack_indent) :]

1504 else:

1505 res += yuio.color.Color.NONE

1506 res += stack_indent

1507 res += py_highlighter.highlight(

1508 theme,

1509 line[len(stack_indent) :],

1510 stack_colors.code_color,

1511 )

1512 continue

1513 else:

1514 # Stack has ended, this line is actually a message.

1515 state = MESSAGE

1516

1517 if state is MESSAGE:

1518 if line and line != "\n" and line.startswith(message_indent):

1519 # We're still in the message.

1520 res += yuio.color.Color.NONE

1521 res += message_indent

1522 res += message_color

1523 res += line[len(message_indent) :]

1524 continue

1525 else:

1526 # Message has ended, this line is actually a plain text.

1527 state = PLAIN_TEXT

1528

1529 if state is PLAIN_TEXT:

1530 if match := self._TB_RE.match(line):

1531 # Plain text has ended, this is actually a heading.

1532 message_indent = match.group("indent").replace("+", "|")

1533 stack_indent = message_indent + " "

1534

1535 res += yuio.color.Color.NONE

1536 res += message_indent

1537 res += heading_color

1538 res += line[len(message_indent) :]

1539

1540 state = STACK

1541 continue

1542 elif match := self._TB_MSG_RE.match(line):

1543 # Plain text has ended, this is an error message (without a traceback).

1544 message_indent = match.group("indent").replace("+", "|")

1545 stack_indent = message_indent + " "

1546

1547 res += yuio.color.Color.NONE

1548 res += message_indent

1549 res += message_color

1550 res += line[len(message_indent) :]

1551

1552 state = MESSAGE

1553 continue

1554 else:

1555 # We're still in plain text.

1556 res += yuio.color.Color.NONE

1557 res += line

1558 continue

1559

1560 return res

1561

1562

1563SyntaxHighlighter.register_highlighter(_TbHighlighter())

Coverage for yuio / md.py: 91%

547 statements