Coverage for yuio / rst.py: 97%

948 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-03 15:42 +0000

1# Yuio project, MIT license. 

2# 

3# https://github.com/taminomara/yuio/ 

4# 

5# You're free to copy this file to your project and edit it for your needs, 

6# just keep this copyright line please :3 

7 

8""" 

9Parser for ReStructuredText. 

10 

11Yuio supports all RST features except tables and option lists. 

12 

13**Supported block markup:** 

14 

15- headings, 

16- numbered and bullet lists, 

17- definition lists, 

18- field lists, 

19- literal blocks, both indented and quoted, 

20- line blocks, 

21- quotes, 

22- doctest blocks, 

23- directives, 

24- hyperlink targets, 

25- footnotes, 

26- thematic breaks. 

27 

28**Supported directives:** 

29 

30- code: 

31 ``code-block``, 

32 ``sourcecode``, 

33 ``code``; 

34- admonitions: 

35 ``attention``, 

36 ``caution``, 

37 ``danger``, 

38 ``error``, 

39 ``hint``, 

40 ``important``, 

41 ``note``, 

42 ``seealso``, 

43 ``tip``, 

44 ``warning``; 

45- versioning: 

46 ``versionadded``, 

47 ``versionchanged``, 

48 ``deprecated``; 

49- any other directive is rendered as un-highlighted code. 

50 

51**Supported inline syntax:** 

52 

53- emphasis (``*em*``), 

54- strong emphasis (``**strong**``), 

55- inline code in backticks (```code```), 

56- interpreted text (```code```, ``:role:`code```), 

57- hyperlink references (```text`_``, ``text_``, ```text`__``, ``text__``) 

58 in terminals that can render them, 

59- footnotes (``[...]_``), 

60- inline internal targets and substitution references are parsed correctly, 

61 but they have no effect. 

62 

63**Supported inline roles:** 

64 

65- ``flag`` for CLI flags, 

66- any other role is interpreted as documentation reference with explicit titles 

67 (``:py:class:`title <mod.Class>```) and shortening paths via tilde 

68 (``:py:class:`~mod.Class```). 

69 

70.. autofunction:: parse 

71 

72.. autoclass:: RstParser 

73 :members: 

74 

75""" 

76 

77from __future__ import annotations 

78 

79import dataclasses 

80import re 

81import string 

82from dataclasses import dataclass 

83from enum import Enum 

84 

85import yuio.doc 

86from yuio.util import dedent as _dedent 

87 

88import yuio._typing_ext as _tx 

89from typing import TYPE_CHECKING 

90 

91if TYPE_CHECKING: 

92 import typing_extensions as _t 

93else: 

94 from yuio import _typing as _t 

95 

96__all__ = [ 

97 "RstParser", 

98 "parse", 

99] 

100 

101 

class _LineEnding(Enum):
    """How the last line of the previous block ended.

    Affects how the next block is interpreted: a trailing double colon
    turns the following indented or quoted block into a literal block.
    """

    # Regular line ending; the next block is parsed normally.
    NORMAL = "NORMAL"
    # Line ends with double colon, marking the start of a literal block.
    LITERAL_MARK = "LITERAL_MARK"

105 

106 

# Start of a line block: a `|` marker followed by whitespace or end of line.
_LINE_BLOCK_START_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<open_marker>\|)
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    """,
    re.VERBOSE,
)


# Start of a bullet list item: `* item`, `- item`, etc.
_BULLET_LIST_START_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<enumerator>[*+•‣⁃-])
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    $
    """,
    re.VERBOSE,
)


# Start of an enumerated list item: `1. item`, `(a) item`, `#)` etc.
# The enumerator can be a number, an auto-enumerator `#`, or letters
# (which may also encode roman numerals — disambiguated later).
_NUM_LIST_START_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<open_marker>\(?)
        (?P<enumerator>
            (?P<enumerator_num>\d+)
            | (?P<enumerator_auto>\#)
            | (?P<enumerator_lowercase>[a-z]+)
            | (?P<enumerator_uppercase>[A-Z]+)
        )
        (?P<close_marker>[).])
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    $
    """,
    re.VERBOSE,
)


# Start of explicit markup (directive, comment, footnote, hyperlink target):
# a `..` marker followed by whitespace or end of line.
_EXPLICIT_MARKUP_START_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<open_marker>\.\.)
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    $
    """,
    re.VERBOSE,
)

# Shorthand anonymous hyperlink target: `__ <target>`.
_IMPLICIT_HYPERLINK_TARGET_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<open_marker>:)
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    $
    """,
    re.VERBOSE,
)

# Start of a field list item: `:name: body`. The field name may contain
# escaped characters and colons not followed by whitespace or a backtick.
_FIELD_START_RE = re.compile(
    r"""
    ^
    (?P<indent>
        (?P<open_marker>:)
        (?P<content>(?:[^:\\]|\\.|:(?!\s|`))+)
        (?P<close_marker>:)
        (?P<space>\s+|$)
    )
    (?P<tail>.*)
    $
    """,
    re.VERBOSE,
)


# ASCII punctuation as a tuple, suitable for passing to `str.startswith`.
_PUNCT = tuple(string.punctuation)

198 

199 

@dataclass(slots=True)
class _Hyperlink:
    """A hyperlink target, footnote, or redirect collected during parsing."""

    # First line of the construct in the parsed source.
    start: int
    # One past the last line of the construct in the parsed source.
    end: int
    # Normalized anchor name ("_" marks an anonymous target).
    name: str
    # "link" points directly to a URL, "footnote" to a footnote marker,
    # "redirect" to another anchor that must be resolved further.
    type: _t.Literal["link", "footnote", "redirect"]
    # Link URL, footnote marker, or redirect target anchor.
    content: str

207 

208 

209class _LinkResolver: 

210 def __init__( 

211 self, 

212 targets: dict[str, _Hyperlink], 

213 anonymous_links: list[_Hyperlink], 

214 auto_numbered_footnotes: list[str] = [], 

215 auto_character_footnotes: list[str] = [], 

216 ) -> None: 

217 self._targets: dict[str, _Hyperlink] = targets 

218 

219 self._anonymous_links: list[_Hyperlink] = anonymous_links 

220 self._current_anonymous_link = 0 

221 

222 self._auto_numbered_footnotes: list[str] = auto_numbered_footnotes 

223 self._current_auto_numbered_footnote = 0 

224 self._auto_character_footnotes: list[str] = auto_character_footnotes 

225 self._current_auto_character_footnote = 0 

226 

227 def find_link(self, title: str, target: str | None, is_anonymous: bool): 

228 if target: 

229 # Process explicit target. 

230 target, is_redirect = _normalize_hyperlink_target(target) 

231 if is_redirect: 

232 link = self._resolve_redirect(target) 

233 else: 

234 link = _Hyperlink(0, 0, title, "link", target) 

235 if link and not is_anonymous: 

236 # Save implicitly declared anchor. 

237 anchor = _normalize_hyperlink_anchor(title) 

238 self._targets.setdefault(anchor, link) 

239 elif is_anonymous: 

240 link = self._next_anonymous_link() 

241 else: 

242 anchor = _normalize_hyperlink_anchor(title) 

243 if anchor.startswith("#"): 

244 anchor = anchor[1:] 

245 if not anchor: 

246 anchor = self._next_auto_numbered_footnote() or "" 

247 elif anchor.startswith("*"): 

248 anchor = anchor[1:] 

249 if not anchor: 

250 anchor = self._next_auto_character_footnote() or "" 

251 if not anchor: 

252 return None 

253 link = self._targets.get(anchor) 

254 if link and link.type == "redirect": 

255 link = self._resolve_redirect(link.content) 

256 if not link or not link.content: 

257 return None 

258 else: 

259 return link 

260 

261 def _next_anonymous_link(self): 

262 if self._current_anonymous_link >= len(self._anonymous_links): 

263 return None 

264 link = self._anonymous_links[self._current_anonymous_link] 

265 self._current_anonymous_link += 1 

266 return link 

267 

268 def _next_auto_numbered_footnote(self): 

269 if self._current_auto_numbered_footnote >= len(self._auto_numbered_footnotes): 

270 return None 

271 link = self._auto_numbered_footnotes[self._current_auto_numbered_footnote] 

272 self._current_auto_numbered_footnote += 1 

273 return link 

274 

275 def _next_auto_character_footnote(self): 

276 if self._current_auto_character_footnote >= len(self._auto_character_footnotes): 

277 return None 

278 link = self._auto_character_footnotes[self._current_auto_character_footnote] 

279 self._current_auto_character_footnote += 1 

280 return link 

281 

282 def _resolve_redirect(self, target: str): 

283 seen = set() 

284 while target not in seen: 

285 seen.add(target) 

286 link = self._targets.get(target) 

287 if link and link.type == "redirect": 

288 target = link.content 

289 elif link: 

290 return link 

291 return None 

292 

293 

294_FOOTNOTE_CHARS = "*†‡§¶#♠♥♦♣" 

295 

296 

297def _char_footnote(n: int, /) -> str: 

298 assert n > 0 

299 n_chars = len(_FOOTNOTE_CHARS) 

300 result = "" 

301 while n > 0: 

302 n -= 1 

303 result = _FOOTNOTE_CHARS[n % n_chars] + result 

304 n //= n_chars 

305 return result 

306 

307 

@_t.final
class RstParser(yuio.doc.DocParser):
    """
    Parses a subset of ReStructuredText.

    See the module documentation for the list of supported markup features.

    """

    def parse(self, s: str, /) -> yuio.doc.Document:
        """Parse *s* as ReStructuredText and return a document tree."""

        # Block-level parsing mutates `_lines` in place: markers and common
        # indentation are stripped as blocks are consumed.
        self._lines = s.expandtabs(tabsize=4).splitlines(keepends=False)
        # Maps (marker char, has overline) to heading level, assigned in
        # order of first appearance.
        self._headings: dict[tuple[str, bool], int] = {}
        # All targets/footnotes in document order (used for chaining).
        self._links: list[_Hyperlink] = []
        # Anonymous targets, matched to references by order of appearance.
        self._anonymous_links: list[_Hyperlink] = []
        # Named targets keyed by normalized anchor.
        self._targets: dict[str, _Hyperlink] = {}
        # Counters for generating auto-numbered and auto-symbol footnote
        # markers.
        self._last_numbered_footnote = 1
        self._last_character_footnote = 1
        self._auto_numbered_footnotes: list[str] = []
        self._auto_character_footnotes: list[str] = []

        root = yuio.doc.Document(items=[])
        self._process_block(root, 0, len(self._lines))
        link_resolver = _LinkResolver(
            self._targets,
            self._anonymous_links,
            self._auto_numbered_footnotes,
            self._auto_character_footnotes,
        )
        yuio.doc._clean_tree(root)
        # Inline markup is processed only after all link targets are known.
        self._process_inline_text(root, link_resolver)
        return root

    def parse_paragraph(self, s: str, /) -> list[str | yuio.doc.TextRegion]:
        """Parse inline markup of a single standalone paragraph.

        Uses an empty link resolver, so no targets can be resolved here.

        """

        return _InlineParser(s, _LinkResolver({}, [], [], [])).run()

    def _process_inline_text(
        self, node: yuio.doc.AstBase, link_resolver: _LinkResolver
    ):
        # Recursively run the inline parser over all text nodes in the tree.
        if isinstance(node, yuio.doc.Admonition):
            processor = _InlineParser("\n".join(map(str, node.title)), link_resolver)
            node.title = processor.run()
        if isinstance(node, yuio.doc.Text):
            processor = _InlineParser("\n".join(map(str, node.items)), link_resolver)
            node.items = processor.run()
        elif isinstance(node, yuio.doc.Container):
            for item in node.items:
                self._process_inline_text(item, link_resolver)

    def _process_block(self, parent: yuio.doc.Container[_t.Any], start: int, end: int):
        # Consume blocks from `_lines[start:end]`, appending AST nodes
        # to `parent`; returns `parent` for chaining.
        i = start
        prev_line_ending = _LineEnding.NORMAL

        while i < end:
            i, prev_line_ending = self._consume_block(parent, i, end, prev_line_ending)

        return parent

    def _consume_block(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        prev_line_ending: _LineEnding,
    ) -> tuple[int, _LineEnding]:
        # Dispatch one block by inspecting its first line. Returns the index
        # just past the consumed block, plus how its last line ended.
        if start >= end:  # pragma: no cover
            return start, prev_line_ending

        line = self._lines[start]

        if _is_blank(line):
            return start + 1, prev_line_ending

        result = None

        if prev_line_ending == _LineEnding.LITERAL_MARK and (
            line.startswith(" ") or line.startswith(_PUNCT)
        ):
            # A trailing `::` on the previous paragraph starts a literal
            # block, either indented or quoted with punctuation.
            result = self._try_process_literal_text(parent, start, end)
        elif _is_heading_underline(self._lines, start, end):
            result = start + 2
            self._process_title(parent, line, self._lines[start + 1][0], False)
        elif _is_heading_overline(self._lines, start, end):
            result = start + 3
            self._process_title(parent, self._lines[start + 1], line[0], True)
        elif line.startswith(">>>"):
            result = self._process_doctest_block(parent, start, end)
        elif line.startswith(" "):
            result = self._process_block_quote(parent, start, end)
        elif match := _LINE_BLOCK_START_RE.match(line):
            result = self._process_line_block(parent, start, end, match)
        elif match := _BULLET_LIST_START_RE.match(line):
            result = self._process_bullet_list(parent, start, end, match)
        elif match := _NUM_LIST_START_RE.match(line):
            result = self._try_process_numbered_list(parent, start, end, match)
        elif match := _EXPLICIT_MARKUP_START_RE.match(line):
            result = self._try_process_explicit_markup(parent, start, end, match)
        elif match := _IMPLICIT_HYPERLINK_TARGET_RE.match(line):
            result = self._process_implicit_hyperlink_target(parent, start, end, match)
        elif match := _FIELD_START_RE.match(line):
            result = self._process_field_list(parent, start, end, match)
        elif (
            start + 1 < end
            and self._lines[start + 1].startswith(" ")
            and not _is_blank(self._lines[start + 1])
        ):
            # A non-indented line followed by an indented one starts
            # a definition list item.
            result = self._process_def_list(parent, start, end)

        if result is None:
            # Nothing matched, or a `_try_*` handler declined: fall back
            # to a plain paragraph.
            return self._process_paragraph(parent, start, end)
        else:
            return result, _LineEnding.NORMAL

    def _process_title(
        self,
        parent: yuio.doc.Container[_t.Any],
        title: str,
        marker: str,
        is_overline: bool,
    ):
        # Heading levels are assigned to (marker, overline) styles in order
        # of first appearance, per RST rules.
        if level := self._headings.get((marker, is_overline)):
            parent.items.append(yuio.doc.Heading(items=[title.strip()], level=level))
        else:
            level = len(self._headings) + 1
            self._headings[(marker, is_overline)] = level
            parent.items.append(yuio.doc.Heading(items=[title.strip()], level=level))

    def _try_process_literal_text(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int
    ) -> int | None:
        # Literal block after a `::` marker: either indented, or "quoted"
        # with a repeated punctuation prefix.
        ch = self._lines[start][0]

        if ch.isspace():
            end = self._gather_indented_lines(start, end, True)
        elif ch in _PUNCT:
            end = self._gather_prefixed_lines(start, end, ch)
        else:  # pragma: no cover
            return None

        node = yuio.doc.Code(lines=[], syntax="text")
        for i in range(start, end):
            node.lines.append(self._lines[i])
        parent.items.append(node)

        return end

    def _process_line_block(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch | None,
    ) -> int | None:
        # Line blocks keep per-line breaks; each `|` line (plus its indented
        # continuations) becomes one visual line, joined with `\v`.
        block_end = start + 1
        lines = []
        while match:
            self._lines[start] = match["tail"]
            block_end = self._gather_indented_lines(start + 1, end, False)
            lines.append(" ".join(self._lines[start:block_end]))

            start = block_end
            if start >= end:
                match = None
            else:
                match = _LINE_BLOCK_START_RE.match(self._lines[start])

        node = yuio.doc.Paragraph(items=["\v".join(lines)])
        parent.items.append(node)
        return block_end

    def _process_bullet_list(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch,
    ) -> int:
        # Continue the previous list node if this item uses the same
        # enumerator; otherwise start a new list.
        if (
            parent.items
            and isinstance(parent.items[-1], yuio.doc.List)
            and parent.items[-1].items
        ):
            list_node = parent.items[-1]
            prev_enumerator_kind = list_node.enumerator_kind
            prev_marker_kind = list_node.marker_kind
            prev_num = list_node.items[-1].number
        else:
            list_node = None
            prev_enumerator_kind = None
            prev_marker_kind = None
            prev_num = None

        enumerator_kind = match["enumerator"]
        marker_kind = None
        num = None

        if (
            enumerator_kind != prev_enumerator_kind
            or marker_kind != prev_marker_kind
            or (prev_num is not None)
        ):
            list_node = None

        if list_node is None:
            list_node = yuio.doc.List(
                items=[], enumerator_kind=enumerator_kind, marker_kind=marker_kind
            )
            parent.items.append(list_node)

        # Strip the marker and gather the item's indented body.
        self._lines[start] = match["tail"]
        if not match["space"]:
            end = self._gather_indented_lines(start + 1, end, True)
        else:
            indent = len(match["indent"])
            end = self._gather_exactly_indented_lines(start + 1, end, indent, True)

        node = yuio.doc.ListItem(items=[], number=num)
        self._process_block(node, start, end)
        list_node.items.append(node)
        return end

    def _try_process_numbered_list(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch,
    ) -> int | None:
        # Like `_process_bullet_list`, but the enumerator encodes a number
        # (arabic, letters, or roman) which must continue the previous item.
        if (
            parent.items
            and isinstance(parent.items[-1], yuio.doc.List)
            and parent.items[-1].items
        ):
            list_node = parent.items[-1]
            prev_enumerator_kind = list_node.enumerator_kind
            prev_marker_kind = list_node.marker_kind
            prev_num = list_node.items[-1].number
        else:
            list_node = None
            prev_enumerator_kind = None
            prev_marker_kind = None
            prev_num = None

        list_data = _detect_num_list_type(
            match,
            prev_enumerator_kind,
            prev_marker_kind,
            prev_num,
        )

        if list_data is None:
            return None  # TODO: this is not covered, I don't know why

        enumerator_kind, marker_kind, num = list_data

        # Verify next line (if exists) is compatible
        if start + 1 < end:
            next_line = self._lines[start + 1]
            if not (
                not next_line
                or next_line.startswith(" ")
                or _is_list_start(next_line, enumerator_kind, marker_kind, num)
            ):
                return None

        if (
            enumerator_kind != prev_enumerator_kind
            or marker_kind != prev_marker_kind
            or (prev_num is None or num != prev_num + 1)
        ):
            # This item doesn't continue the previous list.
            list_node = None

        if list_node is None:
            list_node = yuio.doc.List(
                items=[], enumerator_kind=enumerator_kind, marker_kind=marker_kind
            )
            parent.items.append(list_node)

        # Strip the marker and gather the item's indented body.
        self._lines[start] = match["tail"]
        if not match["space"]:
            end = self._gather_indented_lines(start + 1, end, True)
        else:
            indent = len(match["indent"])
            end = self._gather_exactly_indented_lines(start + 1, end, indent, True)

        node = yuio.doc.ListItem(items=[], number=num)
        self._process_block(node, start, end)
        list_node.items.append(node)
        return end

    def _process_doctest_block(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int
    ) -> int | None:
        # A doctest block starts with `>>>` and runs until a blank line.
        node = yuio.doc.Code(lines=[], syntax="python")

        block_end = 0
        for i in range(start, end):
            line = self._lines[i]
            if _is_blank(line):
                break
            node.lines.append(line)
            block_end = i + 1

        parent.items.append(node)
        return block_end

    def _try_process_explicit_markup(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch,
    ) -> int | None:
        """Try to process explicit markup (directives, comments, etc.)."""
        content = match["tail"].strip()

        if not content:
            # Bare `..`: a comment, possibly with an indented body.
            start += 1
            if start < end and not _is_blank(self._lines[start]):
                return self._gather_indented_lines(start + 1, end, True)
            else:
                return start

        if content.startswith("["):
            return self._parse_footnote(parent, start, end, content)

        if content.startswith("|"):
            # TODO: save substitution
            return self._gather_indented_lines(start + 1, end, False)

        if content.startswith("_"):
            return self._parse_hyperlink_target(start, end, content)

        # Directive
        if "::" in content:
            return self._parse_directive(parent, start, end, content)

        # Default to comment
        return self._gather_indented_lines(start + 1, end, True)

    def _parse_hyperlink_target(self, start: int, end: int, content: str):
        # `.. _anchor: target` — records a named target or a redirect to
        # another anchor; produces no AST node.
        end = self._gather_indented_lines(start + 1, end, False)
        content += "\n".join(self._lines[start + 1 : end])
        anchor, _, target = content[1:].partition(":")
        anchor = _normalize_hyperlink_anchor(anchor)
        target, is_redirect = _normalize_hyperlink_target(target)
        self._add_link(
            _Hyperlink(
                start,
                end,
                anchor,
                "redirect" if is_redirect else "link",
                target,
            )
        )
        return end

    def _parse_footnote(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int, content: str
    ):
        # `.. [name] body` — registers the footnote target and renders its
        # body into a `FootnoteContainer`.
        end = self._gather_indented_lines(start + 1, end, True)
        name, _, content = content[1:].partition("]")
        self._lines[start] = content.strip()

        if name.startswith("#"):
            # Auto-numbered footnote; pick the next free number.
            name = name[1:]
            while True:
                auto_name = str(self._last_numbered_footnote)
                self._last_numbered_footnote += 1
                if auto_name not in self._targets:
                    break
            if not name:
                self._auto_numbered_footnotes.append(auto_name)
        elif name.startswith("*"):
            # Auto-symbol footnote; pick the next free symbol.
            name = name[1:]
            while True:
                auto_name = _char_footnote(self._last_character_footnote)
                self._last_character_footnote += 1
                if auto_name not in self._targets:
                    break
            if not name:
                self._auto_character_footnotes.append(auto_name)
        else:
            auto_name = name

        link = _Hyperlink(start, end, auto_name, "footnote", auto_name)
        self._add_link(link)
        if name and name not in self._targets:
            self._targets[name] = link

        # Consecutive footnotes share one container node.
        if parent.items and isinstance(parent.items[-1], yuio.doc.FootnoteContainer):
            container = parent.items[-1]
        else:
            container = yuio.doc.FootnoteContainer(items=[])
            parent.items.append(container)

        node = yuio.doc.Footnote(
            items=[],
            marker=auto_name,
        )
        self._process_block(node, start, end)
        container.items.append(node)

        return end

    def _add_link(self, link: _Hyperlink):
        # Register a target. If this target has content, propagate it to
        # directly preceding empty targets (RST target chaining: several
        # stacked anchors all point to the next real target).
        if link.content:
            start = link.start
            for prev_link in reversed(self._links):
                if prev_link.content:
                    break
                if not (
                    prev_link.end == start
                    or all(
                        _is_blank(line) for line in self._lines[prev_link.end : start]
                    )
                ):
                    break
                prev_link.type = link.type
                prev_link.content = link.content
                start = prev_link.start
        self._links.append(link)
        if link.name == "_":
            self._anonymous_links.append(link)
        elif link.name not in self._targets:
            self._targets[link.name] = link

    def _parse_directive(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int, content: str
    ) -> int:
        # `.. name:: arg` — delegate rendering to `yuio.doc`.
        name, _, arg = content.partition("::")
        name = name.strip()
        arg = arg.strip()

        end = self._gather_indented_lines(start + 1, end, True)

        i = start + 1

        # Parse arguments and options.
        while i < end:
            arg_line = self._lines[i]
            i += 1
            if _is_blank(arg_line):
                break

        # The two lambdas let `_process_directive` choose between treating
        # the body as raw lines or as parsed blocks.
        parent.items.extend(
            yuio.doc._process_directive(
                name,
                arg,
                lambda: self._lines[i:end],
                lambda: self._process_block(yuio.doc.Document(items=[]), i, end).items,
            )
        )

        return end

    def _process_block_quote(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int
    ) -> int:
        # An indented block (not preceded by `::`) is a block quote.
        end = self._gather_indented_lines(start, end, True)
        node = yuio.doc.Quote(items=[])
        self._process_block(node, start, end)
        parent.items.append(node)
        return end

    def _process_implicit_hyperlink_target(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch,
    ) -> int:
        # `__ target` is shorthand for an anonymous hyperlink target.
        return self._parse_hyperlink_target(start, end, f"__: {match.group('tail')}")

    def _process_field_list(
        self,
        parent: yuio.doc.Container[_t.Any],
        start: int,
        end: int,
        match: _tx.StrReMatch,
    ) -> int:
        # `:name: body` — rendered as a "field" admonition. The `\ :` suffix
        # appends a literal colon to the title via an escaped space.
        self._lines[start] = match["tail"]
        end = self._gather_indented_lines(start + 1, end, True)
        node = yuio.doc.Admonition(
            items=[],
            title=[match["content"].strip() + "\\ :"],
            type="field",
        )
        self._process_block(node, start, end)
        parent.items.append(node)
        return end

    def _process_def_list(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int
    ) -> int:
        # Definition list item: term on the first line, indented definition
        # below; rendered as a "definition" admonition.
        end = self._gather_indented_lines(start + 1, end, True)
        node = yuio.doc.Admonition(
            items=[],
            title=[self._lines[start].strip()],
            type="definition",
        )
        self._process_block(node, start + 1, end)
        parent.items.append(node)
        return end

    def _process_paragraph(
        self, parent: yuio.doc.Container[_t.Any], start: int, end: int
    ) -> tuple[int, _LineEnding]:
        # Plain paragraph; also handles `::` literal markers and transitions.
        end = self._gather_exactly_indented_lines(start, end, 0, False)
        if end == start + 1 and self._lines[start].strip() == "::":
            # A standalone `::` produces no output, only marks the next
            # block as literal.
            return end, _LineEnding.LITERAL_MARK
        elif end == start + 1 and _is_transition(self._lines[start]):
            parent.items.append(yuio.doc.ThematicBreak())
            return end, _LineEnding.NORMAL
        elif end > start and self._lines[end - 1].rstrip().endswith("::"):
            # A trailing `text::` keeps one colon and marks the next block
            # as literal.
            line_ending = _LineEnding.LITERAL_MARK
            self._lines[end - 1] = self._lines[end - 1].rstrip()[:-1]
        else:
            line_ending = _LineEnding.NORMAL
        node = yuio.doc.Paragraph(
            items=_t.cast(list[str | yuio.doc.TextRegion], self._lines[start:end])
        )
        parent.items.append(node)
        return end, line_ending

    def _gather_indented_lines(self, start: int, end: int, allow_blank: bool) -> int:
        # Find the extent of an indented run starting at `start`, and strip
        # the common indentation from those lines in place. Returns the
        # index one past the run.
        if start >= end:
            return start

        common_indent = None
        result_end = start

        for i in range(start, end):
            line = self._lines[i]
            if _is_blank(line):
                if allow_blank:
                    continue
                else:
                    break

            indent = len(line) - len(line.lstrip())
            if indent >= 1:
                result_end = i + 1
                if common_indent is None:
                    common_indent = indent
                else:
                    common_indent = min(common_indent, indent)
            else:
                break

        if common_indent:
            for i in range(start, result_end):
                self._lines[i] = self._lines[i][common_indent:]

        return result_end

    def _gather_exactly_indented_lines(
        self, start: int, end: int, min_indent: int, allow_blank: bool
    ) -> int:
        # Like `_gather_indented_lines`, but requires at least `min_indent`
        # spaces and strips exactly `min_indent` characters.
        result_end = start

        for i in range(start, end):
            line = self._lines[i]
            if _is_blank(line):
                if allow_blank:
                    continue
                else:
                    break

            if not min_indent:
                result_end = i + 1
            elif len(line) - len(line.lstrip()) >= min_indent:
                result_end = i + 1
                self._lines[i] = self._lines[i][min_indent:]
            else:
                break

        return result_end

    def _gather_prefixed_lines(self, start: int, end: int, prefix: str) -> int:
        # Find the extent of a run of lines all starting with `prefix`
        # (used for "quoted" literal blocks).
        result_end = start

        for i in range(start, end):
            if self._lines[i] and self._lines[i][0] == prefix:
                result_end = i + 1
            else:
                break

        return result_end

895 

896def _is_blank(line: str) -> bool: 

897 return not line or line.isspace() 

898 

899 

def _is_transition(line: str) -> bool:
    """Check whether *line* is a thematic break: four or more repetitions
    of a single punctuation character."""
    if len(line) < 4:
        return False
    first = line[0]
    return first in _PUNCT and all(ch == first for ch in line)

902 

903 

def _is_heading_underline(lines, start, end):
    """Check whether ``lines[start]`` is a heading title followed by
    an underline made of a single repeated punctuation character."""
    if end - start < 2:
        return False
    title = lines[start]
    underline = lines[start + 1]
    if not title or title.startswith(" ") or not underline:
        return False
    marker = underline[0]
    if marker not in _PUNCT:
        return False
    return all(ch == marker for ch in underline) and len(title) <= len(underline)

916 

917 

def _is_heading_overline(lines, start, end):
    """Check whether ``lines[start]`` starts an overlined heading:
    a punctuation line, the title, and a matching underline."""
    if end - start < 3:
        return False
    overline = lines[start]
    title = lines[start + 1]
    underline = lines[start + 2]
    if not overline or not title or not underline:
        return False
    marker = overline[0]
    if marker not in _PUNCT or marker != underline[0]:
        return False
    if not all(ch == marker for ch in overline) or len(title) > len(overline):
        return False
    return all(ch == marker for ch in underline) and len(title) <= len(underline)

933 

934 

# fmt: off
# The following code is copied from docutils/utils/punctuation_chars.py
# Copyright 2011, 2017 Günter Milde, 2-Clause BSD license.
# See https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/docutils/utils/punctuation_chars.py.
# See https://opensource.org/license/BSD-2-Clause.
# These tables implement the docutils "inline markup recognition rules":
# which characters may open or close inline markup.
_OPENERS = (
    "\"'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768"
    "\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea"
    "\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991"
    "\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28"
    "\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d"
    "\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41"
    "\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62"
    "\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20"
    "\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d"
    "\u2e1d\u2e21\u201b\u201f"
)
_CLOSERS = (
    "\"')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769"
    "\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb"
    "\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992"
    "\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29"
    "\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e"
    "\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42"
    "\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63"
    "\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21"
    "\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c"
    "\u2e1c\u2e20\u201a\u201e"
)
_DELIMITERS = (
    "\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589"
    "\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c"
    "\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d"
    "\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f"
    "\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f"
    "\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735"
    "\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945"
    "\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-"
    "\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-"
    "\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-"
    "\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00"
    "\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-"
    "\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0"
    "\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7"
    "\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f"
    "\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb"
    "\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c"
    "\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a"
    "\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a"
    "\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65"
    "\U00010100\U00010101\U0001039f\U000103d0\U00010857"
    "\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f"
    "\U00010b39-\U00010b3f\U000110bb\U000110bc\U000110be-"
    "\U000110c1\U00012470-\U00012473"
)
_CLOSING_DELIMITERS = r"\\.,;!?"
_QUOTE_PAIRS = {
    # open char: matching closing characters # use case
    "\xbb": "\xbb",  # » » Swedish
    "\u2018": "\u201a",  # ‘ ‚ Albanian/Greek/Turkish
    "\u2019": "\u2019",  # ’ ’ Swedish
    "\u201a": "\u2018\u2019",  # ‚ ‘ German, ‚ ’ Polish
    "\u201c": "\u201e",  # “ „ Albanian/Greek/Turkish
    "\u201e": "\u201c\u201d",  # „ “ German, „ ” Polish
    "\u201d": "\u201d",  # ” ” Swedish
    "\u203a": "\u203a",  # › › Swedish
}
def _match_chars(c1, c2):
    # Check whether `c2` is a valid closing counterpart for opener `c1`.
    try:
        i = _OPENERS.index(c1)
    except ValueError:  # c1 not in openers
        return False
    return c2 == _CLOSERS[i] or c2 in _QUOTE_PAIRS.get(c1, "")
# End docutils code.
# fmt: on

1010 

# Character classes used by the inline markup recognition rules: characters
# that may precede an inline-markup start, and characters that may follow
# an inline-markup end.
_OPENERS_RE = re.compile(rf"[{_OPENERS}{_DELIMITERS}]")
_CLOSERS_RE = re.compile(rf"[{_CLOSERS}{_DELIMITERS}{_CLOSING_DELIMITERS}]")

1013 

1014 

def _is_start_string(prev: str, next: str) -> bool:
    """Check whether inline markup may start between characters *prev*
    and *next*, per the docutils inline markup recognition rules."""
    if next.isspace():
        return False
    if prev.isspace():
        return True
    if _match_chars(prev, next):
        return False
    # NOTE: docutils' `character_level_inline_markup` option (which would
    # return True here unconditionally) is not supported.
    return _OPENERS_RE.match(prev) is not None

1025 

1026 

def _is_end_string(prev: str, next: str) -> bool:
    """Check whether inline markup may end between characters *prev*
    and *next*, per the docutils inline markup recognition rules."""
    if prev.isspace():
        return False
    if next.isspace():
        return True
    if _match_chars(prev, next):
        return False
    # NOTE: docutils' `character_level_inline_markup` option (which would
    # return True here unconditionally) is not supported.
    return _CLOSERS_RE.match(next) is not None

1037 

1038 

def _detect_num_list_type(
    match: _tx.StrReMatch,
    prev_enumerator_kind: yuio.doc.ListEnumeratorKind | str | None,
    prev_marker_kind: yuio.doc.ListMarkerKind | None,
    prev_num: int | None,
) -> tuple[yuio.doc.ListEnumeratorKind, yuio.doc.ListMarkerKind, int] | None:
    """
    Classify the enumerator of a numbered list item matched by
    ``_NUM_LIST_START_RE``.

    Returns ``(enumerator_kind, marker_kind, item_number)``, or `None`
    if the match is not a valid list item. If the `prev_*` values describe
    the preceding item of the same list, the item is first checked as a
    continuation of that list; otherwise it is classified as the start
    of a fresh list.
    """
    # Marker style: `(1)`, `1)`, or `1.`; anything else is not a list item.
    match (match["open_marker"], match["close_marker"]):
        case ("(", ")"):
            marker_kind = yuio.doc.ListMarkerKind.ENCLOSED
        case ("", ")"):
            marker_kind = yuio.doc.ListMarkerKind.PAREN
        case ("", "."):
            marker_kind = yuio.doc.ListMarkerKind.DOT
        case _:
            return None

    if (
        prev_enumerator_kind is not None
        and prev_marker_kind is not None
        and prev_num is not None
        and marker_kind == prev_marker_kind
        and isinstance(prev_enumerator_kind, yuio.doc.ListEnumeratorKind)
    ):
        # List continues.
        if match["enumerator"] == "#":
            # The auto-enumerator `#` continues any list.
            return prev_enumerator_kind, prev_marker_kind, prev_num + 1
        match prev_enumerator_kind:
            case yuio.doc.ListEnumeratorKind.NUMBER:
                expected_enumerator = str(prev_num + 1)
            case yuio.doc.ListEnumeratorKind.SMALL_LETTER:
                expected_enumerator = yuio.doc.to_letters(prev_num + 1)
            case yuio.doc.ListEnumeratorKind.CAPITAL_LETTER:
                expected_enumerator = yuio.doc.to_letters(prev_num + 1).upper()
            case yuio.doc.ListEnumeratorKind.SMALL_ROMAN:
                expected_enumerator = yuio.doc.to_roman(prev_num + 1)
            case yuio.doc.ListEnumeratorKind.CAPITAL_ROMAN:
                expected_enumerator = yuio.doc.to_roman(prev_num + 1).upper()
        # NOTE(review): assumes the cases above are exhaustive over
        # `ListEnumeratorKind`; a new enum member would leave
        # `expected_enumerator` unbound here — confirm.
        # Leading zeros are insignificant (`007.` continues `6.`).
        if match["enumerator"].lstrip("0") == expected_enumerator:
            return prev_enumerator_kind, prev_marker_kind, prev_num + 1

    # List starts afresh.
    if enumerator := match["enumerator_num"]:
        return yuio.doc.ListEnumeratorKind.NUMBER, marker_kind, int(enumerator)
    elif enumerator := match["enumerator_auto"]:
        # `#` starts a numbered list at 1 (the bound value is unused here).
        return yuio.doc.ListEnumeratorKind.NUMBER, marker_kind, 1
    elif enumerator := match["enumerator_lowercase"]:
        # A lone `i`, or any multi-letter enumerator, is tried as a roman
        # numeral first; a single letter otherwise counts as alphabetic.
        if (enumerator == "i" or len(enumerator) > 1) and (
            (num := yuio.doc.from_roman(enumerator)) is not None
        ):
            return yuio.doc.ListEnumeratorKind.SMALL_ROMAN, marker_kind, num
        elif len(enumerator) > 1:
            return None
        elif (num := yuio.doc.from_letters(enumerator)) is not None:
            return yuio.doc.ListEnumeratorKind.SMALL_LETTER, marker_kind, num
        else:
            return None
    elif enumerator := match["enumerator_uppercase"]:
        # Same logic as above, for capital letters / roman numerals.
        if (enumerator == "I" or len(enumerator) > 1) and (
            num := yuio.doc.from_roman(enumerator)
        ) is not None:
            return yuio.doc.ListEnumeratorKind.CAPITAL_ROMAN, marker_kind, num
        elif len(enumerator) > 1:
            return None
        elif (num := yuio.doc.from_letters(enumerator)) is not None:
            return yuio.doc.ListEnumeratorKind.CAPITAL_LETTER, marker_kind, num
        else:
            return None

    return None

1108 

1109 

def _is_list_start(
    line: str,
    prev_enumerator_kind: yuio.doc.ListEnumeratorKind | str,
    prev_marker_kind: yuio.doc.ListMarkerKind,
    prev_num: int,
):
    """
    Check whether `line` begins the next item of the numbered list whose
    previous item is described by `prev_enumerator_kind`, `prev_marker_kind`
    and `prev_num`.
    """
    if (m := _NUM_LIST_START_RE.match(line)) is None:
        return False
    detected = _detect_num_list_type(
        m, prev_enumerator_kind, prev_marker_kind, prev_num
    )
    if detected is None:
        return False
    # The item continues the list iff style matches and the number follows on.
    return detected == (prev_enumerator_kind, prev_marker_kind, prev_num + 1)

1130 

1131 

def _normalize_hyperlink_anchor(anchor: str) -> str:
    """
    Normalize a hyperlink anchor for lookup: trim it, collapse internal
    whitespace runs to single spaces, casefold, and resolve escapes.
    """
    collapsed = re.sub(r"\s+", " ", anchor.strip())
    return _unescape(collapsed.casefold())

1134 

1135 

1136def _normalize_hyperlink_target(target: str) -> tuple[str, bool]: 

1137 is_redirect = bool(re.match(r"^(\\.|[^\\])*_$", target)) 

1138 target = re.sub(r"\\(.)|\s", r"\1", target) 

1139 if is_redirect: 

1140 target = target[:-1] 

1141 return target, is_redirect 

1142 

1143 

1144def _unescape(text: str) -> str: 

1145 return re.sub(r"\\(?:\s|(.))", r"\1", text) 

1146 

1147 

1148@dataclass(slots=True) 

1149class _Token: 

1150 """ 

1151 Token for processing inline markup. 

1152 

1153 """ 

1154 

1155 start: int 

1156 end: int 

1157 kind: str 

1158 

1159 _data: dict[str, _t.Any] | None = dataclasses.field(init=False, default=None) 

1160 

1161 @property 

1162 def data(self): 

1163 if self._data is None: 

1164 self._data = {} 

1165 return self._data 

1166 

1167 

class _InlineParser:
    """
    Scanner for RST inline markup within a single run of text.

    Tokenizes emphasis, strong emphasis, inline literals, interpreted text
    with roles, hyperlink references, substitutions, internal targets and
    footnote references into `_Token`s, then converts them to `yuio.doc`
    text regions. Start/end-string validity follows the docutils inline
    markup recognition rules (see `_is_start_string` / `_is_end_string`).
    """

    def __init__(self, text: str, link_resolver: _LinkResolver) -> None:
        self._text: str = text
        # Start of the pending run of plain text not yet flushed to a token.
        self._start: int = 0
        # Current scan position.
        self._pos: int = 0
        self._tokens: list[_Token] = []
        self._link_resolver = link_resolver

    def run(self) -> list[str | yuio.doc.TextRegion]:
        # Tokenize the entire input, then flush any trailing plain text.
        while self._fits(self._pos):
            self._run()
        if self._start < len(self._text):
            self._tokens.append(_Token(self._start, len(self._text), "text"))

        # Convert tokens into output regions.
        res: list[str | yuio.doc.TextRegion] = []
        for token in self._tokens:
            text = _unescape(self._text[token.start : token.end])
            match token.kind:
                case "text":
                    res.append(text)
                case "em":
                    res.append(yuio.doc.HighlightedRegion(text, color="em"))
                case "strong":
                    res.append(yuio.doc.HighlightedRegion(text, color="strong"))
                case "formatted":
                    # Pre-rendered content attached by the parser method.
                    res.append(token.data["content"])
                case "link":
                    # Prefer an explicit title over the raw token text.
                    if title := token.data.get("title"):
                        text = _unescape(title)
                    # NOTE(review): `data["url"]` may be set to None by the
                    # link parsers, in which case the `""` default never
                    # applies and `url` is None — confirm LinkRegion accepts
                    # that.
                    res.append(yuio.doc.LinkRegion(text, url=token.data.get("url", "")))
                case "footnote":
                    # Use the resolved footnote label when available.
                    if content := token.data.get("content"):
                        text = _unescape(content)
                    text = f"[{text}]"
                    res.append(
                        yuio.doc.NoWrapRegion(
                            yuio.doc.HighlightedRegion(text, color="role/footnote")
                        )
                    )
                case kind:
                    assert False, kind
        return res

    def _fits(self, i):
        # True if index `i` is within the text.
        return i < len(self._text)

    def _ch_eq(self, i, cs):
        # True if the char at `i` exists and is in `cs` (call sites pass a
        # single char).
        return self._fits(i) and self._text[i] in cs

    def _ch_in(self, i, cs):
        # True if the char at `i` exists and is one of `cs`.
        # NOTE(review): identical to `_ch_eq`; kept separate for call-site
        # intent — consider merging.
        return self._fits(i) and self._text[i] in cs

    def _ch_at(self, i):
        # Char at `i`, or a space for out-of-bounds indices (space is neutral
        # for the start/end-string rules).
        if 0 <= i < len(self._text):
            return self._text[i]
        else:
            return " "

    def _eat(self, ch):
        # Consume a run of `ch` and return its length.
        start = self._pos
        while self._pos < len(self._text) and self._text[self._pos] == ch:
            self._pos += 1
        return self._pos - start

    def _eat_in(self, ch):
        # Consume chars while they are in `ch`.
        while self._pos < len(self._text) and self._text[self._pos] in ch:
            self._pos += 1

    def _eat_not_in(self, ch):
        # Consume chars until one in `ch` is found.
        while self._pos < len(self._text) and self._text[self._pos] not in ch:
            self._pos += 1

    def _emit(
        self,
        tok_start: int,
        content_start: int,
        content_end: int,
        token_end: int,
        kind: str,
    ):
        """
        Flush pending plain text up to `tok_start`, then append and return a
        new token of `kind` spanning `content_start:content_end`.
        """
        if tok_start > self._start:
            self._tokens.append(_Token(self._start, tok_start, "text"))
        assert token_end == self._pos  # sanity check
        self._start = self._pos
        token = _Token(content_start, content_end, kind)
        self._tokens.append(token)
        return token

    def _run(self):
        # Dispatch on the current char; each parser either emits a token or
        # advances past the char it could not interpret.
        match self._text[self._pos]:
            case "\\":
                # Skip the escape and the escaped char.
                self._pos += 2
            case "`":
                if self._ch_eq(self._pos + 1, "`"):
                    self._parse_inline_literal()
                else:
                    self._parse_interpreted_text(
                        prefix_role=None, prefix_role_start=None
                    )
            case ":":
                self._parse_prefixed_interpreted_text()
            case "*":
                if self._ch_eq(self._pos + 1, "*"):
                    self._parse_strong()
                else:
                    self._parse_emphasis()
            case "|":
                self._parse_substitution()
            case "_":
                if self._ch_eq(self._pos + 1, "`"):
                    self._parse_inline_internal_target()
                else:
                    self._parse_unquoted_link()
            case "[":
                self._parse_footnote_reference()
            case _:
                # Plain text: skip to the next potential markup char.
                self._eat_not_in("\\`:*|_[")

    def _scan_for_explicit_role(self) -> str | None:
        """
        Eat explicit role, leaving current position right after it. If explicit role
        can't be found, returns None and leaves current position untouched::

            text :role:`ref`
                 │     └ position if this function succeeds
                 └ initial position

            text :malformed-role

                 └ initial position, position if this function fails

        """

        if not self._ch_eq(self._pos, ":"):  # pragma: no cover
            return None

        token_start = self._pos
        self._pos += 1
        content_start = self._pos

        while self._fits(self._pos):
            match self._text[self._pos]:
                case ch if ch.isalnum():
                    self._pos += 1
                case ":":
                    if self._ch_at(self._pos + 1).isalnum():
                        # Isolated punctuation.
                        self._pos += 1
                        continue

                    content_end = self._pos
                    self._pos += 1

                    if content_start == content_end:
                        # Empty content is not allowed.
                        break

                    return self._text[content_start:content_end]
                case ch if ch in "-_+:," and not self._ch_in(self._pos + 1, "-_+:,"):
                    # Isolated punctuation.
                    self._pos += 1
                case _:
                    break

        self._pos = token_start  # Leave position as it was before.
        return None

    def _parse_inline_literal(self):
        """
        Eats and emits inline literal. If inline literal can't be parsed, advances
        current position one char and returns::

            text ``literal``
                 │          └ position if this function succeeds
                 └ initial position

            text ``literal
                 │└ position if this function fails
                 └ initial position

        """

        assert self._ch_eq(self._pos, "`")
        assert self._ch_eq(self._pos + 1, "`")

        token_start = self._pos
        self._pos += 2
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            match self._text[self._pos]:
                case "`" if self._ch_eq(self._pos + 1, "`"):
                    content_end = self._pos
                    self._pos += 2
                    token_end = self._pos

                    prev_char = self._ch_at(content_end - 1)
                    next_char = self._ch_at(token_end)
                    if not _is_end_string(prev_char, next_char):
                        self._pos = content_end + 1  # Skip 1 char and continue.
                        continue

                    if content_start == content_end:
                        # Empty content is not allowed.
                        break

                    token = self._emit(
                        token_start, content_start, content_end, token_end, "formatted"
                    )
                    token.data["content"] = yuio.doc._process_role(
                        self._text[content_start:content_end], "code"
                    )
                    return
                case _:
                    # Backslashes are NOT escapes inside inline literals.
                    self._pos += 1

        self._pos = content_start + 1

    def _parse_interpreted_text(
        self, prefix_role: str | None, prefix_role_start: int | None
    ):
        """
        Eats and emits interpreted text and its tail role or hyperlink marker.
        If interpreted text can't be parsed, advances current position one char
        and returns::

            text `ref`
                 │    └ position if this function succeeds
                 └ initial position

            text :role:`ref`
                 │     │    └ position if this function succeeds
                 │     └ initial position
                 └ prefix_role_start

            text `ref
                 │└ position if this function fails
                 └ initial position

            text :role:`ref
                 ││     └ initial position
                 │└ position if this function fails
                 └ prefix_role_start

        """

        assert self._ch_eq(self._pos, "`")

        if prefix_role_start is None:
            prefix_role_start = self._pos

        token_start = prefix_role_start
        self._pos += 1
        content_start = self._pos

        # TODO: are these correct bounds?
        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(token_start + 1)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "`"):
                content_end = self._pos
                self._pos += 1
                # A tail of underscores makes this a hyperlink reference;
                # a tail `:role:` is a suffix role. They are exclusive.
                if self._ch_eq(self._pos, "_"):
                    n_underscores = self._eat("_")
                    suffix_role = None
                elif self._ch_eq(self._pos, ":"):
                    suffix_role = self._scan_for_explicit_role()
                    n_underscores = 0
                else:
                    suffix_role = None
                    n_underscores = 0
                token_end = self._pos

                # TODO: are these correct bounds?
                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                if n_underscores > 2:
                    # Too many underscores.
                    break

                if bool(n_underscores) + bool(prefix_role) + bool(suffix_role) > 1:
                    # Malformed interpreted text, just skip it as-is.
                    return

                if n_underscores:
                    # Hyperlink reference: `title <target>`_ or anonymous __.
                    target, title = yuio.doc._process_link(
                        self._text[content_start:content_end],
                    )
                    link = self._link_resolver.find_link(
                        title, target, is_anonymous=n_underscores == 2
                    )
                    if link and link.type == "link":
                        target = link.content
                    else:
                        target = None
                    token = self._emit(
                        token_start, content_start, content_end, token_end, "link"
                    )
                    token.data["url"] = target
                    token.data["title"] = title
                else:
                    # Interpreted text: render via its role ("literal" when
                    # no role is given).
                    token = self._emit(
                        token_start, content_start, content_end, token_end, "formatted"
                    )
                    token.data["content"] = yuio.doc._process_role(
                        self._text[content_start:content_end],
                        prefix_role or suffix_role or "literal",
                    )
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_prefixed_interpreted_text(self):
        # Handle `:role:`ref`` syntax; on failure consume a single `:`.
        assert self._ch_eq(self._pos, ":")

        token_start = self._pos
        role = self._scan_for_explicit_role()
        if role and self._ch_eq(self._pos, "`"):
            self._parse_interpreted_text(role, token_start)
        else:
            self._pos = token_start + 1

    def _parse_emphasis(self):
        # Parse `*em*`; on failure skip one char past the opening `*`.
        assert self._ch_eq(self._pos, "*")

        token_start = self._pos
        self._pos += 1
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "*"):
                content_end = self._pos
                self._pos += 1
                token_end = self._pos

                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                self._emit(token_start, content_start, content_end, token_end, "em")
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_strong(self):
        # Parse `**strong**`; on failure skip one char past the opening `**`.
        assert self._ch_eq(self._pos, "*")
        assert self._ch_eq(self._pos + 1, "*")

        token_start = self._pos
        self._pos += 2
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "*") and self._ch_eq(self._pos + 1, "*"):
                content_end = self._pos
                self._pos += 2
                token_end = self._pos

                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                self._emit(token_start, content_start, content_end, token_end, "strong")
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_substitution(self):
        # Parse a `|substitution|` reference. Currently emitted as plain text.
        assert self._ch_eq(self._pos, "|")

        token_start = self._pos
        self._pos += 1
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "|"):
                content_end = self._pos
                self._pos += 1
                token_end = self._pos

                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                # TODO: actually substitute things.
                self._emit(token_start, content_start, content_end, token_end, "text")
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_inline_internal_target(self):
        # Parse an inline internal target `_`target``. Parsed correctly but
        # rendered as plain text (targets have no visual effect here).
        assert self._ch_eq(self._pos, "_")
        assert self._ch_eq(self._pos + 1, "`")

        token_start = self._pos
        self._pos += 2
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "`"):
                content_end = self._pos
                self._pos += 1
                token_end = self._pos

                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                self._emit(token_start, content_start, content_end, token_end, "text")
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_footnote_reference(self):
        # Parse a footnote reference `[label]_`, resolving the label through
        # the link resolver.
        assert self._ch_eq(self._pos, "[")

        token_start = self._pos
        self._pos += 1
        content_start = self._pos

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            self._pos = content_start + 1
            return

        while self._fits(self._pos):
            if self._ch_eq(self._pos, "]") and self._ch_eq(self._pos + 1, "_"):
                content_end = self._pos
                self._pos += 2
                token_end = self._pos

                prev_char = self._ch_at(content_end - 1)
                next_char = self._ch_at(token_end)
                if not _is_end_string(prev_char, next_char):
                    self._pos = content_end + 1
                    continue

                if content_start == content_end:
                    # Empty content is not allowed.
                    break

                target = self._link_resolver.find_link(
                    self._text[content_start:content_end],
                    None,
                    is_anonymous=False,
                )
                if target and target.type == "footnote":
                    content = target.content
                else:
                    # Unresolved footnote: keep `content` empty, the raw
                    # label will be rendered instead.
                    content = None
                token = self._emit(
                    token_start, content_start, content_end, token_end, "footnote"
                )
                token.data["content"] = content
                return
            elif self._ch_eq(self._pos, "\\"):
                self._pos += 2
            else:
                self._pos += 1

        self._pos = content_start + 1

    def _parse_unquoted_link(self):
        # Parse `name_` / `name__` hyperlink references without backticks.
        # Unlike the other parsers, this one starts at the trailing
        # underscores and scans *backwards* for the reference name.
        content_end = self._pos
        n_underscores = self._eat("_")
        token_end = self._pos

        assert n_underscores > 0

        if n_underscores > 2:
            return

        prev_char = self._ch_at(content_end - 1)
        next_char = self._ch_at(token_end)
        if not _is_end_string(prev_char, next_char):
            return

        # Can be a link without backticks. Scan back to find its start.
        content_start = content_end
        while content_start - 1 >= self._start:
            match self._text[content_start - 1]:
                case ch if ch.isalnum():
                    content_start -= 1
                case ch if ch in "-_+:," and not self._ch_in(
                    content_start - 2, "-_+:,"
                ):
                    # Isolated punctuation.
                    # NOTE(review): `content_start - 2` can be -1 here, and
                    # `_ch_in` does not guard against negative indices, so
                    # this would inspect the *last* char of the text — confirm.
                    content_start -= 1
                case _:
                    break

        # Start string is empty as per RST spec.
        token_start = content_start

        prev_char = self._ch_at(token_start - 1)
        next_char = self._ch_at(content_start)
        if not _is_start_string(prev_char, next_char):
            return

        if content_start == content_end:
            return

        title = self._text[content_start:content_end]
        target = self._link_resolver.find_link(
            title,
            None,
            is_anonymous=n_underscores == 2,
        )
        if target and target.type == "link":
            url = target.content
        else:
            url = None
        token = self._emit(token_start, content_start, content_end, token_end, "link")
        token.data["url"] = url
        token.data["title"] = title

1770 

1771 

1772def parse(text: str, /, *, dedent: bool = True) -> yuio.doc.Document: 

1773 """ 

1774 Parse a ReStructuredText document and return an AST node. 

1775 

1776 :param text: 

1777 text to parse. Common indentation will be removed from this string, 

1778 making it suitable to use with triple quote literals. 

1779 :param dedent: 

1780 remove lading indent from `text`. 

1781 :returns: 

1782 parsed AST node. 

1783 

1784 """ 

1785 

1786 if dedent: 

1787 text = _dedent(text) 

1788 

1789 return RstParser().parse(text)