Coverage for yuio / util.py: 100%

167 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-05 11:41 +0000

1# Yuio project, MIT license. 

2# 

3# https://github.com/taminomara/yuio/ 

4# 

5# You're free to copy this file to your project and edit it for your needs, 

6# just keep this copyright line please :3 

7 

8""" 

9Utility functions and types. 

10 

11""" 

12 

13from __future__ import annotations 

14 

15import re as _re 

16import textwrap as _textwrap 

17 

18from typing import TYPE_CHECKING 

19 

20if TYPE_CHECKING: 

21 import typing_extensions as _t 

22else: 

23 from yuio import _typing as _t 

24 

25__all__ = [ 

26 "UserString", 

27 "dedent", 

28 "to_dash_case", 

29] 

30 

# ASCII control characters: code points 0 through 31 followed by DEL (0x7f).
_UNPRINTABLE = "".join(map(chr, range(32))) + "\x7f"

# Table for `str.translate` that maps each of the characters above to a space.
_UNPRINTABLE_TRANS = str.maketrans(_UNPRINTABLE, " " * len(_UNPRINTABLE))

# Source text (not compiled) of a character class matching any of the characters above.
_UNPRINTABLE_RE = "[{}]".format(_re.escape(_UNPRINTABLE))

# Same character class, with the newline character left out.
_UNPRINTABLE_RE_WITHOUT_NL = "[{}]".format(
    _re.escape(_UNPRINTABLE.replace("\n", ""))
)

35 

_TO_DASH_CASE_RE = _re.compile(
    r"""
      # We will add a dash (bear with me here):
      [-_\s]                            # 1. instead of a dash, underscore or space,
      | (                               # 2. OR in the following case:
        (?<!^)                          #   - not at the beginning of the string,
        (                               #   - AND EITHER:
            (?<=[A-Z])(?=[A-Z][a-z])    #     - before case gets lower (`XMLTag` -> `XML-Tag`),
          | (?<=[a-zA-Z])(?![a-zA-Z_])  #     - between a letter and a non-letter (`HTTP20` -> `HTTP-20`),
          | (?<![-A-Z_\s])(?=[A-Z])     #     - between non-uppercase and uppercase letter (`TagXML` -> `Tag-XML`),
                                        #       unless a separator was just replaced (`some Class` -> `some-Class`),
        )                               #   - AND ALSO:
        (?!$)                           #   - not at the end of the string.
      )
    """,
    _re.VERBOSE | _re.MULTILINE,
)


def to_dash_case(msg: str, /) -> str:
    """
    Convert ``CamelCase`` or ``snake_case`` identifier to a ``dash-case`` one.

    Underscores, whitespace characters and dashes that are already present
    are each turned into a single dash; a word boundary that directly follows
    such a separator does not produce an additional dash, so input that is
    already in ``dash-case`` is returned unchanged.

    This function assumes ASCII input, and will not work correctly
    with non-ASCII characters.

    :param msg:
        identifier to convert.
    :returns:
        identifier in ``dash-case``.
    :example:
        ::

            >>> to_dash_case("SomeClass")
            'some-class'
            >>> to_dash_case("HTTP20XMLUberParser")
            'http-20-xml-uber-parser'
            >>> to_dash_case("some Class")
            'some-class'

    """

    # Dashes are inserted while the original casing is still visible to the
    # pattern; lowercasing happens last.
    return _TO_DASH_CASE_RE.sub("-", msg).lower()

76 

77 

def dedent(msg: str, /):
    """
    Strip docstring-style indentation from a message and normalize its ending.

    Meant for triple-quoted literals such as docstrings, where the first line
    starts right after the opening quotes and carries no indentation of its
    own. The first line is right-stripped; the remaining lines lose their
    common leading indentation; the combined text is stripped of surrounding
    whitespace and terminated with exactly one newline.

    :param msg:
        message to dedent.
    :returns:
        normalized message; a falsy `msg` is returned as is.
    :example:
        ::

            >>> dedent("First line.\\n    Second line.\\n      Indented.\\n")
            'First line.\\nSecond line.\\n  Indented.\\n'

    """

    if not msg:
        return msg

    lines = msg.splitlines(keepends=True)
    # The first line is handled separately so that it does not take part
    # in computing the common indentation of the lines that follow it.
    head = lines[0].rstrip()
    tail = _textwrap.dedent("".join(lines[1:]))
    return f"{head}\n{tail}".strip() + "\n"

112 

113 

# A source line holding nothing but a ``#:`` doc comment;
# group 1 is the text after the ``#:`` marker, without the line terminator.
_COMMENT_RE = _re.compile(r"^\s*#:(.*)\r?\n?$")

# Text in single backticks, optionally preceded and/or followed by a role
# (``:name:``) or by one or two underscores; group 1 is the text between
# the backticks, in which a backslash escapes the character after it.
_RST_ROLE_RE = _re.compile(
    r"(?::[\w+.:-]+:|__?)?"
    r"`((?:[^`\n\\]|\\.)+)`"
    r"(?::[\w+.:-]+:|__?)?",
    flags=_re.DOTALL,
)

# Role content of the form ``title <target>``; group 1 is the title.
_RST_ROLE_TITLE_RE = _re.compile(
    r"^((?:[^`\n\\]|\\.)*) <(?:[^`\n\\]|\\.)*>$",
    flags=_re.DOTALL,
)

# A backslash followed by any character (newline included);
# group 1 is the character after the backslash.
_ESC_RE = _re.compile(r"\\(.)", flags=_re.DOTALL)

122 

123 

def _rst_esc_repl(match: _re.Match[str]):
    """
    Produce the replacement for a single backslash escape.

    :param match:
        match whose first group is the character that followed the backslash.
    :returns:
        a space if that character is a newline, carriage return, tab,
        vertical tab or backspace; the character itself otherwise.

    """

    escaped = match.group(1)
    return " " if escaped in "\n\r\t\v\b" else escaped

129 

130 

def _rst_repl(match: _re.Match[str]):
    """
    Produce the replacement for one match of ``_RST_ROLE_RE``.

    When the matched text starts or ends with a colon (i.e. carries a role),
    content of the form ``title <target>`` is reduced to its title, and
    content starting with ``~`` is reduced to the part after its last dot.
    Backslash escapes are then resolved, and the result is wrapped in
    backticks.

    :param match:
        match whose first group is the text between the backticks.
    :returns:
        the content in single backticks, or, if the content itself contains
        backticks, the content padded with spaces inside a fence that is one
        backtick longer than the longest run of backticks in the content.

    """

    whole: str = match.group(0)
    content: str = match.group(1)

    has_role = whole.startswith(":") or whole.endswith(":")
    if has_role:
        titled = _RST_ROLE_TITLE_RE.match(content)
        if titled:
            content = titled.group(1)
        elif content.startswith("~"):
            content = content.rsplit(".", maxsplit=1)[-1]

    content = _ESC_RE.sub(_rst_esc_repl, content)

    # Unescaping can leave literal backticks in the content, so the fence
    # has to be longer than any run of them.
    longest_run = max((len(run) for run in _re.findall(r"`+", content)), default=0)
    if longest_run == 0:
        return f"`{content}`"

    fence = "`" * (longest_run + 1)
    return f"{fence} {content} {fence}"

154 

155 

def _process_docstring(msg: str, /, only_first_paragraph: bool = True):
    """
    Normalize a docstring and rewrite the inline RST markup found in it.

    The text is passed through :func:`dedent` and loses its trailing newline;
    every match of ``_RST_ROLE_RE`` is then replaced using ``_rst_repl``.

    :param msg:
        raw docstring text.
    :param only_first_paragraph:
        if true, everything from the first blank line onwards is discarded
        before the markup is rewritten.
    :returns:
        processed text.

    """

    text = dedent(msg).removesuffix("\n")
    if only_first_paragraph:
        # `partition` returns the whole text when there is no blank line.
        text = text.partition("\n\n")[0]
    return _RST_ROLE_RE.sub(_rst_repl, text)

163 

164 

def _find_docs(obj: _t.Any, /) -> dict[str, str]:
    """
    Find documentation for fields of a class or parameters of a function.

    Inspects source code of a class and finds docstrings and doc comments (``#:``)
    for variables in its body. Doesn't inspect ``__init__``, doesn't return documentation
    for class methods. Variables whose names start with an underscore are skipped.
    For a function, finds doc comments placed above its regular and keyword-only
    parameters. Each found text is passed through ``_process_docstring``
    with its default settings.

    A string literal is treated as a field's docstring only when it directly
    follows the field's assignment. If a field has both a docstring and
    a doc comment, the doc comment wins.

    :param obj:
        object to inspect.
    :returns:
        mapping from field or parameter name to its documentation. Empty if `obj`
        has no ``__qualname__``, is defined inside of a function, or if its
        source code can't be retrieved.

    """

    # Based on code from Sphinx, two clause BSD license.
    # See https://github.com/sphinx-doc/sphinx/blob/master/LICENSE.rst.

    import ast
    import inspect
    import itertools

    if (qualname := getattr(obj, "__qualname__", None)) is None:
        # Not a known object.
        return {}

    if "<locals>" in qualname:
        # This will not work as expected!
        return {}

    try:
        sourcelines, _ = inspect.getsourcelines(obj)
    except (TypeError, OSError):
        # `TypeError` is raised for objects that have no source by nature
        # (builtins), `OSError` when the source code can't be retrieved
        # (for example, for dynamically generated code).
        return {}

    docs: dict[str, str] = {}

    node = ast.parse(_textwrap.dedent("".join(sourcelines)))
    assert isinstance(node, ast.Module)
    assert len(node.body) == 1
    cdef = node.body[0]

    if isinstance(cdef, ast.ClassDef):
        fields: list[tuple[int, str]] = []
        last_field: str | None = None
        for stmt in cdef.body:
            # A docstring belongs only to the assignment right above it,
            # so the pending field is forgotten on every statement.
            documented, last_field = last_field, None
            if (
                documented
                and isinstance(stmt, ast.Expr)
                and isinstance(stmt.value, ast.Constant)
                and isinstance(stmt.value.value, str)
            ):
                docs[documented] = _process_docstring(stmt.value.value)
                continue
            if isinstance(stmt, ast.AnnAssign):
                target = stmt.target
            elif isinstance(stmt, ast.Assign) and len(stmt.targets) == 1:
                target = stmt.targets[0]
            else:
                continue
            if isinstance(target, ast.Name) and not target.id.startswith("_"):
                fields.append((stmt.lineno, target.id))
                last_field = target.id
    elif isinstance(cdef, ast.FunctionDef):
        fields = [
            (field.lineno, field.arg)
            for field in itertools.chain(cdef.args.args, cdef.args.kwonlyargs)
        ]
    else:  # pragma: no cover
        return {}

    for pos, name in fields:
        comment_lines: list[str] = []
        # `pos` is a 1-based line number, so the lines above the field are
        # `sourcelines[:pos - 1]`; they are walked bottom-up until the first
        # line that is not a doc comment. Slicing this way yields nothing
        # for a field on the very first line instead of wrapping around
        # to the end of the source.
        for before_line in reversed(sourcelines[: pos - 1]):
            if match := _COMMENT_RE.match(before_line):
                comment_lines.append(match.group(1))
            else:
                break

        if comment_lines:
            docs[name] = _process_docstring("\n".join(reversed(comment_lines)))

    return docs

244 

245 

if TYPE_CHECKING:
    # These protocols exist for the type checker only
    # and are not defined at runtime.

    class _FormatMapMapping(_t.Protocol):
        """Object that can be subscripted with a string key."""

        def __getitem__(self, key: str, /) -> _t.Any:
            ...

    class _TranslateTable(_t.Protocol):
        """Object that can be subscripted with an integer key."""

        def __getitem__(self, key: int, /) -> str | int | None:
            ...

253 

254 

class UserString(str):
    """
    Base class for user string.

    This class is similar to :class:`collections.UserString`, but actually derived
    from string, with customizable wrapping semantics, and returns custom string
    instances from all string methods (:class:`collections.UserString` doesn't
    wrap strings returned from :meth:`str.split` and similar).

    Every override below calls the corresponding :class:`str` implementation
    with the arguments it was given and passes each resulting string
    through :meth:`_wrap`.

    .. tip::

        When deriving from this class, add ``__slots__`` to avoid making a string
        with a ``__dict__`` property.

    .. seealso::

        See implementation of :class:`yuio.string.Link` for an example of handling user
        strings with internal state.

    """

    __slots__ = ()

    def _wrap(self, data: str) -> _t.Self:
        """
        Wrap raw string that resulted from an operation on this instance into another
        instance of :class:`UserString`.

        Override this method if you need to preserve some internal state during
        operations.

        By default, this simply creates an instance of ``self.__class__`` with the
        given string.

        :param data:
            raw string to wrap.
        :returns:
            wrapped string.

        """

        return self.__class__(data)

    # Operators.

    def __add__(self, value: str, /) -> _t.Self:
        return self._wrap(super().__add__(value))

    def __format__(self, format_spec: str, /) -> _t.Self:
        return self._wrap(super().__format__(format_spec))

    def __getitem__(self, key: _t.SupportsIndex | slice, /) -> _t.Self:
        return self._wrap(super().__getitem__(key))

    def __mod__(self, value: _t.Any, /) -> _t.Self:
        return self._wrap(super().__mod__(value))

    def __mul__(self, value: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().__mul__(value))

    def __rmul__(self, value: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().__rmul__(value))

    # Methods that return a single string.

    def capitalize(self) -> _t.Self:
        return self._wrap(super().capitalize())

    def casefold(self) -> _t.Self:
        return self._wrap(super().casefold())

    def center(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        # `fillchar` must be forwarded, same as in `ljust` and `rjust`;
        # otherwise padding is always done with spaces.
        return self._wrap(super().center(width, fillchar))

    def expandtabs(self, tabsize: _t.SupportsIndex = 8) -> _t.Self:
        return self._wrap(super().expandtabs(tabsize))

    def format_map(self, mapping: _FormatMapMapping, /) -> _t.Self:
        return self._wrap(super().format_map(mapping))

    def format(self, *args: object, **kwargs: object) -> _t.Self:
        return self._wrap(super().format(*args, **kwargs))

    def join(self, iterable: _t.Iterable[str], /) -> _t.Self:
        return self._wrap(super().join(iterable))

    def ljust(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        return self._wrap(super().ljust(width, fillchar))

    def lower(self) -> _t.Self:
        return self._wrap(super().lower())

    def lstrip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().lstrip(chars))

    def partition(self, sep: str, /) -> tuple[_t.Self, _t.Self, _t.Self]:
        # Each of the three parts is wrapped separately.
        l, c, r = super().partition(sep)
        return self._wrap(l), self._wrap(c), self._wrap(r)

    def removeprefix(self, prefix: str, /) -> _t.Self:
        return self._wrap(super().removeprefix(prefix))

    def removesuffix(self, suffix: str, /) -> _t.Self:
        return self._wrap(super().removesuffix(suffix))

    def replace(self, old: str, new: str, count: _t.SupportsIndex = -1, /) -> _t.Self:
        return self._wrap(super().replace(old, new, count))

    def rjust(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        return self._wrap(super().rjust(width, fillchar))

    def rpartition(self, sep: str, /) -> tuple[_t.Self, _t.Self, _t.Self]:
        # Each of the three parts is wrapped separately.
        l, c, r = super().rpartition(sep)
        return self._wrap(l), self._wrap(c), self._wrap(r)

    def rsplit(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, sep: str | None = None, maxsplit: _t.SupportsIndex = -1
    ) -> list[_t.Self]:
        # Every element of the resulting list is wrapped.
        return [self._wrap(part) for part in super().rsplit(sep, maxsplit)]

    def rstrip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().rstrip(chars))

    def split(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, sep: str | None = None, maxsplit: _t.SupportsIndex = -1
    ) -> list[_t.Self]:
        # Every element of the resulting list is wrapped.
        return [self._wrap(part) for part in super().split(sep, maxsplit)]

    def splitlines(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, keepends: bool = False
    ) -> list[_t.Self]:
        # Every element of the resulting list is wrapped.
        return [self._wrap(part) for part in super().splitlines(keepends)]

    def strip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().strip(chars))

    def swapcase(self) -> _t.Self:
        return self._wrap(super().swapcase())

    def title(self) -> _t.Self:
        return self._wrap(super().title())

    def translate(self, table: _TranslateTable, /) -> _t.Self:
        return self._wrap(super().translate(table))

    def upper(self) -> _t.Self:
        return self._wrap(super().upper())

    def zfill(self, width: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().zfill(width))

396 

397 

398T = _t.TypeVar("T", covariant=True)