Coverage for yuio / util.py: 100%
167 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-05 11:41 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-05 11:41 +0000
1# Yuio project, MIT license.
2#
3# https://github.com/taminomara/yuio/
4#
5# You're free to copy this file to your project and edit it for your needs,
6# just keep this copyright line please :3
8"""
9Utility functions and types.
11"""
13from __future__ import annotations
15import re as _re
16import textwrap as _textwrap
18from typing import TYPE_CHECKING
20if TYPE_CHECKING:
21 import typing_extensions as _t
22else:
23 from yuio import _typing as _t
# Names exported by ``from yuio.util import *``.
__all__ = [
    "UserString",
    "dedent",
    "to_dash_case",
]
# ASCII control characters (code points 0..31) followed by DEL (0x7f).
_UNPRINTABLE = "".join(map(chr, range(32))) + "\x7f"
# Translation table for ``str.translate`` that turns each of them into a space.
_UNPRINTABLE_TRANS = str.maketrans(_UNPRINTABLE, " " * len(_UNPRINTABLE))
# Regex source (not compiled) for a character class matching any of them.
_UNPRINTABLE_RE = f"[{_re.escape(_UNPRINTABLE)}]"
# Same character class, but with the newline character left out.
_UNPRINTABLE_RE_WITHOUT_NL = f"[{_re.escape(_UNPRINTABLE.replace(chr(10), ''))}]"
# Matches every place where ``to_dash_case`` must put a dash: either a separator
# character that gets replaced, or a zero-width word boundary inside an identifier.
_TO_DASH_CASE_RE = _re.compile(
    r"""
      # We will add a dash (bear with me here):
      [_\s]                             # 1. instead of underscore or space,
      | (                               # 2. OR in the following case:
        (?<!^)                          #   - not at the beginning of the string,
        (                               #   - AND EITHER:
            (?<=[A-Z])(?=[A-Z][a-z])    #     - before case gets lower (`XMLTag` -> `XML-Tag`),
          | (?<=[a-zA-Z])(?![a-zA-Z_])  #     - between a letter and a non-letter (`HTTP20` -> `HTTP-20`),
          | (?<![A-Z_])(?=[A-Z])        #     - between non-uppercase and uppercase letter (`TagXML` -> `Tag-XML`),
        )                               #   - AND ALSO:
        (?!$)                           #   - not at the end of the string.
      )
    """,
    _re.X | _re.M,
)


def to_dash_case(msg: str, /) -> str:
    """
    Turn a ``CamelCase`` or ``snake_case`` identifier into its ``dash-case`` form.

    Underscores and whitespace characters are replaced with dashes, dashes are
    inserted at word boundaries inside the identifier, and the result is
    lower-cased. Only ASCII letters are recognized when looking for word
    boundaries, so non-ASCII input will not be split correctly.

    :param msg:
        identifier to convert.
    :returns:
        identifier in ``dash-case``.
    :example:
        ::

            >>> to_dash_case("SomeClass")
            'some-class'
            >>> to_dash_case("HTTP20XMLUberParser")
            'http-20-xml-uber-parser'

    """

    dashed = _TO_DASH_CASE_RE.sub("-", msg)
    return dashed.lower()
def dedent(msg: str, /):
    """
    Strip docstring-style indentation and normalize surrounding whitespace.

    Meant for triple-quoted literals such as docstrings, where the first line
    starts right after the opening quotes and the remaining lines share
    a common indent. The first line only has its trailing whitespace removed;
    the common indent is removed from all the other lines; finally, leading
    and trailing whitespace of the whole text is stripped and a single
    newline is appended.

    An empty message is returned unchanged.

    :param msg:
        message to dedent.
    :returns:
        normalized message.
    :example:
        ::

            >>> def foo():
            ...     \"""Documentation for function ``foo``.
            ...
            ...     Leading indent is stripped.
            ...     \"""
            ...
            ...     ...

            >>> dedent(foo.__doc__)
            'Documentation for function ``foo``.\\n\\nLeading indent is stripped.\\n'

    """

    if not msg:
        return msg

    lines = msg.splitlines(keepends=True)
    # The first line carries no indentation of its own, so it is kept out
    # of the common-indent computation.
    head = lines[0].rstrip()
    tail = _textwrap.dedent("".join(lines[1:]))
    return f"{head}\n{tail}".strip() + "\n"
# A source line that holds nothing but a ``#:`` doc comment (optionally indented).
# Group 1 is the text that follows the ``#:`` marker.
_COMMENT_RE = _re.compile(r"^\s*#:(.*)\r?\n?$")
# Backtick-quoted RST text with an optional ``:role:`` or underscore marker on
# either side. Group 1 is the text between the backticks; it contains no raw
# backticks, newlines or backslashes except as part of a backslash escape.
_RST_ROLE_RE = _re.compile(
    r"(?::[\w+.:-]+:|__?)?`((?:[^`\n\\]|\\.)+)`(?::[\w+.:-]+:|__?)?", _re.DOTALL
)
# Role content written as ``title <target>``. Group 1 is the title.
_RST_ROLE_TITLE_RE = _re.compile(
    r"^((?:[^`\n\\]|\\.)*) <(?:[^`\n\\]|\\.)*>$", _re.DOTALL
)
# A backslash escape: a backslash followed by any single character (newline
# included, because of ``DOTALL``). Group 1 is the escaped character.
_ESC_RE = _re.compile(r"\\(.)", _re.DOTALL)
def _rst_esc_repl(match: _re.Match[str]):
    """
    Substitution callback that resolves a single backslash escape.

    :param match:
        match whose first group holds the character that followed the backslash.
    :returns:
        a space if the escaped character is a newline, carriage return, tab,
        vertical tab or backspace; otherwise the escaped character itself.

    """

    escaped = match.group(1)
    return " " if escaped in "\n\r\t\v\b" else escaped
def _rst_repl(match: _re.Match[str]) -> str:
    """
    Substitution callback that rewrites one backtick-quoted RST fragment
    as a plain backtick-quoted span.

    :param match:
        match whose group 0 is the whole fragment (including any role markers)
        and whose group 1 is the text between the backticks.
    :returns:
        the text wrapped in backticks, with role markers dropped and backslash
        escapes resolved.

    """

    full: str = match.group(0)
    text: str = match.group(1)
    # A leading or trailing colon means the fragment carries a ``:role:`` marker.
    if full.startswith(":") or full.endswith(":"):
        if title_match := _RST_ROLE_TITLE_RE.match(text):
            # ``title <target>``: keep only the title.
            text = title_match.group(1)
        elif text.startswith("~"):
            # ``~dotted.path.name``: keep only the part after the last dot.
            text = text.rsplit(".", maxsplit=1)[-1]
    text = _ESC_RE.sub(_rst_esc_repl, text)
    # Find the longest run of consecutive backticks in the text: resolving
    # escapes may have produced literal backticks.
    n_backticks = 0
    cur_n_backticks = 0
    for ch in text:
        if ch == "`":
            cur_n_backticks += 1
        else:
            n_backticks = max(cur_n_backticks, n_backticks)
            cur_n_backticks = 0
    # Account for a run that reaches the end of the text.
    n_backticks = max(cur_n_backticks, n_backticks)
    if not n_backticks:
        return f"`{text}`"
    else:
        # Use a fence one backtick longer than the longest run inside the text,
        # padded with spaces.
        bt = "`" * (n_backticks + 1)
        return f"{bt} {text} {bt}"
def _process_docstring(msg: str, /, only_first_paragraph: bool = True):
    """
    Prepare a docstring or doc comment for display.

    The message is passed through :func:`dedent` and its trailing newline is
    dropped; then, optionally, everything from the first blank line onward is
    cut; finally, backtick-quoted RST fragments are rewritten by ``_rst_repl``.

    :param msg:
        raw docstring or comment text.
    :param only_first_paragraph:
        if :data:`True` (the default), keep only the text before
        the first blank line.
    :returns:
        processed text.

    """

    text = dedent(msg).removesuffix("\n")
    if only_first_paragraph:
        # With no blank line present, ``partition`` yields the whole text as its head.
        text = text.partition("\n\n")[0]
    return _RST_ROLE_RE.sub(_rst_repl, text)
def _find_docs(obj: _t.Any, /) -> dict[str, str]:
    """
    Find documentation for fields of a class.

    Inspects source code of a class and finds docstrings and doc comments (``#:``)
    for variables in its body. Doesn't inspect ``__init__``, doesn't return documentation
    for class methods. Returns first paragraph from each docstring, formatted for use
    in CLI help messages.

    If ``obj`` is a function, doc comments placed right above its positional-or-keyword
    and keyword-only parameters are collected instead.

    When a field has both a docstring and a doc comment, the comment wins.

    :param obj:
        class or function to inspect.
    :returns:
        mapping from field (or parameter) name to its processed documentation.
        Empty if ``obj`` has no ``__qualname__``, is defined inside a function,
        or its source code can't be retrieved.

    """

    # Based on code from Sphinx, two clause BSD license.
    # See https://github.com/sphinx-doc/sphinx/blob/master/LICENSE.rst.

    import ast
    import inspect
    import itertools

    if (qualname := getattr(obj, "__qualname__", None)) is None:
        # Not a known object.
        return {}

    if "<locals>" in qualname:
        # This will not work as expected!
        return {}

    try:
        sourcelines, _ = inspect.getsourcelines(obj)
    except (TypeError, OSError):
        # ``TypeError``: built-in object; ``OSError``: source is not available
        # (dynamically created class, missing source file, interactive session).
        return {}

    docs: dict[str, str] = {}

    node = ast.parse(_textwrap.dedent("".join(sourcelines)))
    assert isinstance(node, ast.Module)
    assert len(node.body) == 1
    cdef = node.body[0]

    if isinstance(cdef, ast.ClassDef):
        fields: list[tuple[int, str]] = []
        last_field: str | None = None
        for stmt in cdef.body:
            # A bare string literal counts as a docstring only for the field
            # assigned by the immediately preceding statement.
            if (
                last_field
                and isinstance(stmt, ast.Expr)
                and isinstance(stmt.value, ast.Constant)
                and isinstance(stmt.value.value, str)
            ):
                docs[last_field] = _process_docstring(stmt.value.value)
            last_field = None
            if isinstance(stmt, ast.AnnAssign):
                target = stmt.target
            elif isinstance(stmt, ast.Assign) and len(stmt.targets) == 1:
                target = stmt.targets[0]
            else:
                continue
            # Only plain, public names are treated as fields.
            if isinstance(target, ast.Name) and not target.id.startswith("_"):
                fields.append((stmt.lineno, target.id))
                last_field = target.id
    elif isinstance(cdef, ast.FunctionDef):
        fields = [
            (field.lineno, field.arg)
            for field in itertools.chain(cdef.args.args, cdef.args.kwonlyargs)
        ]
    else:  # pragma: no cover
        return {}

    for pos, name in fields:
        # ``pos`` is a 1-based line number, so ``sourcelines[: pos - 1]`` is
        # everything above the field. Walk it bottom-up, collecting the contiguous
        # run of ``#:`` comments. Slicing this way yields nothing when the field
        # sits on the very first line, instead of wrapping around to the end.
        comment_lines: list[str] = []
        for before_line in reversed(sourcelines[: pos - 1]):
            if match := _COMMENT_RE.match(before_line):
                comment_lines.append(match.group(1))
            else:
                break

        if comment_lines:
            docs[name] = _process_docstring("\n".join(reversed(comment_lines)))

    return docs
if TYPE_CHECKING:
    # Structural types that exist for type checkers only; they are referenced
    # from annotations of ``UserString`` methods.

    # Annotates the ``mapping`` parameter of ``UserString.format_map``.
    class _FormatMapMapping(_t.Protocol):
        def __getitem__(self, key: str, /) -> _t.Any: ...

    # Annotates the ``table`` parameter of ``UserString.translate``.
    class _TranslateTable(_t.Protocol):
        def __getitem__(self, key: int, /) -> str | int | None: ...
class UserString(str):
    """
    Base class for user string.

    This class is similar to :class:`collections.UserString`, but actually derived
    from string, with customizable wrapping semantics, and returns custom string
    instances from all string methods (:class:`collections.UserString` doesn't
    wrap strings returned from :meth:`str.split` and similar).

    .. tip::

        When deriving from this class, add ``__slots__`` to avoid making a string
        with a ``__dict__`` property.

    .. seealso::

        See implementation of :class:`yuio.string.Link` for an example of handling user
        strings with internal state.

    """

    __slots__ = ()

    def _wrap(self, data: str) -> _t.Self:
        """
        Wrap raw string that resulted from an operation on this instance into another
        instance of :class:`UserString`.

        Override this method if you need to preserve some internal state during
        operations.

        By default, this simply creates an instance of ``self.__class__`` with the
        given string.

        :param data:
            plain string produced by a :class:`str` operation.
        :returns:
            the same text as an instance of this class.

        """

        return self.__class__(data)

    # Every override below delegates to the corresponding ``str`` implementation
    # and passes each resulting string through ``_wrap``. Methods that return
    # several strings (``partition``, ``split``, ...) wrap every element.

    def __add__(self, value: str, /) -> _t.Self:
        return self._wrap(super().__add__(value))

    def __format__(self, format_spec: str, /) -> _t.Self:
        return self._wrap(super().__format__(format_spec))

    def __getitem__(self, key: _t.SupportsIndex | slice, /) -> _t.Self:
        return self._wrap(super().__getitem__(key))

    def __mod__(self, value: _t.Any, /) -> _t.Self:
        return self._wrap(super().__mod__(value))

    def __mul__(self, value: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().__mul__(value))

    def __rmul__(self, value: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().__rmul__(value))

    def capitalize(self) -> _t.Self:
        return self._wrap(super().capitalize())

    def casefold(self) -> _t.Self:
        return self._wrap(super().casefold())

    def center(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        # ``fillchar`` must be forwarded, same as in ``ljust`` and ``rjust``;
        # otherwise padding would always be done with spaces.
        return self._wrap(super().center(width, fillchar))

    def expandtabs(self, tabsize: _t.SupportsIndex = 8) -> _t.Self:
        return self._wrap(super().expandtabs(tabsize))

    def format_map(self, mapping: _FormatMapMapping, /) -> _t.Self:
        return self._wrap(super().format_map(mapping))

    def format(self, *args: object, **kwargs: object) -> _t.Self:
        return self._wrap(super().format(*args, **kwargs))

    def join(self, iterable: _t.Iterable[str], /) -> _t.Self:
        return self._wrap(super().join(iterable))

    def ljust(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        return self._wrap(super().ljust(width, fillchar))

    def lower(self) -> _t.Self:
        return self._wrap(super().lower())

    def lstrip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().lstrip(chars))

    def partition(self, sep: str, /) -> tuple[_t.Self, _t.Self, _t.Self]:
        l, c, r = super().partition(sep)
        return self._wrap(l), self._wrap(c), self._wrap(r)

    def removeprefix(self, prefix: str, /) -> _t.Self:
        return self._wrap(super().removeprefix(prefix))

    def removesuffix(self, suffix: str, /) -> _t.Self:
        return self._wrap(super().removesuffix(suffix))

    def replace(self, old: str, new: str, count: _t.SupportsIndex = -1, /) -> _t.Self:
        return self._wrap(super().replace(old, new, count))

    def rjust(self, width: _t.SupportsIndex, fillchar: str = " ", /) -> _t.Self:
        return self._wrap(super().rjust(width, fillchar))

    def rpartition(self, sep: str, /) -> tuple[_t.Self, _t.Self, _t.Self]:
        l, c, r = super().rpartition(sep)
        return self._wrap(l), self._wrap(c), self._wrap(r)

    def rsplit(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, sep: str | None = None, maxsplit: _t.SupportsIndex = -1
    ) -> list[_t.Self]:
        return [self._wrap(part) for part in super().rsplit(sep, maxsplit)]

    def rstrip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().rstrip(chars))

    def split(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, sep: str | None = None, maxsplit: _t.SupportsIndex = -1
    ) -> list[_t.Self]:
        return [self._wrap(part) for part in super().split(sep, maxsplit)]

    def splitlines(  # pyright: ignore[reportIncompatibleMethodOverride]
        self, keepends: bool = False
    ) -> list[_t.Self]:
        return [self._wrap(part) for part in super().splitlines(keepends)]

    def strip(self, chars: str | None = None, /) -> _t.Self:
        return self._wrap(super().strip(chars))

    def swapcase(self) -> _t.Self:
        return self._wrap(super().swapcase())

    def title(self) -> _t.Self:
        return self._wrap(super().title())

    def translate(self, table: _TranslateTable, /) -> _t.Self:
        return self._wrap(super().translate(table))

    def upper(self) -> _t.Self:
        return self._wrap(super().upper())

    def zfill(self, width: _t.SupportsIndex, /) -> _t.Self:
        return self._wrap(super().zfill(width))
# Module-level covariant type variable.
T = _t.TypeVar("T", covariant=True)