tj-python · pull · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
@@ -279,25 +279,47 @@ The special characters are:
      ``[]()[{}]`` will match a right bracket, as well as left bracket, braces,
      and parentheses.
 
-   .. .. index:: single: --; in regular expressions
-   .. .. index:: single: &&; in regular expressions
-   .. .. index:: single: ~~; in regular expressions
-   .. .. index:: single: ||; in regular expressions
-
-   * Support of nested sets and set operations as in `Unicode Technical
-     Standard #18`_ might be added in the future.  This would change the
-     syntax, so to facilitate this change a :exc:`FutureWarning` will be raised
-     in ambiguous cases for the time being.
-     That includes sets starting with a literal ``'['`` or containing literal
-     character sequences ``'--'``, ``'&&'``, ``'~~'``, and ``'||'``.  To
-     avoid a warning escape them with a backslash.
+   .. index::
+      single: --; in regular expressions
+      single: &&; in regular expressions
+      single: ||; in regular expressions
+
+   * A character set may contain a nested set written in square brackets, and
+     two sets may be combined with a set operator, as in `Unicode Technical
+     Standard #18`_:
+
+     * ``[A--B]`` (*difference*) matches a character that is in *A* but not
+       in *B*; for example ``[a-z--[aeiou]]`` matches an ASCII lowercase
+       consonant.
+     * ``[A&&B]`` (*intersection*) matches a character that is in both *A*
+       and *B*; for example ``[\w&&[a-z]]`` matches an ASCII lowercase letter.
+     * ``[A||B]`` (*union*) matches a character that is in *A* or in *B*; this
+       is the same as listing the members of both sets in a single set, but
+       allows combining nested sets.
+
+     Operators have equal precedence and are applied from left to right.  To
+     group, write a nested set as the operand after an operator, as in
+     ``[a-z--[aeiou]]``.  A leading ``'^'`` complements the whole result.
+     A ``'['`` begins a nested set only immediately after a set operator;
+     anywhere else -- including at the start of a character set -- it is an
+     ordinary character, so existing patterns keep their meaning.  Escape it
+     as ``'\['`` to include a literal ``'['`` right after an operator.
 
    .. _Unicode Technical Standard #18: https://unicode.org/reports/tr18/
 
+   .. note::
+
+      Symmetric difference (``[A~~B]``) is not yet supported; a literal
+      ``'~~'`` in a character set still raises a :exc:`FutureWarning`.
+
    .. versionchanged:: 3.7
       :exc:`FutureWarning` is raised if a character set contains constructs
       that will change semantically in the future.
 
+   .. versionchanged:: next
+      Added support for nested sets and the set operators ``--``, ``&&``
+      and ``||``.
+
 .. index:: single: | (vertical bar); in regular expressions
 
 ``|``

diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
@@ -181,6 +181,18 @@ os
   (Contributed by Maurycy Pawłowski-Wieroński in :gh:`149464`.)
 
 
+re
+--
+
+* :mod:`re` now supports set operations and nested sets in character sets,
+  as described in `Unicode Technical Standard #18
+  <https://unicode.org/reports/tr18/>`__: set difference (``[A--B]``),
+  intersection (``[A&&B]``) and union (``[A||B]``), where an operand may be a
+  nested set written in square brackets.  For example, ``[a-z--[aeiou]]``
+  matches an ASCII lowercase consonant.
+  (Contributed by Serhiy Storchaka in :gh:`152100`.)
+
+
 shlex
 -----
 

diff --git a/Lib/_strptime.py b/Lib/_strptime.py
@@ -238,7 +238,7 @@ def __calc_date_time(self):
                         current_format = current_format.replace(tz, "%Z")
             # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
             if not current_format.isascii() and self.LC_alt_digits is None:
-                current_format = re_sub(r'\d(?<![0-9])',
+                current_format = re_sub(r'[\d--0-9]',
                                         lambda m: chr(0x0660 + int(m[0])),
                                         current_format)
             for old, new in replacement_pairs:

diff --git a/Lib/doctest.py b/Lib/doctest.py
@@ -1768,7 +1768,7 @@ def check_output(self, want, got, optionflags):
                           '', want)
             # If a line in got contains only spaces, then remove the
             # spaces.
-            got = re.sub(r'(?m)^[^\S\n]+$', '', got)
+            got = re.sub(r'(?m)^[\s--\n]+$', '', got)
             if got == want:
                 return True
 

diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
@@ -443,7 +443,7 @@ def resolve_name(name, *, strict=False):
                      within the imported package to get to the desired object.
     """
     global _LENIENT_PATTERN, _STRICT_PATTERN
-    dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
+    dotted_words = r'([\w--\d]\w*)(\.([\w--\d]\w*))*'
     if strict:
         if _STRICT_PATTERN is None:
             _STRICT_PATTERN = re.compile(