From a6c2d4ae3bd744610e1a8b70396effdabca1593d Mon Sep 17 00:00:00 2001
From: sobolevn <mail@sobolevn.me>
Date: Thu, 25 Jun 2026 10:04:02 +0300
Subject: [PATCH 1/7] gh-151763: Fix crash in `_interpqueues.create` on
 `MemoryError` (#152131)

---
 .../next/Library/2026-06-25-01-00-43.gh-issue-151763.wWeHBe.rst | 2 ++
 Modules/_interpqueuesmodule.c                                   | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-25-01-00-43.gh-issue-151763.wWeHBe.rst

diff --git a/Misc/NEWS.d/next/Library/2026-06-25-01-00-43.gh-issue-151763.wWeHBe.rst b/Misc/NEWS.d/next/Library/2026-06-25-01-00-43.gh-issue-151763.wWeHBe.rst
new file mode 100644
index 000000000000000..2f5e84027ad31bb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-25-01-00-43.gh-issue-151763.wWeHBe.rst
@@ -0,0 +1,2 @@
+Fix crash in :func:`!_interpqueues.create` when :exc:`MemoryError`
+happens on queue creation.
diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c
index 9979cd3457e1014..d203ddba7d9c3c9 100644
--- a/Modules/_interpqueuesmodule.c
+++ b/Modules/_interpqueuesmodule.c
@@ -1101,6 +1101,7 @@ queue_create(_queues *queues, Py_ssize_t maxsize,
     }
     int64_t qid = _queues_add(queues, queue);
     if (qid < 0) {
+        queue->alive = 0;
         _queue_clear(queue);
         GLOBAL_FREE(queue);
     }

From bd4bd3e76a684969022c00aafb8acf18006ac89b Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 25 Jun 2026 10:09:41 +0300
Subject: [PATCH 2/7] gh-152100: Support set operations in character classes
 (GH-152153)

Implement set difference [A--B], intersection [A&&B] and union [A||B] in
regular expression character classes (Unicode Technical Standard #18),
including nested, complemented and compound set operands.  Symmetric
difference [A~~B] remains reserved.

Also use the new syntax in the standard library (_strptime, textwrap,
doctest, pkgutil).

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 Doc/library/re.rst                            |  46 ++-
 Doc/whatsnew/3.16.rst                         |  12 +
 Lib/_strptime.py                              |   2 +-
 Lib/doctest.py                                |   2 +-
 Lib/pkgutil.py                                |   2 +-
 Lib/re/_parser.py                             | 291 ++++++++++++------
 Lib/test/test_re.py                           | 126 ++++----
 Lib/textwrap.py                               |   2 +-
 ...-06-24-12-00-00.gh-issue-152100.Set0ps.rst |   3 +
 9 files changed, 324 insertions(+), 162 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152100.Set0ps.rst

diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 4745c1b98a45543..7c8c589b3f5dfcb 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -279,25 +279,47 @@ The special characters are:
      ``[]()[{}]`` will match a right bracket, as well as left bracket, braces,
      and parentheses.
 
-   .. .. index:: single: --; in regular expressions
-   .. .. index:: single: &&; in regular expressions
-   .. .. index:: single: ~~; in regular expressions
-   .. .. index:: single: ||; in regular expressions
-
-   * Support of nested sets and set operations as in `Unicode Technical
-     Standard #18`_ might be added in the future.  This would change the
-     syntax, so to facilitate this change a :exc:`FutureWarning` will be raised
-     in ambiguous cases for the time being.
-     That includes sets starting with a literal ``'['`` or containing literal
-     character sequences ``'--'``, ``'&&'``, ``'~~'``, and ``'||'``.  To
-     avoid a warning escape them with a backslash.
+   .. index::
+      single: --; in regular expressions
+      single: &&; in regular expressions
+      single: ||; in regular expressions
+
+   * A character set may contain a nested set written in square brackets, and
+     two sets may be combined with a set operator, as in `Unicode Technical
+     Standard #18`_:
+
+     * ``[A--B]`` (*difference*) matches a character that is in *A* but not
+       in *B*; for example ``[a-z--[aeiou]]`` matches an ASCII lowercase
+       consonant.
+     * ``[A&&B]`` (*intersection*) matches a character that is in both *A*
+       and *B*; for example ``[\w&&[a-z]]`` matches an ASCII lowercase letter.
+     * ``[A||B]`` (*union*) matches a character that is in *A* or in *B*; this
+       is the same as listing the members of both sets in a single set, but
+       allows combining nested sets.
+
+     Operators have no precedence and are applied from left to right.  To
+     group, write a nested set as the operand after an operator, as in
+     ``[a-z--[aeiou]]``.  A leading ``'^'`` complements the whole result.
+     A ``'['`` begins a nested set only immediately after a set operator;
+     anywhere else -- including at the start of a character set -- it is an
+     ordinary character, so existing patterns keep their meaning.  Escape it
+     as ``'\['`` to include a literal ``'['`` right after an operator.
 
    .. _Unicode Technical Standard #18: https://unicode.org/reports/tr18/
 
+   .. note::
+
+      Symmetric difference (``A~~B``) is not yet supported; a literal ``'~~'``
+      in a character set still raises a :exc:`FutureWarning`.
+
    .. versionchanged:: 3.7
       :exc:`FutureWarning` is raised if a character set contains constructs
       that will change semantically in the future.
 
+   .. versionchanged:: next
+      Added support for nested sets and the set operators ``--``, ``&&``
+      and ``||``.
+
 .. index:: single: | (vertical bar); in regular expressions
 
 ``|``
diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
index 18e500df6f30749..32962a9520fa691 100644
--- a/Doc/whatsnew/3.16.rst
+++ b/Doc/whatsnew/3.16.rst
@@ -181,6 +181,18 @@ os
   (Contributed by Maurycy Pawłowski-Wieroński in :gh:`149464`.)
 
 
+re
+--
+
+* :mod:`re` now supports set operations and nested sets in character classes,
+  as described in `Unicode Technical Standard #18
+  <https://unicode.org/reports/tr18/>`__: set difference (``[A--B]``),
+  intersection (``[A&&B]``) and union (``[A||B]``), where an operand may be a
+  nested set written in square brackets.  For example, ``[a-z--[aeiou]]``
+  matches an ASCII lowercase consonant.
+  (Contributed by Serhiy Storchaka in :gh:`152100`.)
+
+
 shlex
 -----
 
diff --git a/Lib/_strptime.py b/Lib/_strptime.py
index 746b0907c1d9f4e..59ac96745aa15e2 100644
--- a/Lib/_strptime.py
+++ b/Lib/_strptime.py
@@ -238,7 +238,7 @@ def __calc_date_time(self):
                         current_format = current_format.replace(tz, "%Z")
             # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
             if not current_format.isascii() and self.LC_alt_digits is None:
-                current_format = re_sub(r'\d(?<![0-9])',
+                current_format = re_sub(r'[\d--0-9]',
                                         lambda m: chr(0x0660 + int(m[0])),
                                         current_format)
             for old, new in replacement_pairs:
diff --git a/Lib/doctest.py b/Lib/doctest.py
index be950079e396de1..8a55fe3ddd26154 100644
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -1768,7 +1768,7 @@ def check_output(self, want, got, optionflags):
                           '', want)
             # If a line in got contains only spaces, then remove the
             # spaces.
-            got = re.sub(r'(?m)^[^\S\n]+$', '', got)
+            got = re.sub(r'(?m)^[\s--\n]+$', '', got)
             if got == want:
                 return True
 
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index 11c2a4b0ef46350..9121d6a1e2285ce 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -443,7 +443,7 @@ def resolve_name(name, *, strict=False):
                      within the imported package to get to the desired object.
     """
     global _LENIENT_PATTERN, _STRICT_PATTERN
-    dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
+    dotted_words = r'([\w--\d]\w*)(\.([\w--\d]\w*))*'
     if strict:
         if _STRICT_PATTERN is None:
             _STRICT_PATTERN = re.compile(
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
index b8c19cd3070c4df..cc2b66c54b66811 100644
--- a/Lib/re/_parser.py
+++ b/Lib/re/_parser.py
@@ -509,6 +509,201 @@ def _parse_sub(source, state, verbose, nested):
     subpattern.append((BRANCH, (None, items)))
     return subpattern
 
+def _charset_node(items):
+    # One element matching a character in the union `items`.  A lone LITERAL or
+    # CATEGORY is already a one-character matcher and needs no IN wrapper.
+    if len(items) == 1 and items[0][0] in _SETITEMCODES:
+        return items[0]
+    return (IN, items)
+
+def _flat_items(elements):
+    # The items if `elements` is a single flat charset (no complement), else
+    # None -- the dual of _charset_node: a lone LITERAL or CATEGORY is an item.
+    if len(elements) == 1:
+        op, av = elements[0]
+        if op in _SETITEMCODES:
+            return [elements[0]]
+        if op is IN and all(o is not NEGATE for o, _av in av):
+            return av
+    return None
+
+def _union(left, right, state):
+    # A || B: merge two flat character classes into one charset where possible,
+    # else alternate the one-character matchers.
+    left_items = _flat_items(left)
+    right_items = _flat_items(right)
+    if left_items is not None and right_items is not None:
+        return [_charset_node(_uniq(left_items + right_items))]
+    return [(BRANCH, (None, [SubPattern(state, left),
+                             SubPattern(state, right)]))]
+
+def _intersect(left, right, state):
+    # A && B: A, then require the same character to also match B (lookbehind).
+    return left + [(ASSERT, (-1, SubPattern(state, right)))]
+
+def _difference(left, right, state):
+    # A -- B: A, then require the character not to match B (lookbehind).
+    return left + [(ASSERT_NOT, (-1, SubPattern(state, right)))]
+
+# Map a set-operator token to the function combining the accumulated result
+# with the next operand.
+_SETOPS = {'||': _union, '&&': _intersect, '--': _difference}
+
+def _operand_elements(set, compound):
+    # The operand's elements: a standalone nested set, else the member union.
+    if compound is not None:
+        return compound
+    return [_charset_node(_uniq(set))]
+
+def _parse_operand(source, state, nested, here, allow_nested):
+    # Read one operand, stopping at a set operator or the closing ']'.  An
+    # operand is either a union of members/ranges/escapes or, when allow_nested,
+    # a single nested set ([...]) -- not a mix.  Return (elements, terminator),
+    # where terminator is the operator that ended the operand, or None at the end
+    # of the class.
+    _ord = ord
+    sourceget = source.get
+    sourcematch = source.match
+    set = []
+    setappend = set.append
+    compound = None     # elements of a standalone nested-set operand
+    if allow_nested and sourcematch("["):
+        # A nested set after an operator is the whole operand, used as-is (not
+        # wrapped in a group); it cannot be combined with loose members.
+        compound = _parse_charset(source, state, nested + 1)
+    while True:
+        this = sourceget()
+        if this is None:
+            raise source.error("unterminated character set",
+                               source.tell() - here)
+        if set or compound is not None:
+            if this == "]":
+                return _operand_elements(set, compound), None
+            if this in '-&|~' and source.next == this:
+                if this == '~':
+                    import warnings
+                    warnings.warn(
+                        'Possible set symmetric difference at position %d'
+                        % (source.tell() - 1),
+                        FutureWarning, stacklevel=nested + 8
+                    )
+                else:
+                    # '--', '&&' or '||' ends this operand and starts the next.
+                    sourceget()  # consume the second operator character
+                    return _operand_elements(set, compound), this + this
+        if this[0] == "\\":
+            code1 = _class_escape(source, this)
+        else:
+            code1 = LITERAL, _ord(this)
+        if compound is not None:
+            # A standalone nested set cannot be combined with other members.
+            raise source.error("unsupported nested set operand",
+                               source.tell() - here)
+        # Past this point the operand is a plain member set (compound is None).
+        if sourcematch("-"):
+            # potential range
+            that = sourceget()
+            if that is None:
+                raise source.error("unterminated character set",
+                                   source.tell() - here)
+            if that == "]":
+                # A trailing '-' is a literal.
+                setappend(code1)
+                setappend((LITERAL, _ord("-")))
+                return [_charset_node(_uniq(set))], None
+            if that == "-":
+                # 'X--': difference, not a range.  '--' after a single member
+                # lands here because the range probe consumed the first '-'.
+                setappend(code1)
+                return [_charset_node(_uniq(set))], "--"
+            if that[0] == "\\":
+                code2 = _class_escape(source, that)
+            else:
+                code2 = LITERAL, _ord(that)
+            if code1[0] != LITERAL or code2[0] != LITERAL:
+                msg = "bad character range %s-%s" % (this, that)
+                raise source.error(msg, len(this) + 1 + len(that))
+            lo = code1[1]
+            hi = code2[1]
+            if hi < lo:
+                msg = "bad character range %s-%s" % (this, that)
+                raise source.error(msg, len(this) + 1 + len(that))
+            setappend((RANGE, (lo, hi)))
+        else:
+            setappend(code1)
+
+def _complement(elements, state):
+    # The complement of `elements` (a single matcher, or a set operation as a
+    # head followed by lookbehind assertions).  De Morgan pushes the negation in
+    # -- recursively through nested set operations -- so no lookahead is needed.
+    op, av = elements[0]
+    if op is LITERAL:
+        result = [(NOT_LITERAL, av)]
+    elif op is NOT_LITERAL:
+        result = [(LITERAL, av)]
+    elif op is CATEGORY:
+        result = [(CATEGORY, CH_NEGATE[av])]
+    elif op is IN:
+        # Negate by toggling a leading NEGATE: a doubly negated set flips back
+        # to positive instead of stacking a second NEGATE.
+        if av[0][0] is NEGATE:
+            result = [(IN, av[1:])]
+        else:
+            result = [(IN, [(NEGATE, None)] + av)]
+    else:
+        # An un-merged union (A||B as an alternation).  De Morgan:
+        # ~(A | B | ...) = ~A & ~B & ... -- intersect the operand complements.
+        assert op is BRANCH
+        branches = av[1]
+        result = _complement(branches[0].data, state)
+        for sub in branches[1:]:
+            result = _intersect(result, _complement(sub.data, state), state)
+    # A set operation: a head followed by lookbehind assertions.  De Morgan:
+    #   ~(head & ~B & C ...) = ~head | B | ~C ...
+    for op, av in elements[1:]:
+        if op is ASSERT_NOT:      # '--' operand B: union with B
+            result = _union(result, av[1].data, state)
+        else:                     # '&&' operand B (ASSERT): union with [^B]
+            result = _union(result, _complement(av[1].data, state), state)
+    return result
+
+def _parse_charset(source, state, nested):
+    # Parse a character set, assuming the opening '[' has been consumed, up to
+    # and including the closing ']'.  Return a list of subpattern elements that
+    # together consume exactly one character.
+    #
+    # A set operation (UTS #18 RL1.3) maps to assertions on, or alternatives of,
+    # the matched character:
+    #   [A--B]  ->  A (?<![B])           difference
+    #   [A&&B]  ->  A (?<=[B])           intersection
+    #   [A||B]  ->  [AB] or (?:A|B)      union
+    # Operators chain left-to-right with no precedence.  A leading '^' negates by
+    # De Morgan, pushing the negation into the operands (no lookahead needed):
+    #   [^A--B] -> [^A] | B ; [^A&&B] -> [^A] | [^B] ; [^A||B] -> [^A] && [^B]
+    # Each operand compiles in its own flag context, so this is IGNORECASE-safe.
+    here = source.tell() - 1
+    if source.next == '[':
+        # A '[' at the start of a class stays a literal (the first operand never
+        # needs grouping), but the position is reserved -- keep warning.
+        import warnings
+        warnings.warn(
+            'Possible nested set at position %d' % source.tell(),
+            FutureWarning, stacklevel=nested + 7
+        )
+    negate = source.match("^")
+    result, term = _parse_operand(source, state, nested, here, False)
+    while term is not None:
+        combine = _SETOPS[term]
+        operand, term = _parse_operand(source, state, nested, here, True)
+        result = combine(result, operand, state)
+    if negate:
+        # Push the negation into the operands by De Morgan (see above).
+        result = _complement(result, state)
+
+    # A single one-character matcher, or a set operation (head + assertions);
+    # the caller groups a multi-element result if a quantifier could follow.
+    return result
+
 def _parse(source, state, verbose, nested, first=False):
     # parse a simple pattern
     subpattern = SubPattern(state)
@@ -548,95 +743,15 @@ def _parse(source, state, verbose, nested, first=False):
             subpatternappend((LITERAL, _ord(this)))
 
         elif this == "[":
-            here = source.tell() - 1
-            # character set
-            set = []
-            setappend = set.append
-##          if sourcematch(":"):
-##              pass # handle character classes
-            if source.next == '[':
-                import warnings
-                warnings.warn(
-                    'Possible nested set at position %d' % source.tell(),
-                    FutureWarning, stacklevel=nested + 6
-                )
-            negate = sourcematch("^")
-            # check remaining characters
-            while True:
-                this = sourceget()
-                if this is None:
-                    raise source.error("unterminated character set",
-                                       source.tell() - here)
-                if this == "]" and set:
-                    break
-                elif this[0] == "\\":
-                    code1 = _class_escape(source, this)
-                else:
-                    if set and this in '-&~|' and source.next == this:
-                        import warnings
-                        warnings.warn(
-                            'Possible set %s at position %d' % (
-                                'difference' if this == '-' else
-                                'intersection' if this == '&' else
-                                'symmetric difference' if this == '~' else
-                                'union',
-                                source.tell() - 1),
-                            FutureWarning, stacklevel=nested + 6
-                        )
-                    code1 = LITERAL, _ord(this)
-                if sourcematch("-"):
-                    # potential range
-                    that = sourceget()
-                    if that is None:
-                        raise source.error("unterminated character set",
-                                           source.tell() - here)
-                    if that == "]":
-                        setappend(code1)
-                        setappend((LITERAL, _ord("-")))
-                        break
-                    if that[0] == "\\":
-                        code2 = _class_escape(source, that)
-                    else:
-                        if that == '-':
-                            import warnings
-                            warnings.warn(
-                                'Possible set difference at position %d' % (
-                                    source.tell() - 2),
-                                FutureWarning, stacklevel=nested + 6
-                            )
-                        code2 = LITERAL, _ord(that)
-                    if code1[0] != LITERAL or code2[0] != LITERAL:
-                        msg = "bad character range %s-%s" % (this, that)
-                        raise source.error(msg, len(this) + 1 + len(that))
-                    lo = code1[1]
-                    hi = code2[1]
-                    if hi < lo:
-                        msg = "bad character range %s-%s" % (this, that)
-                        raise source.error(msg, len(this) + 1 + len(that))
-                    setappend((RANGE, (lo, hi)))
-                else:
-                    setappend(code1)
-
-            set = _uniq(set)
-            # XXX: <fl> should move set optimization to compiler!
-            if _len(set) == 1 and set[0][0] is LITERAL:
-                # optimization
-                if negate:
-                    subpatternappend((NOT_LITERAL, set[0][1]))
-                else:
-                    subpatternappend(set[0])
-            elif _len(set) == 1 and set[0][0] is CATEGORY:
-                # optimization: a lone category like [\d] or [^\d]
-                if negate:
-                    subpatternappend((CATEGORY, CH_NEGATE[set[0][1]]))
-                else:
-                    subpatternappend(set[0])
+            charset = _parse_charset(source, state, nested)
+            if len(charset) == 1:
+                code = charset[0]
             else:
-                if negate:
-                    set.insert(0, (NEGATE, None))
-                # charmap optimization can't be added here because
-                # global flags still are not known
-                subpatternappend((IN, set))
+                # Wrap a multi-element set operation in a non-capturing group so
+                # a following quantifier (e.g. [a-z--[aeiou]]+) binds the whole
+                # operation, not just its trailing assertion.
+                code = (SUBPATTERN, (None, 0, 0, SubPattern(state, charset)))
+            subpatternappend(code)
 
         elif this in REPEAT_CHARS:
             # repeat previous item
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 69d730c49387bee..2a57370a6fb6436 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1288,80 +1288,90 @@ def test_not_literal(self):
         self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
         self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
 
-    def test_possible_set_operations(self):
+    def test_set_operations(self):
+        # UTS #18 RL1.3 set operations in character classes: '--' (difference),
+        # '&&' (intersection) and '||' (union) are operators on the matched
+        # character; '~~' (symmetric difference) is still reserved
+        # (FutureWarning).
         s = bytes(range(128)).decode()
-        with self.assertWarnsRegex(FutureWarning, 'Possible set difference') as w:
-            p = re.compile(r'[0-9--1]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('-./0123456789'))
-        with self.assertWarnsRegex(FutureWarning, 'Possible set difference') as w:
-            self.assertEqual(re.findall(r'[0-9--2]', s), list('-./0123456789'))
-        self.assertEqual(w.filename, __file__)
 
+        # Set difference  A--B == A and not B.
+        self.assertEqual(re.findall(r'[0-9--1]', s), list('023456789'))
+        self.assertEqual(re.findall(r'[0-9--2]', s), list('013456789'))
+        self.assertEqual(re.findall(r'[%--1]', s), list('%'))
+        # A leading '-' is a literal, so this stays a range.
         self.assertEqual(re.findall(r'[--1]', s), list('-./01'))
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible set difference') as w:
-            p = re.compile(r'[%--1]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list("%&'()*+,-1"))
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible set difference ') as w:
-            p = re.compile(r'[%--]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list("%&'()*+,-"))
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible set intersection ') as w:
-            p = re.compile(r'[0-9&&1]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('&0123456789'))
-        with self.assertWarnsRegex(FutureWarning, 'Possible set intersection ') as w:
-            self.assertEqual(re.findall(r'[0-8&&1]', s), list('&012345678'))
-        self.assertEqual(w.filename, __file__)
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible set intersection ') as w:
-            p = re.compile(r'[\d&&1]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('&0123456789'))
-
+        # A dangling operator (empty operand) is an error.
+        self.assertRaises(re.PatternError, re.compile, r'[%--]')
+
+        # Set intersection  A&&B == A and B.
+        self.assertEqual(re.findall(r'[0-9&&1]', s), list('1'))
+        self.assertEqual(re.findall(r'[0-8&&1]', s), list('1'))
+        self.assertEqual(re.findall(r'[\d&&1]', s), list('1'))
+        # A leading '&' is a literal.
         self.assertEqual(re.findall(r'[&&1]', s), list('&1'))
 
-        with self.assertWarnsRegex(FutureWarning, 'Possible set union ') as w:
-            p = re.compile(r'[0-9||a]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('0123456789a|'))
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible set union ') as w:
-            p = re.compile(r'[\d||a]')
+        # Nested sets and lookbehind-mapped operands.
+        self.assertEqual(re.findall(r'[a-z--[aeiou]]', s),
+                         list('bcdfghjklmnpqrstvwxyz'))
+        self.assertEqual(re.findall(r'[\w&&[a-z]]', s),
+                         list('abcdefghijklmnopqrstuvwxyz'))
+        # Operators chain and mix left-to-right.
+        self.assertEqual(re.findall(r'[a-z--[aeiou]--[xyz]]', s),
+                         list('bcdfghjklmnpqrstvw'))
+        self.assertEqual(re.findall(r'[\w&&[a-z]&&[m-z]]', s),
+                         list('mnopqrstuvwxyz'))
+        # A negated set operation: [^A--B] == complement of (A minus B).
+        self.assertEqual(re.findall(r'[^a-z--aeiou]', s),
+                         [c for c in s if not ('a' <= c <= 'z' and c not in 'aeiou')])
+        # A nested operand may be complemented or itself a set operation; it is
+        # used directly as the assertion body.
+        self.assertEqual(re.findall(r'[a-z--[^m]]', s), list('m'))
+        self.assertEqual(re.findall(r'[\w&&[a-c--b]]', s), list('ac'))
+        self.assertEqual(re.findall(r'[a-f&&[^bc]]', s), list('adef'))
+        # A nested set is the whole operand; it cannot be mixed with loose
+        # members (write the members in the set instead).
+        self.assertEqual(re.findall(r'[a-c--[ab]]', s), list('c'))
+        self.assertRaises(re.PatternError, re.compile, r'[a-c--[ab]d]')
+        self.assertRaises(re.PatternError, re.compile, r'[a-c--[ab][c]]')
+        # A '[' is a nested set only immediately after a set operator;
+        # elsewhere it is a literal, so these stay backward compatible.
+        self.assertEqual(re.findall(r'[*?[]', s), list('*?['))
+        self.assertEqual(re.findall(r'[a[b]', s), list('[ab'))
+        self.assertEqual(re.findall(r'[^[]', 'a[b'), list('ab'))
+        # A '[' at the start of a class also stays a literal (the position is
+        # reserved, so it still warns) and keeps its historical meaning.
+        with self.assertWarnsRegex(FutureWarning, 'Possible nested set ') as w:
+            p = re.compile(r'[[a-z]]')
         self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('0123456789a|'))
-
+        self.assertEqual(p.findall('a]b[c'), ['a]'])  # {[, a-z} then a literal ']'
+        with self.assertWarnsRegex(FutureWarning, 'Possible nested set '):
+            re.compile(r'[[:digit:]]')
+        # A nested set after an operator does not warn.
+        with warnings.catch_warnings():
+            warnings.simplefilter('error', FutureWarning)
+            re.compile(r'[a-z--[aeiou]]')
+
+        # Set union  A||B == A or B (an explicit form of [AB]); flat operands
+        # merge into one charset, otherwise the operations are alternated.
+        self.assertEqual(re.findall(r'[0-9||a]', s), list('0123456789a'))
+        self.assertEqual(re.findall(r'[\d||a]', s), list('0123456789a'))
+        self.assertEqual(re.findall(r'[a-z--m||0-9]', s),
+                         list('0123456789abcdefghijklnopqrstuvwxyz'))
+        # A leading '|' is a literal.
         self.assertEqual(re.findall(r'[||1]', s), list('1|'))
 
+        # '~~' remains reserved.
+
         with self.assertWarnsRegex(FutureWarning, 'Possible set symmetric difference ') as w:
             p = re.compile(r'[0-9~~1]')
         self.assertEqual(w.filename, __file__)
         self.assertEqual(p.findall(s), list('0123456789~'))
-
         with self.assertWarnsRegex(FutureWarning, 'Possible set symmetric difference ') as w:
-            p = re.compile(r'[\d~~1]')
+            self.assertEqual(re.findall(r'[\d~~1]', s), list('0123456789~'))
         self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('0123456789~'))
-
         self.assertEqual(re.findall(r'[~~1]', s), list('1~'))
 
-        with self.assertWarnsRegex(FutureWarning, 'Possible nested set ') as w:
-            p = re.compile(r'[[0-9]|]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list('0123456789[]'))
-        with self.assertWarnsRegex(FutureWarning, 'Possible nested set ') as w:
-            self.assertEqual(re.findall(r'[[0-8]|]', s), list('012345678[]'))
-        self.assertEqual(w.filename, __file__)
-
-        with self.assertWarnsRegex(FutureWarning, 'Possible nested set ') as w:
-            p = re.compile(r'[[:digit:]|]')
-        self.assertEqual(w.filename, __file__)
-        self.assertEqual(p.findall(s), list(':[]dgit'))
-
     def test_search_coverage(self):
         self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
         self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 41366fbf443a4fc..2f213e34c2c329f 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -72,7 +72,7 @@ class TextWrapper:
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     word_punct = r'[\w!"\'&.,?]'
-    letter = r'[^\d\W]'
+    letter = r'[\w--\d]'
     whitespace = r'[%s]' % re.escape(_whitespace)
     nowhitespace = '[^' + whitespace[1:]
     wordsep_re = re.compile(r'''
diff --git a/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152100.Set0ps.rst b/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152100.Set0ps.rst
new file mode 100644
index 000000000000000..848740ed7a56d31
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152100.Set0ps.rst
@@ -0,0 +1,3 @@
+Support set operations and nested sets in regular expression character
+classes, as described in Unicode Technical Standard #18: set difference
+(``[A--B]``), intersection (``[A&&B]``) and union (``[A||B]``).

From a00464bc338f19b746f4f7f65a607ad501515f7b Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 25 Jun 2026 11:09:56 +0300
Subject: [PATCH 3/7] gh-152100: Move re compiler optimizations to
 Lib/re/_optimizer.py (GH-152154)

Move the compile-time optimizations (_optimize_charset, _compile_charset,
_simple, _compile_info and the literal/charset prefix helpers) out of
_compiler.py into a new Lib/re/_optimizer.py.  _compiler.py keeps only the
bytecode emitter and imports them.  This is groundwork for a follow-up
optimization; there is no behavior change.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 Lib/re/_compiler.py  | 381 +----------------------------------------
 Lib/re/_optimizer.py | 397 +++++++++++++++++++++++++++++++++++++++++++
 Lib/test/test_re.py  |   8 +-
 3 files changed, 408 insertions(+), 378 deletions(-)
 create mode 100644 Lib/re/_optimizer.py

diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index 304f875ea7fe7aa..fb0c8d35f6f89a3 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -14,13 +14,16 @@
 from . import _parser
 from ._constants import *
 from ._casefix import _EXTRA_CASES
+from ._optimizer import (
+    _combine_flags, _compile_charset, _optimize_charset, _compile_info,
+    _simple, _CHARSET_ALL, _CODEBITS, MAXCODE,
+)
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
 _LITERAL_CODES = {LITERAL, NOT_LITERAL}
 _SUCCESS_CODES = {SUCCESS, FAILURE}
 _ASSERT_CODES = {ASSERT, ASSERT_NOT}
-_UNIT_CODES = _LITERAL_CODES | {ANY, IN, CATEGORY}
 
 _REPEATING_CODES = {
     MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE),
@@ -28,14 +31,6 @@
     POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
 }
 
-_CHARSET_ALL = [(NEGATE, None)]
-
-def _combine_flags(flags, add_flags, del_flags,
-                   TYPE_FLAGS=_parser.TYPE_FLAGS):
-    if add_flags & TYPE_FLAGS:
-        flags &= ~TYPE_FLAGS
-    return (flags | add_flags) & ~del_flags
-
 def _compile(code, pattern, flags):
     # internal: compile a (sub)pattern
     emit = code.append
@@ -218,374 +213,6 @@ def _compile(code, pattern, flags):
         else:
             raise PatternError(f"internal: unsupported operand type {op!r}")
 
-def _compile_charset(charset, flags, code):
-    # compile charset subprogram
-    emit = code.append
-    for op, av in charset:
-        emit(op)
-        if op is NEGATE:
-            pass
-        elif op is LITERAL:
-            emit(av)
-        elif op is RANGE or op is RANGE_UNI_IGNORE:
-            emit(av[0])
-            emit(av[1])
-        elif op is CHARSET:
-            code.extend(av)
-        elif op is BIGCHARSET:
-            code.extend(av)
-        elif op is CATEGORY:
-            if flags & SRE_FLAG_LOCALE:
-                emit(CH_LOCALE[av])
-            elif flags & SRE_FLAG_UNICODE:
-                emit(CH_UNICODE[av])
-            else:
-                emit(av)
-        else:
-            raise PatternError(f"internal: unsupported set operator {op!r}")
-    emit(FAILURE)
-
-def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
-    # internal: optimize character set
-    out = []
-    tail = []
-    charmap = bytearray(256)
-    hascased = False
-    for op, av in charset:
-        while True:
-            try:
-                if op is LITERAL:
-                    if fixup: # IGNORECASE and not LOCALE
-                        av = fixup(av)
-                        charmap[av] = 1
-                        if fixes and av in fixes:
-                            for k in fixes[av]:
-                                charmap[k] = 1
-                        if not hascased and iscased(av):
-                            hascased = True
-                    else:
-                        charmap[av] = 1
-                elif op is RANGE:
-                    r = range(av[0], av[1]+1)
-                    if fixup: # IGNORECASE and not LOCALE
-                        if fixes:
-                            for i in map(fixup, r):
-                                charmap[i] = 1
-                                if i in fixes:
-                                    for k in fixes[i]:
-                                        charmap[k] = 1
-                        else:
-                            for i in map(fixup, r):
-                                charmap[i] = 1
-                        if not hascased:
-                            hascased = any(map(iscased, r))
-                    else:
-                        for i in r:
-                            charmap[i] = 1
-                elif op is NEGATE:
-                    out.append((op, av))
-                elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail:
-                    # Optimize [\s\S] etc.
-                    out = [] if out else _CHARSET_ALL
-                    return out, False
-                else:
-                    tail.append((op, av))
-            except IndexError:
-                if len(charmap) == 256:
-                    # character set contains non-UCS1 character codes
-                    charmap += b'\0' * 0xff00
-                    continue
-                # Character set contains non-BMP character codes.
-                # For range, all BMP characters in the range are already
-                # proceeded.
-                if fixup: # IGNORECASE and not LOCALE
-                    # For now, IN_UNI_IGNORE+LITERAL and
-                    # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
-                    # characters, because two characters (at least one of
-                    # which is not in the BMP) match case-insensitively
-                    # if and only if:
-                    # 1) c1.lower() == c2.lower()
-                    # 2) c1.lower() == c2 or c1.lower().upper() == c2
-                    # Also, both c.lower() and c.lower().upper() are single
-                    # characters for every non-BMP character.
-                    if op is RANGE:
-                        if fixes: # not ASCII
-                            op = RANGE_UNI_IGNORE
-                        hascased = True
-                    else:
-                        assert op is LITERAL
-                        if not hascased and iscased(av):
-                            hascased = True
-                tail.append((op, av))
-            break
-
-    # compress character map
-    runs = []
-    q = 0
-    while True:
-        p = charmap.find(1, q)
-        if p < 0:
-            break
-        if len(runs) >= 2:
-            runs = None
-            break
-        q = charmap.find(0, p)
-        if q < 0:
-            runs.append((p, len(charmap)))
-            break
-        runs.append((p, q))
-    if runs is not None:
-        # use literal/range
-        for p, q in runs:
-            if q - p == 1:
-                out.append((LITERAL, p))
-            else:
-                out.append((RANGE, (p, q - 1)))
-        out += tail
-        # if the case was changed or new representation is more compact
-        if hascased or len(out) < len(charset):
-            return out, hascased
-        # else original character set is good enough
-        return charset, hascased
-
-    # use bitmap
-    if len(charmap) == 256:
-        data = _mk_bitmap(charmap)
-        out.append((CHARSET, data))
-        out += tail
-        return out, hascased
-
-    # To represent a big charset, first a bitmap of all characters in the
-    # set is constructed. Then, this bitmap is sliced into chunks of 256
-    # characters, duplicate chunks are eliminated, and each chunk is
-    # given a number. In the compiled expression, the charset is
-    # represented by a 32-bit word sequence, consisting of one word for
-    # the number of different chunks, a sequence of 256 bytes (64 words)
-    # of chunk numbers indexed by their original chunk position, and a
-    # sequence of 256-bit chunks (8 words each).
-
-    # Compression is normally good: in a typical charset, large ranges of
-    # Unicode will be either completely excluded (e.g. if only cyrillic
-    # letters are to be matched), or completely included (e.g. if large
-    # subranges of Kanji match). These ranges will be represented by
-    # chunks of all one-bits or all zero-bits.
-
-    # Matching can be also done efficiently: the more significant byte of
-    # the Unicode character is an index into the chunk number, and the
-    # less significant byte is a bit index in the chunk (just like the
-    # CHARSET matching).
-
-    charmap = charmap.take_bytes() # should be hashable
-    comps = {}
-    mapping = bytearray(256)
-    block = 0
-    data = bytearray()
-    for i in range(0, 65536, 256):
-        chunk = charmap[i: i + 256]
-        if chunk in comps:
-            mapping[i // 256] = comps[chunk]
-        else:
-            mapping[i // 256] = comps[chunk] = block
-            block += 1
-            data += chunk
-    data = _mk_bitmap(data)
-    data[0:0] = [block] + _bytes_to_codes(mapping)
-    out.append((BIGCHARSET, data))
-    out += tail
-    return out, hascased
-
-_CODEBITS = _sre.CODESIZE * 8
-MAXCODE = (1 << _CODEBITS) - 1
-_BITS_TRANS = b'0' + b'1' * 255
-def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
-    s = bits.translate(_BITS_TRANS)[::-1]
-    return [_int(s[i - _CODEBITS: i], 2)
-            for i in range(len(s), 0, -_CODEBITS)]
-
-def _bytes_to_codes(b):
-    # Convert block indices to word array
-    a = memoryview(b).cast('I')
-    assert a.itemsize == _sre.CODESIZE
-    assert len(a) * a.itemsize == len(b)
-    return a.tolist()
-
-def _simple(p):
-    # check if this subpattern is a "simple" operator
-    if len(p) != 1:
-        return False
-    op, av = p[0]
-    if op is SUBPATTERN:
-        return av[0] is None and _simple(av[-1])
-    return op in _UNIT_CODES
-
-def _generate_overlap_table(prefix):
-    """
-    Generate an overlap table for the following prefix.
-    An overlap table is a table of the same size as the prefix which
-    informs about the potential self-overlap for each index in the prefix:
-    - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
-    - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
-      prefix[0:k]
-    """
-    table = [0] * len(prefix)
-    for i in range(1, len(prefix)):
-        idx = table[i - 1]
-        while prefix[i] != prefix[idx]:
-            if idx == 0:
-                table[i] = 0
-                break
-            idx = table[idx - 1]
-        else:
-            table[i] = idx + 1
-    return table
-
-def _get_iscased(flags):
-    if not flags & SRE_FLAG_IGNORECASE:
-        return None
-    elif flags & SRE_FLAG_UNICODE:
-        return _sre.unicode_iscased
-    else:
-        return _sre.ascii_iscased
-
-def _get_literal_prefix(pattern, flags):
-    # look for literal prefix
-    prefix = []
-    prefixappend = prefix.append
-    prefix_skip = None
-    iscased = _get_iscased(flags)
-    for op, av in pattern.data:
-        if op is LITERAL:
-            if iscased and iscased(av):
-                break
-            prefixappend(av)
-        elif op is SUBPATTERN:
-            group, add_flags, del_flags, p = av
-            flags1 = _combine_flags(flags, add_flags, del_flags)
-            if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE:
-                break
-            prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1)
-            if prefix_skip is None:
-                if group is not None:
-                    prefix_skip = len(prefix)
-                elif prefix_skip1 is not None:
-                    prefix_skip = len(prefix) + prefix_skip1
-            prefix.extend(prefix1)
-            if not got_all:
-                break
-        else:
-            break
-    else:
-        return prefix, prefix_skip, True
-    return prefix, prefix_skip, False
-
-def _get_charset_prefix(pattern, flags):
-    while True:
-        if not pattern.data:
-            return None
-        op, av = pattern.data[0]
-        if op is not SUBPATTERN:
-            break
-        group, add_flags, del_flags, pattern = av
-        flags = _combine_flags(flags, add_flags, del_flags)
-        if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
-            return None
-
-    iscased = _get_iscased(flags)
-    if op is LITERAL:
-        if iscased and iscased(av):
-            return None
-        return [(op, av)]
-    elif op is CATEGORY:
-        return [(op, av)]
-    elif op is BRANCH:
-        charset = []
-        charsetappend = charset.append
-        for p in av[1]:
-            if not p:
-                return None
-            op, av = p[0]
-            if op is LITERAL and not (iscased and iscased(av)):
-                charsetappend((op, av))
-            else:
-                return None
-        return charset
-    elif op is IN:
-        charset = av
-        if iscased:
-            for op, av in charset:
-                if op is LITERAL:
-                    if iscased(av):
-                        return None
-                elif op is RANGE:
-                    if av[1] > 0xffff:
-                        return None
-                    if any(map(iscased, range(av[0], av[1]+1))):
-                        return None
-        return charset
-    return None
-
-def _compile_info(code, pattern, flags):
-    # internal: compile an info block.  in the current version,
-    # this contains min/max pattern width, and an optional literal
-    # prefix or a character map
-    lo, hi = pattern.getwidth()
-    if hi > MAXCODE:
-        hi = MAXCODE
-    if lo == 0:
-        code.extend([INFO, 4, 0, lo, hi])
-        return
-    # look for a literal prefix
-    prefix = []
-    prefix_skip = 0
-    charset = None # not used
-    if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
-        # look for literal prefix
-        prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
-        # if no prefix, look for charset prefix
-        if not prefix:
-            charset = _get_charset_prefix(pattern, flags)
-            if charset:
-                charset, hascased = _optimize_charset(charset)
-                assert not hascased
-                if charset == _CHARSET_ALL:
-                    charset = None
-##     if prefix:
-##         print("*** PREFIX", prefix, prefix_skip)
-##     if charset:
-##         print("*** CHARSET", charset)
-    # add an info block
-    emit = code.append
-    emit(INFO)
-    skip = len(code); emit(0)
-    # literal flag
-    mask = 0
-    if prefix:
-        mask = SRE_INFO_PREFIX
-        if prefix_skip is None and got_all:
-            mask = mask | SRE_INFO_LITERAL
-    elif charset:
-        mask = mask | SRE_INFO_CHARSET
-    emit(mask)
-    # pattern length
-    if lo < MAXCODE:
-        emit(lo)
-    else:
-        emit(MAXCODE)
-        prefix = prefix[:MAXCODE]
-    emit(hi)
-    # add literal prefix
-    if prefix:
-        emit(len(prefix)) # length
-        if prefix_skip is None:
-            prefix_skip =  len(prefix)
-        emit(prefix_skip) # skip
-        code.extend(prefix)
-        # generate overlap table
-        code.extend(_generate_overlap_table(prefix))
-    elif charset:
-        _compile_charset(charset, flags, code)
-    code[skip] = len(code) - skip
-
 def isstring(obj):
     return isinstance(obj, (str, bytes))
 
diff --git a/Lib/re/_optimizer.py b/Lib/re/_optimizer.py
new file mode 100644
index 000000000000000..5e3892583a64c9b
--- /dev/null
+++ b/Lib/re/_optimizer.py
@@ -0,0 +1,397 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# optimizations for the compiler
+#
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the __init__.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre.
+
+Optimization passes used by the compiler: character-set optimization
+(:func:`_optimize_charset`), the "simple" repeat-body test (:func:`_simple`),
+and the literal/charset prefix info block (:func:`_compile_info`).
+"""
+
+import _sre
+from . import _parser
+from ._constants import *
+
+_CHARSET_ALL = [(NEGATE, None)]
+_UNIT_CODES = {LITERAL, NOT_LITERAL, ANY, IN, CATEGORY}
+
+def _combine_flags(flags, add_flags, del_flags,
+                   TYPE_FLAGS=_parser.TYPE_FLAGS):
+    if add_flags & TYPE_FLAGS:
+        flags &= ~TYPE_FLAGS
+    return (flags | add_flags) & ~del_flags
+
+def _compile_charset(charset, flags, code):
+    # compile charset subprogram
+    emit = code.append
+    for op, av in charset:
+        emit(op)
+        if op is NEGATE:
+            pass
+        elif op is LITERAL:
+            emit(av)
+        elif op is RANGE or op is RANGE_UNI_IGNORE:
+            emit(av[0])
+            emit(av[1])
+        elif op is CHARSET:
+            code.extend(av)
+        elif op is BIGCHARSET:
+            code.extend(av)
+        elif op is CATEGORY:
+            if flags & SRE_FLAG_LOCALE:
+                emit(CH_LOCALE[av])
+            elif flags & SRE_FLAG_UNICODE:
+                emit(CH_UNICODE[av])
+            else:
+                emit(av)
+        else:
+            raise PatternError(f"internal: unsupported set operator {op!r}")
+    emit(FAILURE)
+
+def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
+    # internal: optimize character set
+    out = []
+    tail = []
+    charmap = bytearray(256)
+    hascased = False
+    for op, av in charset:
+        while True:
+            try:
+                if op is LITERAL:
+                    if fixup: # IGNORECASE and not LOCALE
+                        av = fixup(av)
+                        charmap[av] = 1
+                        if fixes and av in fixes:
+                            for k in fixes[av]:
+                                charmap[k] = 1
+                        if not hascased and iscased(av):
+                            hascased = True
+                    else:
+                        charmap[av] = 1
+                elif op is RANGE:
+                    r = range(av[0], av[1]+1)
+                    if fixup: # IGNORECASE and not LOCALE
+                        if fixes:
+                            for i in map(fixup, r):
+                                charmap[i] = 1
+                                if i in fixes:
+                                    for k in fixes[i]:
+                                        charmap[k] = 1
+                        else:
+                            for i in map(fixup, r):
+                                charmap[i] = 1
+                        if not hascased:
+                            hascased = any(map(iscased, r))
+                    else:
+                        for i in r:
+                            charmap[i] = 1
+                elif op is NEGATE:
+                    out.append((op, av))
+                elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail:
+                    # Optimize [\s\S] etc.
+                    out = [] if out else _CHARSET_ALL
+                    return out, False
+                else:
+                    tail.append((op, av))
+            except IndexError:
+                if len(charmap) == 256:
+                    # character set contains non-UCS1 character codes
+                    charmap += b'\0' * 0xff00
+                    continue
+                # Character set contains non-BMP character codes.
+                # For range, all BMP characters in the range are already
+                # processed.
+                if fixup: # IGNORECASE and not LOCALE
+                    # For now, IN_UNI_IGNORE+LITERAL and
+                    # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
+                    # characters, because two characters (at least one of
+                    # which is not in the BMP) match case-insensitively
+                    # if and only if:
+                    # 1) c1.lower() == c2.lower()
+                    # 2) c1.lower() == c2 or c1.lower().upper() == c2
+                    # Also, both c.lower() and c.lower().upper() are single
+                    # characters for every non-BMP character.
+                    if op is RANGE:
+                        if fixes: # not ASCII
+                            op = RANGE_UNI_IGNORE
+                        hascased = True
+                    else:
+                        assert op is LITERAL
+                        if not hascased and iscased(av):
+                            hascased = True
+                tail.append((op, av))
+            break
+
+    # compress character map
+    runs = []
+    q = 0
+    while True:
+        p = charmap.find(1, q)
+        if p < 0:
+            break
+        if len(runs) >= 2:
+            runs = None
+            break
+        q = charmap.find(0, p)
+        if q < 0:
+            runs.append((p, len(charmap)))
+            break
+        runs.append((p, q))
+    if runs is not None:
+        # use literal/range
+        for p, q in runs:
+            if q - p == 1:
+                out.append((LITERAL, p))
+            else:
+                out.append((RANGE, (p, q - 1)))
+        out += tail
+        # if the case was changed or new representation is more compact
+        if hascased or len(out) < len(charset):
+            return out, hascased
+        # else original character set is good enough
+        return charset, hascased
+
+    # use bitmap
+    if len(charmap) == 256:
+        data = _mk_bitmap(charmap)
+        out.append((CHARSET, data))
+        out += tail
+        return out, hascased
+
+    # To represent a big charset, first a bitmap of all characters in the
+    # set is constructed. Then, this bitmap is sliced into chunks of 256
+    # characters, duplicate chunks are eliminated, and each chunk is
+    # given a number. In the compiled expression, the charset is
+    # represented by a 32-bit word sequence, consisting of one word for
+    # the number of different chunks, a sequence of 256 bytes (64 words)
+    # of chunk numbers indexed by their original chunk position, and a
+    # sequence of 256-bit chunks (8 words each).
+
+    # Compression is normally good: in a typical charset, large ranges of
+    # Unicode will be either completely excluded (e.g. if only cyrillic
+    # letters are to be matched), or completely included (e.g. if large
+    # subranges of Kanji match). These ranges will be represented by
+    # chunks of all one-bits or all zero-bits.
+
+    # Matching can also be done efficiently: the more significant byte of
+    # the Unicode character is an index into the chunk number, and the
+    # less significant byte is a bit index in the chunk (just like the
+    # CHARSET matching).
+
+    charmap = charmap.take_bytes() # should be hashable
+    comps = {}
+    mapping = bytearray(256)
+    block = 0
+    data = bytearray()
+    for i in range(0, 65536, 256):
+        chunk = charmap[i: i + 256]
+        if chunk in comps:
+            mapping[i // 256] = comps[chunk]
+        else:
+            mapping[i // 256] = comps[chunk] = block
+            block += 1
+            data += chunk
+    data = _mk_bitmap(data)
+    data[0:0] = [block] + _bytes_to_codes(mapping)
+    out.append((BIGCHARSET, data))
+    out += tail
+    return out, hascased
+
+_CODEBITS = _sre.CODESIZE * 8
+MAXCODE = (1 << _CODEBITS) - 1
+_BITS_TRANS = b'0' + b'1' * 255
+def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
+    s = bits.translate(_BITS_TRANS)[::-1]
+    return [_int(s[i - _CODEBITS: i], 2)
+            for i in range(len(s), 0, -_CODEBITS)]
+
+def _bytes_to_codes(b):
+    # Convert block indices to word array
+    a = memoryview(b).cast('I')
+    assert a.itemsize == _sre.CODESIZE
+    assert len(a) * a.itemsize == len(b)
+    return a.tolist()
+
+def _simple(p):
+    # check if this subpattern is a "simple" operator
+    if len(p) != 1:
+        return False
+    op, av = p[0]
+    if op is SUBPATTERN:
+        return av[0] is None and _simple(av[-1])
+    return op in _UNIT_CODES
+
+def _generate_overlap_table(prefix):
+    """
+    Generate an overlap table for the following prefix.
+    An overlap table is a table of the same size as the prefix which
+    informs about the potential self-overlap for each index in the prefix:
+    - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
+    - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
+      prefix[0:k]
+    """
+    table = [0] * len(prefix)
+    for i in range(1, len(prefix)):
+        idx = table[i - 1]
+        while prefix[i] != prefix[idx]:
+            if idx == 0:
+                table[i] = 0
+                break
+            idx = table[idx - 1]
+        else:
+            table[i] = idx + 1
+    return table
+
+def _get_iscased(flags):
+    if not flags & SRE_FLAG_IGNORECASE:
+        return None
+    elif flags & SRE_FLAG_UNICODE:
+        return _sre.unicode_iscased
+    else:
+        return _sre.ascii_iscased
+
+def _get_literal_prefix(pattern, flags):
+    # look for literal prefix
+    prefix = []
+    prefixappend = prefix.append
+    prefix_skip = None
+    iscased = _get_iscased(flags)
+    for op, av in pattern.data:
+        if op is LITERAL:
+            if iscased and iscased(av):
+                break
+            prefixappend(av)
+        elif op is SUBPATTERN:
+            group, add_flags, del_flags, p = av
+            flags1 = _combine_flags(flags, add_flags, del_flags)
+            if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE:
+                break
+            prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1)
+            if prefix_skip is None:
+                if group is not None:
+                    prefix_skip = len(prefix)
+                elif prefix_skip1 is not None:
+                    prefix_skip = len(prefix) + prefix_skip1
+            prefix.extend(prefix1)
+            if not got_all:
+                break
+        else:
+            break
+    else:
+        return prefix, prefix_skip, True
+    return prefix, prefix_skip, False
+
+def _get_charset_prefix(pattern, flags):
+    while True:
+        if not pattern.data:
+            return None
+        op, av = pattern.data[0]
+        if op is not SUBPATTERN:
+            break
+        group, add_flags, del_flags, pattern = av
+        flags = _combine_flags(flags, add_flags, del_flags)
+        if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
+            return None
+
+    iscased = _get_iscased(flags)
+    if op is LITERAL:
+        if iscased and iscased(av):
+            return None
+        return [(op, av)]
+    elif op is CATEGORY:
+        return [(op, av)]
+    elif op is BRANCH:
+        charset = []
+        charsetappend = charset.append
+        for p in av[1]:
+            if not p:
+                return None
+            op, av = p[0]
+            if op is LITERAL and not (iscased and iscased(av)):
+                charsetappend((op, av))
+            else:
+                return None
+        return charset
+    elif op is IN:
+        charset = av
+        if iscased:
+            for op, av in charset:
+                if op is LITERAL:
+                    if iscased(av):
+                        return None
+                elif op is RANGE:
+                    if av[1] > 0xffff:
+                        return None
+                    if any(map(iscased, range(av[0], av[1]+1))):
+                        return None
+        return charset
+    return None
+
+def _compile_info(code, pattern, flags):
+    # internal: compile an info block.  in the current version,
+    # this contains min/max pattern width, and an optional literal
+    # prefix or a character map
+    lo, hi = pattern.getwidth()
+    if hi > MAXCODE:
+        hi = MAXCODE
+    if lo == 0:
+        code.extend([INFO, 4, 0, lo, hi])
+        return
+    # look for a literal prefix
+    prefix = []
+    prefix_skip = 0
+    charset = None # not used
+    if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
+        # look for literal prefix
+        prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
+        # if no prefix, look for charset prefix
+        if not prefix:
+            charset = _get_charset_prefix(pattern, flags)
+            if charset:
+                charset, hascased = _optimize_charset(charset)
+                assert not hascased
+                if charset == _CHARSET_ALL:
+                    charset = None
+##     if prefix:
+##         print("*** PREFIX", prefix, prefix_skip)
+##     if charset:
+##         print("*** CHARSET", charset)
+    # add an info block
+    emit = code.append
+    emit(INFO)
+    skip = len(code); emit(0)
+    # literal flag
+    mask = 0
+    if prefix:
+        mask = SRE_INFO_PREFIX
+        if prefix_skip is None and got_all:
+            mask = mask | SRE_INFO_LITERAL
+    elif charset:
+        mask = mask | SRE_INFO_CHARSET
+    emit(mask)
+    # pattern length
+    if lo < MAXCODE:
+        emit(lo)
+    else:
+        emit(MAXCODE)
+        prefix = prefix[:MAXCODE]
+    emit(hi)
+    # add literal prefix
+    if prefix:
+        emit(len(prefix)) # length
+        if prefix_skip is None:
+            prefix_skip =  len(prefix)
+        emit(prefix_skip) # skip
+        code.extend(prefix)
+        # generate overlap table
+        code.extend(_generate_overlap_table(prefix))
+    elif charset:
+        _compile_charset(charset, flags, code)
+    code[skip] = len(code) - skip
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 2a57370a6fb6436..4ab615b150002c8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2825,6 +2825,12 @@ def test_atomic_group(self):
 17: SUCCESS
 ''')
 
+    def test_debug_charset_bitmap(self):
+        # gh-152100: disassembling a charset that compiles to a CHARSET/
+        # BIGCHARSET bitmap must not fail (the disassembler needs _CODEBITS).
+        out = get_debug_out(r'[aeiou]')
+        self.assertIn('CHARSET', out)
+
     def test_possesive_repeat_one(self):
         self.assertEqual(get_debug_out(r'a?+'), '''\
 POSSESSIVE_REPEAT 0 1
@@ -2950,7 +2956,7 @@ def test_immutable(self):
             tp.foo = 1
 
     def test_overlap_table(self):
-        f = re._compiler._generate_overlap_table
+        f = re._optimizer._generate_overlap_table
         self.assertEqual(f(""), [])
         self.assertEqual(f("a"), [0])
         self.assertEqual(f("abcd"), [0, 0, 0, 0])

From 05225aa06a4c5eceaa2eb29e99c2d44d2dbfe295 Mon Sep 17 00:00:00 2001
From: stevens <lipengyu@kylinos.cn>
Date: Thu, 25 Jun 2026 17:02:00 +0800
Subject: [PATCH 4/7] gh-151126: Fix missing memory errors in
 `_interpretersmodule.c` (#151624)

---
 .../2026-06-18-16-00-10.gh-issue-151126.tBqn6I.rst           | 3 +++
 Modules/_interpretersmodule.c                                | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-18-16-00-10.gh-issue-151126.tBqn6I.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-18-16-00-10.gh-issue-151126.tBqn6I.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-18-16-00-10.gh-issue-151126.tBqn6I.rst
new file mode 100644
index 000000000000000..d495df43ede932c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-18-16-00-10.gh-issue-151126.tBqn6I.rst
@@ -0,0 +1,3 @@
+Fix a crash when sharing :class:`memoryview` objects between interpreters
+fails due to running out of memory. It now raises a proper
+:exc:`MemoryError`.
diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c
index d024dee906ded36..15bfd35a80806ce 100644
--- a/Modules/_interpretersmodule.c
+++ b/Modules/_interpretersmodule.c
@@ -144,7 +144,7 @@ xibufferview_from_buffer(PyTypeObject *cls, Py_buffer *view, int64_t interpid)
 
     Py_buffer *copied = PyMem_RawMalloc(sizeof(Py_buffer));
     if (copied == NULL) {
-        return NULL;
+        return PyErr_NoMemory();
     }
     /* This steals the view->obj reference  */
     *copied = *view;
@@ -152,7 +152,7 @@ xibufferview_from_buffer(PyTypeObject *cls, Py_buffer *view, int64_t interpid)
     xibufferview *self = PyObject_Malloc(sizeof(xibufferview));
     if (self == NULL) {
         PyMem_RawFree(copied);
-        return NULL;
+        return PyErr_NoMemory();
     }
     PyObject_Init(&self->base, cls);
     *self = (xibufferview){
@@ -277,6 +277,7 @@ _pybuffer_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data)
 {
     struct xibuffer *view = PyMem_RawMalloc(sizeof(struct xibuffer));
     if (view == NULL) {
+        PyErr_NoMemory();
         return -1;
     }
     view->used = 0;

From 6f9c76d8d86997012acfa09fed05396aa9349bbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?tonghuaroot=20=28=E7=AB=A5=E8=AF=9D=29?=
 <tonghuaroot@gmail.com>
Date: Thu, 25 Jun 2026 17:15:28 +0800
Subject: [PATCH 5/7] gh-152079: Fix `_datetime.fromisoformat()` mishandling a
 sub-second tz offset (#152087)

Co-authored-by: Stan Ulbrych <stan@python.org>
---
 Lib/test/datetimetester.py                    | 26 +++++++++++++++++++
 ...-06-24-12-00-00.gh-issue-152079.f1tzus.rst |  3 +++
 Modules/_datetimemodule.c                     |  4 +--
 3 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152079.f1tzus.rst

diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index e29f5e3ecb5fd4f..28c3ab2605c45db 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -3803,6 +3803,32 @@ def test_fromisoformat_utc(self):
 
         self.assertIs(dt.tzinfo, timezone.utc)
 
+    def test_fromisoformat_utc_subsecond_offset(self):
+        # A UTC offset whose whole-second part is zero but with a non-zero
+        # microsecond part must be preserved, not collapsed to UTC.
+        for us in (1, -1, 999999, -999999):
+            with self.subTest(microseconds=us):
+                tz = timezone(timedelta(microseconds=us))
+                dt = self.theclass(2020, 6, 15, 12, 34, 56, tzinfo=tz)
+                rt = self.theclass.fromisoformat(dt.isoformat())
+                self.assertEqual(rt.utcoffset(), timedelta(microseconds=us))
+                self.assertEqual(rt, dt)
+                self.assertIsNot(rt.tzinfo, timezone.utc)
+
+        tz = timezone(timedelta(hours=5, minutes=30, seconds=15,
+                                microseconds=123456))
+        dt = self.theclass(2020, 6, 15, 12, 34, 56, tzinfo=tz)
+        rt = self.theclass.fromisoformat(dt.isoformat())
+        self.assertEqual(rt.utcoffset(), tz.utcoffset(None))
+        self.assertEqual(rt, dt)
+
+        for tstr in ('2020-06-15T12:34:56+00:00',
+                     '2020-06-15T12:34:56+00:00:00.000000',
+                     '2020-06-15T12:34:56Z'):
+            with self.subTest(tstr=tstr):
+                self.assertIs(self.theclass.fromisoformat(tstr).tzinfo,
+                              timezone.utc)
+
     def test_fromisoformat_subclass(self):
         class DateTimeSubclass(self.theclass):
             pass
diff --git a/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152079.f1tzus.rst b/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152079.f1tzus.rst
new file mode 100644
index 000000000000000..492d00724f6a46e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-24-12-00-00.gh-issue-152079.f1tzus.rst
@@ -0,0 +1,3 @@
+Fix :meth:`datetime.datetime.fromisoformat` in the C implementation dropping
+the sub-second part of a UTC offset whose whole-second part is zero, matching
+the pure-Python implementation.
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index 979aa1beb8657b2..fd8d95d05c933e0 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -1668,8 +1668,8 @@ tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds)
 {
     PyObject *tzinfo;
     if (rv == 1) {
-        // Create a timezone from offset in seconds (0 returns UTC)
-        if (tzoffset == 0) {
+        // Create a timezone from the offset (a zero offset returns UTC)
+        if (tzoffset == 0 && tz_useconds == 0) {
             return Py_NewRef(CONST_UTC(NO_STATE));
         }
 

From a0093282ea87e112e3758e6b3eadb8b6b9770569 Mon Sep 17 00:00:00 2001
From: sobolevn <mail@sobolevn.me>
Date: Thu, 25 Jun 2026 12:48:23 +0300
Subject: [PATCH 6/7] gh-151126: Fix missing `PyErr_NoMemory` in
 `_testinternalcapi.c` (#152177)

---
 Modules/_testinternalcapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index d0d1f1f1bc8e53e..e3de9006d5a427f 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1919,7 +1919,7 @@ pending_identify(PyObject *self, PyObject *args)
 
     PyThread_type_lock mutex = PyThread_allocate_lock();
     if (mutex == NULL) {
-        return NULL;
+        return PyErr_NoMemory();
     }
     PyThread_acquire_lock(mutex, WAIT_LOCK);
     /* It gets released in _pending_identify_callback(). */

From a580029f1168cf87707b157865b6a6b89a77b7ad Mon Sep 17 00:00:00 2001
From: Ivy Xu <fakeshadow1337@gmail.com>
Date: Thu, 25 Jun 2026 18:47:25 +0800
Subject: [PATCH 7/7] gh-151126: Add missing `PyErr_NoMemory` in `_winapi.c`
 (#151588)

Co-authored-by: sobolevn <mail@sobolevn.me>
---
 .../2026-06-17-16-46-07.gh-issue-151126.vhTL0T.rst            | 2 ++
 Modules/_winapi.c                                             | 4 ++++
 2 files changed, 6 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-17-16-46-07.gh-issue-151126.vhTL0T.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-17-16-46-07.gh-issue-151126.vhTL0T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-17-16-46-07.gh-issue-151126.vhTL0T.rst
new file mode 100644
index 000000000000000..6f2d230b1dcfc00
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-17-16-46-07.gh-issue-151126.vhTL0T.rst
@@ -0,0 +1,2 @@
+Avoid a possible crash in ``_winapi.c`` when the system runs out of memory.
+Now it properly raises a :exc:`MemoryError`. Patch by Ivy Xu.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 369a7400eb63b90..5bbb02fe414bfa7 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1684,6 +1684,9 @@ _winapi_GetShortPathName_impl(PyObject *module, LPCWSTR path)
             }
             PyMem_Free((void *)buffer);
         }
+        else {
+            PyErr_NoMemory();
+        }
     } else {
         PyErr_SetFromWindowsErr(0);
     }
@@ -2394,6 +2397,7 @@ _winapi_BatchedWaitForMultipleObjects_impl(PyObject *module,
     while (i < nhandles) {
         BatchedWaitData *data = (BatchedWaitData*)PyMem_Malloc(sizeof(BatchedWaitData));
         if (!data) {
+            PyErr_NoMemory();
             goto error;
         }
         thread_data[thread_count++] = data;