From a7885b46f153e9c6857342a4963edf342c86845e Mon Sep 17 00:00:00 2001 From: Ivy Xu Date: Sun, 14 Jun 2026 20:00:28 +0800 Subject: [PATCH 1/6] gh-151428: Remove unused imports in the stdlib (#151440) --- Lib/asyncio/coroutines.py | 1 - Lib/unittest/mock.py | 1 - Lib/zipfile/__init__.py | 1 - 3 files changed, 3 deletions(-) diff --git a/Lib/asyncio/coroutines.py b/Lib/asyncio/coroutines.py index bfffb6da4b19a18..3a4246f6ccd4c24 100644 --- a/Lib/asyncio/coroutines.py +++ b/Lib/asyncio/coroutines.py @@ -1,7 +1,6 @@ __all__ = ('iscoroutine',) import collections.abc -import inspect import os import sys import types diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index 56cdc37942d65d8..14a490c16575857 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -24,7 +24,6 @@ ) -import asyncio import contextlib import io import inspect diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 71e4dd4f6f625ce..005b6f4eb840448 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -4,7 +4,6 @@ XXX references to utf-8 need further investigation. """ import binascii -import importlib.util import io import os import shutil From e91f68ab40e25dc964afb872eb75873c8b1838d6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" <68491+gpshead@users.noreply.github.com> Date: Sun, 14 Jun 2026 05:29:26 -0700 Subject: [PATCH 2/6] Skip test_highly_nested_objects_decoding during the PGO profile task. (GH-151460) Since the recursion guard tracks real C-stack bounds (gh-91079), this test asserts that 500k nesting levels overflow the stack margin. On a 64 MiB stack (some Nix build envs use one that large), the optimized interpreter uses ~160 bytes/level (raises at ~420k levels) so the assertion holds with only ~16% margin; the PGO *instrumented* stage inlines less, its per-level scanner frames are smaller, and the 500k-deep decode completes -- "RecursionError not raised" fails the profile run and aborts `make profile-opt`. Upstream's skip_if_unlimited_stack_size (gh-143460) only covers RLIM_INFINITY, not large-finite stacks like ours. We could also keep playing whack a mole and raise the 500k to a much larger number... but there's little value in PGO training on this test anyways. --- Lib/test/test_json/test_recursion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index ffd3404e6f77a07..d732fc80cf1cf30 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -68,6 +68,7 @@ def default(self, o): self.fail("didn't raise ValueError on default recursion") + @support.skip_if_pgo_task # fails during PGO training w/ some stack sizes @support.skip_if_unlimited_stack_size @support.skip_emscripten_stack_overflow() @support.skip_wasi_stack_overflow() From f1a5f68e3761e010ccd4dda1342500c5ae40bbc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20S=C5=82awecki?= Date: Sun, 14 Jun 2026 14:33:03 +0200 Subject: [PATCH 3/6] gh-151461: Fix encoding-related exception handling in file tokenizer (GH-151462) --- Lib/test/test_source_encoding.py | 17 ++++++- ...-06-14-05-05-15.gh-issue-151461.5q0s88.rst | 3 ++ Parser/pegen.c | 5 +- Parser/pegen.h | 1 - Parser/pegen_errors.c | 47 ----------------- Parser/tokenizer/helpers.c | 51 ++++++++++++++++++- Parser/tokenizer/helpers.h | 1 + 7 files changed, 72 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-14-05-05-15.gh-issue-151461.5q0s88.rst diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py index 8ac64b3105708f7..53fffe7cfb56d09 100644 --- a/Lib/test/test_source_encoding.py +++ b/Lib/test/test_source_encoding.py @@ -387,8 +387,7 @@ def test_utf8_non_utf8_third_line_error(self): b'#third\xa4\n' b'raise RuntimeError\n') self.check_script_error(src, - br"'utf-8' codec can't decode byte|" - br"encoding problem: utf8") + br"'utf-8' codec can't decode byte") def test_crlf(self): src = (b'print(ascii("""\r\n"""))\n') @@ -540,6 +539,20 @@ def check_script_error(self, src, expected, lineno=...): line = line.removeprefix('\ufeff') self.assertIn(line.encode(), err) + def test_coding_spec_unknown_encoding(self): + src = (b'# coding: c1252\n' + b'print("Hi!")\n') + self.check_script_error(src, br"unknown encoding: c1252") + + def test_coding_spec_decode_error(self): + src = (b'# coding: shift-jis\n' + b'print("\xc4\x85")\n') + self.check_script_error(src, br"'shift_jis' codec can't decode byte") + + def test_coding_spec_non_text_encoding(self): + src = (b'# coding: hex_codec\n' + b'print("eggs")\n') + self.check_script_error(src, br"'hex_codec' is not a text encoding") if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-14-05-05-15.gh-issue-151461.5q0s88.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-14-05-05-15.gh-issue-151461.5q0s88.rst new file mode 100644 index 000000000000000..d76a9bc95278bcb --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-14-05-05-15.gh-issue-151461.5q0s88.rst @@ -0,0 +1,3 @@ +Fix direct execution of files with invalid source encodings to report the +underlying codec lookup or decoding error instead of the generic +``SyntaxError: encoding problem`` message. Patch by Bartosz Sławecki. diff --git a/Parser/pegen.c b/Parser/pegen.c index 569f5afb3120085..bb222b50fc095f2 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -9,6 +9,7 @@ #include "lexer/lexer.h" #include "tokenizer/tokenizer.h" +#include "tokenizer/helpers.h" #include "pegen.h" // Internal parser functions @@ -993,7 +994,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2); if (tok == NULL) { if (PyErr_Occurred()) { - _PyPegen_raise_tokenizer_init_error(filename_ob); + _PyTokenizer_raise_init_error(filename_ob); return NULL; } return NULL; @@ -1051,7 +1052,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen } if (tok == NULL) { if (PyErr_Occurred()) { - _PyPegen_raise_tokenizer_init_error(filename_ob); + _PyTokenizer_raise_init_error(filename_ob); } return NULL; } diff --git a/Parser/pegen.h b/Parser/pegen.h index 85c9ada765d9bd4..5c461e82a7f0fa7 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -174,7 +174,6 @@ typedef enum { } TARGETS_TYPE; int _Pypegen_raise_decode_error(Parser *p); -void _PyPegen_raise_tokenizer_init_error(PyObject *filename); int _Pypegen_tokenizer_error(Parser *p); void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...); void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index 312699415efd9af..b13e1c079220a92 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -10,53 +10,6 @@ // TOKENIZER ERRORS -void -_PyPegen_raise_tokenizer_init_error(PyObject *filename) -{ - if (!(PyErr_ExceptionMatches(PyExc_LookupError) - || PyErr_ExceptionMatches(PyExc_SyntaxError) - || PyErr_ExceptionMatches(PyExc_ValueError) - || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { - return; - } - PyObject *errstr = NULL; - PyObject *tuple = NULL; - PyObject *type; - PyObject *value; - PyObject *tback; - PyErr_Fetch(&type, &value, &tback); - if (PyErr_GivenExceptionMatches(value, PyExc_SyntaxError)) { - if (PyObject_SetAttr(value, &_Py_ID(filename), filename)) { - goto error; - } - PyErr_Restore(type, value, tback); - return; - } - errstr = PyObject_Str(value); - if (!errstr) { - goto error; - } - - PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); - if (!tmp) { - goto error; - } - - tuple = _PyTuple_FromPair(errstr, tmp); - Py_DECREF(tmp); - if (!tuple) { - goto error; - } - PyErr_SetObject(PyExc_SyntaxError, tuple); - -error: - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(tback); - Py_XDECREF(errstr); - Py_XDECREF(tuple); -} - static inline void raise_unclosed_parentheses_error(Parser *p) { int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index c69e66d0ab9b7a8..62b0971d418c396 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -1,6 +1,8 @@ #include "Python.h" #include "errcode.h" +#include "pycore_runtime.h" // _Py_ID() #include "pycore_token.h" +#include "pycore_tuple.h" // _PyTuple_FromPair #include "../lexer/state.h" @@ -149,6 +151,53 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval return 0; } +void +_PyTokenizer_raise_init_error(PyObject *filename) +{ + if (!(PyErr_ExceptionMatches(PyExc_LookupError) + || PyErr_ExceptionMatches(PyExc_SyntaxError) + || PyErr_ExceptionMatches(PyExc_ValueError) + || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { + return; + } + PyObject *errstr = NULL; + PyObject *tuple = NULL; + PyObject *type; + PyObject *value; + PyObject *tback; + PyErr_Fetch(&type, &value, &tback); + if (PyErr_GivenExceptionMatches(value, PyExc_SyntaxError)) { + if (PyObject_SetAttr(value, &_Py_ID(filename), filename)) { + goto error; + } + PyErr_Restore(type, value, tback); + return; + } + errstr = PyObject_Str(value); + if (!errstr) { + goto error; + } + + PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); + if (!tmp) { + goto error; + } + + tuple = _PyTuple_FromPair(errstr, tmp); + Py_DECREF(tmp); + if (!tuple) { + goto error; + } + PyErr_SetObject(PyExc_SyntaxError, tuple); + +error: + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(tback); + Py_XDECREF(errstr); + Py_XDECREF(tuple); +} + int _PyTokenizer_parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...) { @@ -418,8 +467,8 @@ _PyTokenizer_check_coding_spec(const char* line, Py_ssize_t size, struct tok_sta if (tok->encoding == NULL) { assert(tok->decoding_readline == NULL); if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) { + _PyTokenizer_raise_init_error(tok->filename); _PyTokenizer_error_ret(tok); - PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); PyMem_Free(cs); return 0; } diff --git a/Parser/tokenizer/helpers.h b/Parser/tokenizer/helpers.h index 98f6445d5a3b40e..34303999a60aff7 100644 --- a/Parser/tokenizer/helpers.h +++ b/Parser/tokenizer/helpers.h @@ -15,6 +15,7 @@ int _PyTokenizer_indenterror(struct tok_state *tok); int _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char); int _PyTokenizer_parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...); char *_PyTokenizer_error_ret(struct tok_state *tok); +void _PyTokenizer_raise_init_error(PyObject *filename); char *_PyTokenizer_new_string(const char *s, Py_ssize_t len, struct tok_state *tok); char *_PyTokenizer_translate_newlines(const char *s, int exec_input, int preserve_crlf, struct tok_state *tok); From 47b7dc788c9bcf3d5ea69a2ea0aed3d5883647a8 Mon Sep 17 00:00:00 2001 From: Aniket <148300120+Aniketsy@users.noreply.github.com> Date: Sun, 14 Jun 2026 18:09:33 +0530 Subject: [PATCH 4/6] gh-139837: Document attributes of objects recorded by warnings.catch_warnings (GH-139893) Co-authored-by: Victor Stinner --- Doc/library/warnings.rst | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst index b25384dbfce54bb..9063bea96ccb0ac 100644 --- a/Doc/library/warnings.rst +++ b/Doc/library/warnings.rst @@ -626,9 +626,28 @@ Available Context Managers If the *record* argument is :const:`False` (the default) the context manager returns :class:`None` on entry. If *record* is :const:`True`, a list is returned that is progressively populated with objects as seen by a custom - :func:`showwarning` function (which also suppresses output to ``sys.stdout``). - Each object in the list has attributes with the same names as the arguments to - :func:`showwarning`. + :func:`showwarning` function (which also suppresses output to ``sys.stderr``). + Each object in the list is guaranteed to have the following attributes: + + - ``message``: the warning message (an instance of :exc:`Warning`) + - ``category``: the warning category (a subclass of :exc:`Warning`) + - ``filename``: the file name where the warning occurred (:class:`str`) + - ``lineno``: the line number in the file (:class:`int`) + - ``file``: the file object used for output (if any), or ``None`` + - ``line``: the line of source code (if available), or ``None`` + - ``source``: the original object that generated the warning (if + available), or ``None`` + - ``module``: the module name where the warning occurred + (:class:`str`), or ``None`` + + .. versionchanged:: 3.6 + The ``source`` attribute was added. + + .. versionchanged:: 3.15 + The ``module`` attribute was added. + + The type of these objects is not specified and may change; only the + presence of these attributes is guaranteed. The *module* argument takes a module that will be used instead of the module returned when you import :mod:`!warnings` whose filter will be From 1097b22fd92afa553d738983e078687441a1d7c4 Mon Sep 17 00:00:00 2001 From: Ivy Xu Date: Sun, 14 Jun 2026 21:12:30 +0800 Subject: [PATCH 5/6] gh-151428: Remove unused imports from `Tools/` (#151442) --- Tools/c-analyzer/c_parser/preprocessor/clang.py | 1 - Tools/c-analyzer/distutils/util.py | 1 - Tools/cases_generator/generators_common.py | 2 -- Tools/cases_generator/opcode_metadata_generator.py | 1 - Tools/cases_generator/parser.py | 1 - Tools/cases_generator/tier1_generator.py | 6 ------ Tools/inspection/benchmark_external_inspection.py | 1 - Tools/jit/_stencils.py | 1 - Tools/picklebench/memory_dos_impact.py | 4 +--- 9 files changed, 1 insertion(+), 17 deletions(-) diff --git a/Tools/c-analyzer/c_parser/preprocessor/clang.py b/Tools/c-analyzer/c_parser/preprocessor/clang.py index 574a23f8f6d6f9f..738c261d1ecd80a 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/clang.py +++ b/Tools/c-analyzer/c_parser/preprocessor/clang.py @@ -1,5 +1,4 @@ import os.path -import re, sys from . import common as _common from . import gcc as _gcc diff --git a/Tools/c-analyzer/distutils/util.py b/Tools/c-analyzer/distutils/util.py index f687a28ec2f40e8..c8e92658d953661 100644 --- a/Tools/c-analyzer/distutils/util.py +++ b/Tools/c-analyzer/distutils/util.py @@ -8,7 +8,6 @@ import re import string import sys -from distutils.errors import DistutilsPlatformError def get_host_platform(): """Return a string that identifies the current platform. This is used mainly to diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index bdc4324b6702006..8c66ad4885ccfc8 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -7,14 +7,12 @@ analysis_error, Label, CodeSection, - Uop, ) from cwriter import CWriter from typing import Callable, TextIO, Iterator, Iterable from lexer import Token from stack import Storage, StackError from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, ForStmt, WhileStmt, MacroIfStmt -from stack import PRINT_STACKS DEBUG = False class TokenIterator: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 21407ad7df1e9a5..00f6804f1724b59 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -19,7 +19,6 @@ cflags, ) from cwriter import CWriter -from dataclasses import dataclass from typing import TextIO from stack import get_stack_effect diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index ccf8bf649520ff6..aa6c0b1446fb76b 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -20,7 +20,6 @@ MacroIfStmt, ) -import pprint CodeDef = InstDef | LabelDef diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index d2fa749e1417f51..ebc914a6837e2eb 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -9,8 +9,6 @@ Analysis, Instruction, Uop, - Label, - CodeSection, Part, analyze_files, Skip, @@ -24,13 +22,9 @@ write_header, type_and_null, Emitter, - TokenIterator, - always_true, - emit_to, ) from cwriter import CWriter from typing import TextIO -from lexer import Token from stack import Local, Stack, StackError, get_stack_effect, Storage DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h" diff --git a/Tools/inspection/benchmark_external_inspection.py b/Tools/inspection/benchmark_external_inspection.py index 8e367422a961da2..b7aa0e5de7ed99b 100644 --- a/Tools/inspection/benchmark_external_inspection.py +++ b/Tools/inspection/benchmark_external_inspection.py @@ -4,7 +4,6 @@ import sys import contextlib import tempfile -import os import argparse from _colorize import get_colors, can_colorize diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index e2ae3d988fc7ac9..d4547dc8e8e3c12 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -2,7 +2,6 @@ import dataclasses import enum -import sys import typing import _schema diff --git a/Tools/picklebench/memory_dos_impact.py b/Tools/picklebench/memory_dos_impact.py index 3bad6586c469437..0a7cef8668565c8 100755 --- a/Tools/picklebench/memory_dos_impact.py +++ b/Tools/picklebench/memory_dos_impact.py @@ -24,7 +24,6 @@ import argparse import gc -import io import json import os import pickle @@ -32,11 +31,10 @@ import struct import subprocess import sys -import tempfile import tracemalloc from pathlib import Path from time import perf_counter -from typing import Any, Dict, List, Tuple, Optional +from typing import Any, Dict, List, Tuple # Configuration From a7007322c2a70b01e7c2a9e6b3f8f222d241c7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20S=C5=82awecki?= Date: Sun, 14 Jun 2026 18:06:30 +0200 Subject: [PATCH 6/6] gh-151390: Colorize `match +` and `match -` in the REPL (#151391) --- Lib/_pyrepl/utils.py | 2 +- Lib/test/test_pyrepl/test_utils.py | 16 ++++++++++++++++ ...6-06-12-07-20-08.gh-issue-151390.CmYN9EeJ.rst | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-12-07-20-08.gh-issue-151390.CmYN9EeJ.rst diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index f2837a1b8eb95e7..230dae35af665ab 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -259,7 +259,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), TI(string="match"), TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) - | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") + | TI(T.OP, string="(" | "*" | "-" | "+" | "[" | "{" | "~" | "...") ): return True case ( diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 0b873d32b62b5d2..ebbd06213c69aff 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -133,6 +133,22 @@ def test_gen_colors_keyword_highlighting(self): ("1", "number"), ], ), + ( + "match +1", + [ + ("match", "soft_keyword"), + ("+", "op"), + ("1", "number"), + ], + ), + ( + "match -1", + [ + ("match", "soft_keyword"), + ("-", "op"), + ("1", "number"), + ], + ), ] for code, expected_highlights in cases: with self.subTest(code=code): diff --git a/Misc/NEWS.d/next/Library/2026-06-12-07-20-08.gh-issue-151390.CmYN9EeJ.rst b/Misc/NEWS.d/next/Library/2026-06-12-07-20-08.gh-issue-151390.CmYN9EeJ.rst new file mode 100644 index 000000000000000..ff8de30599c6ad5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-12-07-20-08.gh-issue-151390.CmYN9EeJ.rst @@ -0,0 +1 @@ +Colorize ``match`` in the :term:`REPL` when followed by a unary ``+`` or ``-`` operator. Patch by Bartosz Sławecki.