diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst index f33098faf7d8a7..5c29fff146eef0 100644 --- a/Doc/library/mimetypes.rst +++ b/Doc/library/mimetypes.rst @@ -39,8 +39,8 @@ the information :func:`init` sets up. (e.g. :program:`compress` or :program:`gzip`). The encoding is suitable for use as a :mailheader:`Content-Encoding` header, **not** as a :mailheader:`Content-Transfer-Encoding` header. The mappings are table driven. - Encoding suffixes are case sensitive; type suffixes are first tried case - sensitively, then case insensitively. + Encoding suffixes are case-sensitive. Suffix mappings and type suffixes are + first tried case-sensitively, then case-insensitively. The optional *strict* argument is a flag specifying whether the list of known MIME types is limited to only the official types `registered with IANA @@ -131,6 +131,8 @@ behavior of the module. is already known the extension will be added to the list of known extensions. Valid extensions are empty or start with a ``'.'``. + Registered lower-case extensions are matched case-insensitively. + When *strict* is ``True`` (the default), the mapping will be added to the official MIME types, otherwise to the non-standard ones. @@ -312,6 +314,8 @@ than one MIME-type database; it provides an interface similar to the one of the extension is already known, the new type will replace the old one. When the type is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched case-insensitively. + When *strict* is ``True`` (the default), the mapping will be added to the official MIME types, otherwise to the non-standard ones. diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 15e8c0a437bfd9..4339ef5a61397d 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -86,6 +86,9 @@ def add_type(self, type, ext, strict=True): is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched + case-insensitively. + If strict is true, information will be added to list of standard types, else to the list of non-standard types. @@ -172,23 +175,33 @@ def guess_file_type(self, path, *, strict=True): def _guess_file_type(self, path, strict, splitext): base, ext = splitext(path) - while (ext_lower := ext.lower()) in self.suffix_map: - base, ext = splitext(base + self.suffix_map[ext_lower]) + while True: + if ext in self.suffix_map: + suffix = self.suffix_map[ext] + elif (ext_lower := ext.lower()) in self.suffix_map: + suffix = self.suffix_map[ext_lower] + else: + break + base, ext = splitext(base + suffix) # encodings_map is case sensitive if ext in self.encodings_map: encoding = self.encodings_map[ext] base, ext = splitext(base) else: encoding = None - ext = ext.lower() + ext_lower = ext.lower() types_map = self.types_map[True] if ext in types_map: return types_map[ext], encoding + if ext_lower in types_map: + return types_map[ext_lower], encoding elif strict: return None, encoding types_map = self.types_map[False] if ext in types_map: return types_map[ext], encoding + if ext_lower in types_map: + return types_map[ext_lower], encoding else: return None, encoding @@ -386,6 +399,9 @@ def add_type(type, ext, strict=True): is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched + case-insensitively. + If strict is true, information will be added to list of standard types, else to the list of non-standard types. diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index cd85ef60a80f4b..e9966e1f7a6d49 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3159,7 +3159,7 @@ def in_systemd_nspawn_sync_suppressed() -> bool: with open("/run/systemd/container", "rb") as fp: if fp.read().rstrip() != b"systemd-nspawn": return False - except FileNotFoundError: + except (FileNotFoundError, PermissionError): return False # If systemd-nspawn is used, O_SYNC flag will immediately diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 1a3b49b87b121f..19983fa3fa7628 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -287,6 +287,50 @@ def test_case_sensitivity(self): eq(self.db.guess_file_type("foobar.tar.z"), (None, None)) eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None)) + def test_suffix_map_case_sensitive_preferred(self): + self.db.suffix_map[".TEST-SUFFIX"] = ".tar.gz" + self.db.suffix_map[".test-suffix"] = ".tar.xz" + self.assertEqual( + self.db.guess_file_type("example.TEST-SUFFIX"), + ("application/x-tar", "gzip"), + ) + self.assertEqual( + self.db.guess_file_type("example.test-suffix"), + ("application/x-tar", "xz"), + ) + + def test_added_types_case_sensitive_preferred(self): + self.db.add_type("text/x-test-uppercase-r", ".R") + self.db.add_type("text/x-test-lowercase-r", ".r") + self.assertEqual( + self.db.guess_file_type("example.R"), + ("text/x-test-uppercase-r", None), + ) + self.assertEqual( + self.db.guess_file_type("example.r"), + ("text/x-test-lowercase-r", None), + ) + self.db.add_type("text/x-test-uppercase-non-strict", + ".NON-STRICT-EXT", strict=False) + self.db.add_type("text/x-test-lowercase-non-strict", + ".non-strict-ext", strict=False) + self.assertEqual( + self.db.guess_file_type("example.NON-STRICT-EXT"), + (None, None), + ) + self.assertEqual( + self.db.guess_file_type("example.non-strict-ext"), + (None, None), + ) + self.assertEqual( + self.db.guess_file_type("example.NON-STRICT-EXT", strict=False), + ("text/x-test-uppercase-non-strict", None), + ) + self.assertEqual( + self.db.guess_file_type("example.non-strict-ext", strict=False), + ("text/x-test-lowercase-non-strict", None), + ) + def test_default_data(self): eq = self.assertEqual eq(self.db.guess_file_type("foo.html"), ("text/html", None)) diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py index 98668c15d8bf94..caa3a5b583bab3 100644 --- a/Lib/zoneinfo/_common.py +++ b/Lib/zoneinfo/_common.py @@ -26,7 +26,7 @@ def load_tzdata(key): # UnicodeEncodeError: If package_name or resource_name are not UTF-8, # such as keys containing a surrogate character. # IsADirectoryError: If package_name without a resource_name specified. - raise ZoneInfoNotFoundError(f"No time zone found with key {key}") + raise ZoneInfoNotFoundError(f"No time zone found with key {key!r}") def load_data(fobj): diff --git a/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst b/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst new file mode 100644 index 00000000000000..8d2a11cb776137 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst @@ -0,0 +1,3 @@ +Fix :mod:`mimetypes` to prefer case-sensitive matches for suffix mappings and +MIME type suffixes before falling back to case-insensitive matches. +Contributed by Xiao Yuan. diff --git a/Misc/NEWS.d/next/Tests/2026-04-24-01-38-56.gh-issue-148853._uM4_Q.rst b/Misc/NEWS.d/next/Tests/2026-04-24-01-38-56.gh-issue-148853._uM4_Q.rst new file mode 100644 index 00000000000000..9d3fbc2590dc7a --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2026-04-24-01-38-56.gh-issue-148853._uM4_Q.rst @@ -0,0 +1,2 @@ +Fix tests failing on FreeBSD in test.support's +in_systemd_nspawn_sync_suppressed() due to unreadable /run directory. diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 503f566c9ae86a..62d08826a2faea 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -8830,7 +8830,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index; - PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_value); if (attr_o == NULL) { UPDATE_MISS_STATS(LOAD_ATTR); assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR)); @@ -9707,7 +9707,7 @@ } assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UPDATE_MISS_STATS(LOAD_GLOBAL); assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL)); @@ -9774,7 +9774,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); assert(index < DK_SIZE(keys)); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UPDATE_MISS_STATS(LOAD_GLOBAL); assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL)); diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index bcdcc624e66f93..5f0e7ab6ec220d 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -560,39 +560,36 @@ get_decomp_record(PyObject *self, Py_UCS4 code, #define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20 static void -canonical_ordering_sort_insertion(int kind, void *data, - Py_ssize_t start, Py_ssize_t end) +canonical_ordering_sort_insertion(Py_UCS4 *data, Py_ssize_t length) { - for (Py_ssize_t i = start + 1; i < end; i++) { - Py_UCS4 code = PyUnicode_READ(kind, data, i); + for (Py_ssize_t i = 1; i < length; i++) { + Py_UCS4 code = data[i]; unsigned char combining = _getrecord_ex(code)->combining; Py_ssize_t j = i; - while (j > start) { - Py_UCS4 previous = PyUnicode_READ(kind, data, j - 1); + while (j > 0) { + Py_UCS4 previous = data[j - 1]; if (_getrecord_ex(previous)->combining <= combining) { break; } - PyUnicode_WRITE(kind, data, j, previous); + data[j] = previous; j--; } if (j != i) { - PyUnicode_WRITE(kind, data, j, code); + data[j] = code; } } } static void -canonical_ordering_sort_counting(int kind, void *data, - Py_ssize_t start, Py_ssize_t end, +canonical_ordering_sort_counting(Py_UCS4 *data, Py_ssize_t length, Py_UCS4 *sortbuf) { Py_ssize_t counts[256] = {0}; - Py_ssize_t run_length = end - start; Py_ssize_t total = 0; - for (Py_ssize_t i = start; i < end; i++) { - Py_UCS4 code = PyUnicode_READ(kind, data, i); + for (Py_ssize_t i = 0; i < length; i++) { + Py_UCS4 code = data[i]; unsigned char combining = _getrecord_ex(code)->combining; counts[combining]++; } @@ -604,14 +601,12 @@ canonical_ordering_sort_counting(int kind, void *data, } /* Reuse counts[] as the next output slot for each CCC. */ - for (Py_ssize_t i = start; i < end; i++) { - Py_UCS4 code = PyUnicode_READ(kind, data, i); + for (Py_ssize_t i = 0; i < length; i++) { + Py_UCS4 code = data[i]; unsigned char combining = _getrecord_ex(code)->combining; sortbuf[counts[combining]++] = code; } - for (Py_ssize_t i = 0; i < run_length; i++) { - PyUnicode_WRITE(kind, data, start + i, sortbuf[i]); - } + memcpy(data, sortbuf, length * sizeof(Py_UCS4)); } static PyObject* @@ -620,9 +615,8 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) PyObject *result; Py_UCS4 *output; Py_ssize_t i, o, osize; - int input_kind, result_kind; + int input_kind; const void *input_data; - void *result_data; /* Longest decomposition in Unicode 3.2: U+FDFA */ Py_UCS4 stack[20]; Py_ssize_t space, isize; @@ -715,22 +709,13 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) } } - result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, - output, o); - PyMem_Free(output); - if (!result) - return NULL; - - result_kind = PyUnicode_KIND(result); - result_data = PyUnicode_DATA(result); - /* Sort each consecutive combining-character run canonically. */ i = 0; while (i < o) { Py_ssize_t run_length, run_start; int needs_sort = 0; - Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i); + Py_UCS4 ch = output[i]; prev = _getrecord_ex(ch)->combining; if (prev == 0) { i++; @@ -739,7 +724,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) run_start = i++; while (i < o) { - Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i); + Py_UCS4 ch = output[i]; cur = _getrecord_ex(ch)->combining; if (cur == 0) { break; @@ -756,29 +741,28 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) run_length = i - run_start; if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD) { - canonical_ordering_sort_insertion(result_kind, result_data, - run_start, i); + canonical_ordering_sort_insertion(output + run_start, run_length); continue; } if (run_length > sortbuflen) { - Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf, - Py_UCS4, - run_length); + Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf, Py_UCS4, run_length); if (new_sortbuf == NULL) { PyErr_NoMemory(); PyMem_Free(sortbuf); - Py_DECREF(result); + PyMem_Free(output); return NULL; } sortbuf = new_sortbuf; sortbuflen = run_length; } - canonical_ordering_sort_counting(result_kind, result_data, - run_start, i, sortbuf); + canonical_ordering_sort_counting(output + run_start, run_length, + sortbuf); } PyMem_Free(sortbuf); + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, o); + PyMem_Free(output); return result; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e368092b300f86..beaf6752b87ea2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2349,7 +2349,7 @@ dummy_func( assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); assert(index < DK_SIZE(keys)); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); @@ -2368,7 +2368,7 @@ dummy_func( DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); @@ -2958,7 +2958,7 @@ dummy_func( assert(keys->dk_kind == DICT_KEYS_UNICODE); assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index; - PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_value); EXIT_IF(attr_o == NULL); #ifdef Py_GIL_DISABLED int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7973c75e1a60ad..d5bfe60cd23473 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -10301,7 +10301,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); assert(index < DK_SIZE(keys)); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); SET_CURRENT_CACHED_VALUES(0); @@ -10346,7 +10346,7 @@ } assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); SET_CURRENT_CACHED_VALUES(0); @@ -12152,7 +12152,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index; - PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_value); if (attr_o == NULL) { UOP_STAT_INC(uopcode, miss); _tos_cache0 = owner; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5adcdcb4521baf..a6e0f90d8c1ce2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8829,7 +8829,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index; - PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_value); if (attr_o == NULL) { UPDATE_MISS_STATS(LOAD_ATTR); assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR)); @@ -9705,7 +9705,7 @@ } assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UPDATE_MISS_STATS(LOAD_GLOBAL); assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL)); @@ -9772,7 +9772,7 @@ assert(keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); assert(index < DK_SIZE(keys)); - PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); + PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value); if (res_o == NULL) { UPDATE_MISS_STATS(LOAD_GLOBAL); assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL));