Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Doc/library/mimetypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ the information :func:`init` sets up.
(e.g. :program:`compress` or :program:`gzip`). The encoding is suitable for use
as a :mailheader:`Content-Encoding` header, **not** as a
:mailheader:`Content-Transfer-Encoding` header. The mappings are table driven.
Encoding suffixes are case sensitive; type suffixes are first tried case
sensitively, then case insensitively.
Encoding suffixes are case-sensitive. Suffix mappings and type suffixes are
first tried case-sensitively, then case-insensitively.

The optional *strict* argument is a flag specifying whether the list of known MIME types
is limited to only the official types `registered with IANA
Expand Down Expand Up @@ -131,6 +131,8 @@ behavior of the module.
is already known, the extension will be added to the list of known extensions.
Valid extensions are empty or start with a ``'.'``.

Registered lower-case extensions are matched case-insensitively.

When *strict* is ``True`` (the default), the mapping will be added to the
official MIME types, otherwise to the non-standard ones.

Expand Down Expand Up @@ -312,6 +314,8 @@ than one MIME-type database; it provides an interface similar to the one of the
extension is already known, the new type will replace the old one. When the type
is already known, the extension will be added to the list of known extensions.

Registered lower-case extensions are matched case-insensitively.

When *strict* is ``True`` (the default), the mapping will be added to the
official MIME types, otherwise to the non-standard ones.

Expand Down
22 changes: 19 additions & 3 deletions Lib/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ def add_type(self, type, ext, strict=True):
is already known the extension will be added
to the list of known extensions.

Registered lower-case extensions are matched
case-insensitively.

If strict is true, information will be added to the
list of standard types, else to the list of non-standard
types.
Expand Down Expand Up @@ -172,23 +175,33 @@ def guess_file_type(self, path, *, strict=True):

def _guess_file_type(self, path, strict, splitext):
    """Guess the MIME type and content encoding of *path* from its suffix.

    *splitext* is the callable used to split a path into ``(base, ext)``.
    When *strict* is true only ``self.types_map[True]`` is consulted;
    otherwise ``self.types_map[False]`` is tried as a fallback.

    Suffix aliases (``self.suffix_map``) and type suffixes are looked up
    with their exact spelling first and lower-cased second, so a mapping
    registered with a specific case always wins over the case-insensitive
    fallback.  Encoding suffixes (``self.encodings_map``) are matched
    case-sensitively only.

    Return a ``(type, encoding)`` tuple; either element is ``None`` when
    it cannot be determined.
    """
    base, ext = splitext(path)
    # Expand suffix aliases until the suffix is no longer an alias.  The
    # exact-case entry takes priority; the lower-cased one is the fallback.
    while True:
        if ext in self.suffix_map:
            suffix = self.suffix_map[ext]
        elif (ext_lower := ext.lower()) in self.suffix_map:
            suffix = self.suffix_map[ext_lower]
        else:
            break
        base, ext = splitext(base + suffix)
    # encodings_map is case sensitive
    if ext in self.encodings_map:
        encoding = self.encodings_map[ext]
        base, ext = splitext(base)
    else:
        encoding = None
    # Keep the original spelling in ``ext``: lowering it in place would
    # make the exact-case lookups below indistinguishable from the
    # case-insensitive fallback.
    ext_lower = ext.lower()
    types_map = self.types_map[True]
    if ext in types_map:
        return types_map[ext], encoding
    if ext_lower in types_map:
        return types_map[ext_lower], encoding
    elif strict:
        return None, encoding
    types_map = self.types_map[False]
    if ext in types_map:
        return types_map[ext], encoding
    if ext_lower in types_map:
        return types_map[ext_lower], encoding
    else:
        return None, encoding

Expand Down Expand Up @@ -386,6 +399,9 @@ def add_type(type, ext, strict=True):
is already known the extension will be added
to the list of known extensions.

Registered lower-case extensions are matched
case-insensitively.

If strict is true, information will be added to the
list of standard types, else to the list of non-standard
types.
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3159,7 +3159,7 @@ def in_systemd_nspawn_sync_suppressed() -> bool:
with open("/run/systemd/container", "rb") as fp:
if fp.read().rstrip() != b"systemd-nspawn":
return False
except FileNotFoundError:
except (FileNotFoundError, PermissionError):
return False

# If systemd-nspawn is used, O_SYNC flag will immediately
Expand Down
44 changes: 44 additions & 0 deletions Lib/test/test_mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,50 @@ def test_case_sensitivity(self):
eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))

def test_suffix_map_case_sensitive_preferred(self):
    # Register the same suffix alias twice, differing only in letter case,
    # and check that each spelling resolves through its own entry rather
    # than through the lower-cased fallback.
    aliases = self.db.suffix_map
    aliases[".TEST-SUFFIX"] = ".tar.gz"
    aliases[".test-suffix"] = ".tar.xz"
    expectations = (
        ("example.TEST-SUFFIX", ("application/x-tar", "gzip")),
        ("example.test-suffix", ("application/x-tar", "xz")),
    )
    for filename, expected in expectations:
        self.assertEqual(self.db.guess_file_type(filename), expected)

def test_added_types_case_sensitive_preferred(self):
    # Extensions registered in both cases must each map to their own type,
    # for the strict table and for the non-strict table alike.
    db = self.db
    guess = db.guess_file_type

    db.add_type("text/x-test-uppercase-r", ".R")
    db.add_type("text/x-test-lowercase-r", ".r")
    strict_cases = (
        ("example.R", "text/x-test-uppercase-r"),
        ("example.r", "text/x-test-lowercase-r"),
    )
    for filename, mime in strict_cases:
        self.assertEqual(guess(filename), (mime, None))

    db.add_type("text/x-test-uppercase-non-strict",
                ".NON-STRICT-EXT", strict=False)
    db.add_type("text/x-test-lowercase-non-strict",
                ".non-strict-ext", strict=False)
    non_strict_cases = (
        ("example.NON-STRICT-EXT", "text/x-test-uppercase-non-strict"),
        ("example.non-strict-ext", "text/x-test-lowercase-non-strict"),
    )
    # With the default strict lookup the non-standard entries are invisible.
    for filename, _ in non_strict_cases:
        self.assertEqual(guess(filename), (None, None))
    # With strict=False each spelling resolves to its own registration.
    for filename, mime in non_strict_cases:
        self.assertEqual(guess(filename, strict=False), (mime, None))

def test_default_data(self):
eq = self.assertEqual
eq(self.db.guess_file_type("foo.html"), ("text/html", None))
Expand Down
2 changes: 1 addition & 1 deletion Lib/zoneinfo/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def load_tzdata(key):
# UnicodeEncodeError: If package_name or resource_name are not UTF-8,
# such as keys containing a surrogate character.
# IsADirectoryError: If package_name without a resource_name specified.
raise ZoneInfoNotFoundError(f"No time zone found with key {key}")
raise ZoneInfoNotFoundError(f"No time zone found with key {key!r}")


def load_data(fobj):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :mod:`mimetypes` to prefer case-sensitive matches for suffix mappings and
MIME type suffixes before falling back to case-insensitive matches.
Contributed by Xiao Yuan.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix tests failing on FreeBSD in test.support's
in_systemd_nspawn_sync_suppressed() due to unreadable /run directory.
6 changes: 3 additions & 3 deletions Modules/_testinternalcapi/test_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 23 additions & 39 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,39 +560,36 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
#define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20

static void
canonical_ordering_sort_insertion(int kind, void *data,
Py_ssize_t start, Py_ssize_t end)
canonical_ordering_sort_insertion(Py_UCS4 *data, Py_ssize_t length)
{
for (Py_ssize_t i = start + 1; i < end; i++) {
Py_UCS4 code = PyUnicode_READ(kind, data, i);
for (Py_ssize_t i = 1; i < length; i++) {
Py_UCS4 code = data[i];
unsigned char combining = _getrecord_ex(code)->combining;
Py_ssize_t j = i;

while (j > start) {
Py_UCS4 previous = PyUnicode_READ(kind, data, j - 1);
while (j > 0) {
Py_UCS4 previous = data[j - 1];
if (_getrecord_ex(previous)->combining <= combining) {
break;
}
PyUnicode_WRITE(kind, data, j, previous);
data[j] = previous;
j--;
}
if (j != i) {
PyUnicode_WRITE(kind, data, j, code);
data[j] = code;
}
}
}

static void
canonical_ordering_sort_counting(int kind, void *data,
Py_ssize_t start, Py_ssize_t end,
canonical_ordering_sort_counting(Py_UCS4 *data, Py_ssize_t length,
Py_UCS4 *sortbuf)
{
Py_ssize_t counts[256] = {0};
Py_ssize_t run_length = end - start;
Py_ssize_t total = 0;

for (Py_ssize_t i = start; i < end; i++) {
Py_UCS4 code = PyUnicode_READ(kind, data, i);
for (Py_ssize_t i = 0; i < length; i++) {
Py_UCS4 code = data[i];
unsigned char combining = _getrecord_ex(code)->combining;
counts[combining]++;
}
Expand All @@ -604,14 +601,12 @@ canonical_ordering_sort_counting(int kind, void *data,
}

/* Reuse counts[] as the next output slot for each CCC. */
for (Py_ssize_t i = start; i < end; i++) {
Py_UCS4 code = PyUnicode_READ(kind, data, i);
for (Py_ssize_t i = 0; i < length; i++) {
Py_UCS4 code = data[i];
unsigned char combining = _getrecord_ex(code)->combining;
sortbuf[counts[combining]++] = code;
}
for (Py_ssize_t i = 0; i < run_length; i++) {
PyUnicode_WRITE(kind, data, start + i, sortbuf[i]);
}
memcpy(data, sortbuf, length * sizeof(Py_UCS4));
}

static PyObject*
Expand All @@ -620,9 +615,8 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
PyObject *result;
Py_UCS4 *output;
Py_ssize_t i, o, osize;
int input_kind, result_kind;
int input_kind;
const void *input_data;
void *result_data;
/* Longest decomposition in Unicode 3.2: U+FDFA */
Py_UCS4 stack[20];
Py_ssize_t space, isize;
Expand Down Expand Up @@ -715,22 +709,13 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
}
}

result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
output, o);
PyMem_Free(output);
if (!result)
return NULL;

result_kind = PyUnicode_KIND(result);
result_data = PyUnicode_DATA(result);

/* Sort each consecutive combining-character run canonically. */
i = 0;
while (i < o) {
Py_ssize_t run_length, run_start;
int needs_sort = 0;

Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
Py_UCS4 ch = output[i];
prev = _getrecord_ex(ch)->combining;
if (prev == 0) {
i++;
Expand All @@ -739,7 +724,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)

run_start = i++;
while (i < o) {
Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i);
Py_UCS4 ch = output[i];
cur = _getrecord_ex(ch)->combining;
if (cur == 0) {
break;
Expand All @@ -756,29 +741,28 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)

run_length = i - run_start;
if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD) {
canonical_ordering_sort_insertion(result_kind, result_data,
run_start, i);
canonical_ordering_sort_insertion(output + run_start, run_length);
continue;
}

if (run_length > sortbuflen) {
Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf,
Py_UCS4,
run_length);
Py_UCS4 *new_sortbuf = PyMem_Resize(sortbuf, Py_UCS4, run_length);
if (new_sortbuf == NULL) {
PyErr_NoMemory();
PyMem_Free(sortbuf);
Py_DECREF(result);
PyMem_Free(output);
return NULL;
}
sortbuf = new_sortbuf;
sortbuflen = run_length;
}

canonical_ordering_sort_counting(result_kind, result_data,
run_start, i, sortbuf);
canonical_ordering_sort_counting(output + run_start, run_length,
sortbuf);
}
PyMem_Free(sortbuf);
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, o);
PyMem_Free(output);
return result;
}

Expand Down
6 changes: 3 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2349,7 +2349,7 @@ dummy_func(
assert(keys->dk_kind == DICT_KEYS_UNICODE);
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
assert(index < DK_SIZE(keys));
PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value);
PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value);
DEOPT_IF(res_o == NULL);
#if Py_GIL_DISABLED
int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res);
Expand All @@ -2368,7 +2368,7 @@ dummy_func(
DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version);
assert(keys->dk_kind == DICT_KEYS_UNICODE);
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value);
PyObject *res_o = FT_ATOMIC_LOAD_PTR_CONSUME(entries[index].me_value);
DEOPT_IF(res_o == NULL);
#if Py_GIL_DISABLED
int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res);
Expand Down Expand Up @@ -2958,7 +2958,7 @@ dummy_func(
assert(keys->dk_kind == DICT_KEYS_UNICODE);
assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries));
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index;
PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value);
PyObject *attr_o = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_value);
EXIT_IF(attr_o == NULL);
#ifdef Py_GIL_DISABLED
int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr);
Expand Down
6 changes: 3 additions & 3 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading