From 483b437602203a680326821cf5cbd352ebee16c9 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 17 Jun 2026 13:02:57 -0700 Subject: [PATCH] gh-145177: Bump Emscripten to 6.0.0 And fix two problems due to changes in Emscripten libc: 1. There was a regression in getentropy that broke it if exactly one byte is requested. Replace it with a wrapped getentropy that replaces a one-byte request with a two-byte request and then puts the first byte in the original buffer. 2. Emscripten libc now supports umask. We need to initialize the emscripten libc umask to the ambient umask and zero out the ambient umask. --- Lib/test/test_platform.py | 15 +++++++++- Platforms/emscripten/config.toml | 2 +- Python/emscripten_syscalls.c | 47 ++++++++++++++++++++++++-------- configure | 2 ++ configure.ac | 4 +++ 5 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 63c130813ec497..3503eaefaf8e9e 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -534,7 +534,20 @@ def test_ios_ver(self): def test_libc_ver(self): if support.is_emscripten: - assert platform.libc_ver() == ("emscripten", "4.0.19") + import tomllib + from pathlib import Path + + # Get expected emscripten version from emscripten config + config_path = ( + Path(__file__).parents[2] / "Platforms/emscripten/config.toml" + ) + with open(config_path, "rb") as fp: + emscripten_version = tomllib.load(fp)["emscripten-version"] + + self.assertEqual( + platform.libc_ver(), ("emscripten", emscripten_version) + ) + return # check that libc_ver(executable) doesn't raise an exception if os.path.isdir(sys.executable) and \ diff --git a/Platforms/emscripten/config.toml b/Platforms/emscripten/config.toml index 401e9396ddbb00..389d2ea66ce948 100644 --- a/Platforms/emscripten/config.toml +++ b/Platforms/emscripten/config.toml @@ -1,7 +1,7 @@ # Any data that can vary between Python versions is to be kept in this file. 
# This allows for blanket copying of the Emscripten build code between supported # Python versions. -emscripten-version = "4.0.19" +emscripten-version = "6.0.0" node-version = "24" test-args = [ "-m", "test", diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c index 98ee44276e53e0..b0c370ced36cc3 100644 --- a/Python/emscripten_syscalls.c +++ b/Python/emscripten_syscalls.c @@ -23,24 +23,27 @@ int __syscall_getuid32(void) { return __syscall_getuid32_js(); } -EM_JS(int, __syscall_umask_js, (int mask), { - // If we're in node and we can, call native process.umask() +// Emscripten's syscall layer tracks the umask in SYSCALLS.currentUmask and +// applies it itself when creating files and directories. We mount the real +// filesystem via NODEFS, which applies process.umask() to everything as well. To +// avoid masking the mode twice, read and zero out process umask at startup, +// and store it as emscripten's umask. +EM_JS(void, __syscall_init_umask_js, (void), { if (ENVIRONMENT_IS_NODE) { try { - return process.umask(mask); - } catch(e) { - // oops... - // NodeJS docs: "In Worker threads, process.umask(mask) will throw an exception." - // umask docs: "This system call always succeeds" - return 0; + // process.umask(0) returns the previous umask and sets it to 0. + SYSCALLS.currentUmask = process.umask(0); + } catch (e) { + // NodeJS docs: "In Worker threads, process.umask(mask) will throw an + // exception." In that case just keep emscripten's default umask. } } - // Fall back to the stub case of returning 0. 
- return 0; }) -int __syscall_umask(int mask) { - return __syscall_umask_js(mask); +EM_JS_DEPS(__syscall_init_umask, "$SYSCALLS"); + +__attribute__((constructor)) void __syscall_init_umask(void) { + __syscall_init_umask_js(); } #include @@ -290,6 +293,26 @@ int __syscall_poll(intptr_t fds, int nfds, int timeout) { return __block_for_int(p); } + +// Workaround for an Emscripten bug: getentropy(buffer, 1) returns the single +// byte of entropy as the return code. Fixed upstream by +// emscripten-core/emscripten#27122 +int __real_getentropy(void*, size_t); + +int __wrap_getentropy(void *buffer, size_t len) { + if (len != 1) { + return __real_getentropy(buffer, len); + } + // Length is 1. Workaround is to get two bytes of entropy and write the + // first one into the original target buffer. + uint8_t tmp[2]; + int ret = __real_getentropy(tmp, 2); + if (ret == 0) { + *(uint8_t *)buffer = tmp[0]; + } + return ret; +} + #include int syscall_ioctl_orig(int fd, int request, void* varargs) diff --git a/configure b/configure index 7408fac738b8a3..d73a88d04016dd 100755 --- a/configure +++ b/configure @@ -9803,6 +9803,8 @@ fi as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" as_fn_append LINKFORSHARED " -sTEXTDECODER=2" + as_fn_append LDFLAGS_NODIST " -Wl,--wrap=getentropy" + if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index dbc781b545dd36..8ba134c77b3412 100644 --- a/configure.ac +++ b/configure.ac @@ -2411,6 +2411,10 @@ AS_CASE([$ac_sys_system], dnl Avoid bugs in JS fallback string decoding path AS_VAR_APPEND([LINKFORSHARED], [" -sTEXTDECODER=2"]) + dnl Workaround for a bug in Emscripten libc's getentropy. See + dnl __wrap_getentropy in Python/emscripten_syscalls.c. + AS_VAR_APPEND([LDFLAGS_NODIST], [" -Wl,--wrap=getentropy"]) + AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) ])