-
Notifications
You must be signed in to change notification settings - Fork 61
8377907: (process) Race in ProcessBuilder can cause JVM hangs #558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,8 @@ | |
| #include <signal.h> | ||
| #include <string.h> | ||
|
|
||
| #include <fcntl.h> | ||
| #include <unistd.h> | ||
| #include <spawn.h> | ||
|
|
||
| #include "childproc.h" | ||
|
|
@@ -666,6 +668,28 @@ startChild(JNIEnv *env, jobject process, ChildStuff *c, const char *helperpath) | |
| } | ||
| } | ||
|
|
||
| static int pipeSafely(int fd[2]) { | ||
| /* Pipe filedescriptors must be CLOEXEC as early as possible - ideally from the point of | ||
| * creation on - since at any moment a concurrent (third-party) fork() could inherit copies | ||
| * of these descriptors and accidentally keep the pipes open. That could cause the parent | ||
| * process to hang (see e.g. JDK-8377907). | ||
| * We use pipe2(2), if we have it. If we don't, we use pipe(2) + fcntl(2) immediately. | ||
| * The latter is still racy and can therefore still cause hangs as described in JDK-8377907, | ||
| * but at least the dangerous time window is as short as we can make it. | ||
| */ | ||
| int rc = -1; | ||
| #ifdef HAVE_PIPE2 | ||
| rc = pipe2(fd, O_CLOEXEC); | ||
| #else | ||
| rc = pipe(fd); | ||
| if (rc == 0) { | ||
| fcntl(fd[0], F_SETFD, FD_CLOEXEC); | ||
| fcntl(fd[1], F_SETFD, FD_CLOEXEC); | ||
| } | ||
| #endif /* HAVE_PIPE2 */ | ||
| return rc; | ||
| } | ||
|
|
||
| JNIEXPORT jint JNICALL | ||
| Java_java_lang_ProcessImpl_forkAndExec(JNIEnv *env, | ||
| jobject process, | ||
|
|
@@ -727,14 +751,33 @@ Java_java_lang_ProcessImpl_forkAndExec(JNIEnv *env, | |
| fds = (*env)->GetIntArrayElements(env, std_fds, NULL); | ||
| if (fds == NULL) goto Catch; | ||
|
|
||
| if ((fds[0] == -1 && pipe(in) < 0) || | ||
| (fds[1] == -1 && pipe(out) < 0) || | ||
| (fds[2] == -1 && pipe(err) < 0) || | ||
| (pipe(childenv) < 0) || | ||
| (pipe(fail) < 0)) { | ||
| throwInternalIOException(env, errno, "Bad file descriptor", mode); | ||
| goto Catch; | ||
| if (mode == MODE_FORK || mode == MODE_VFORK) { | ||
| /* We cannot downport the full breadth of JDK-8377907 to JDK 25 and earlier, since such | ||
| * a change (including prerequisites) would be far too invasive for LTS releases. We | ||
| * therefore only downport a very small part of it, namely the fix for the FORK/VFORK | ||
| * modes. Fixing FORK/VFORK just requires us to open pipes with CLOEXEC. Fixing the | ||
| * POSIX_SPAWN mode, otoh, would require rewriting large parts of the spawn layer | ||
| * (see JDK-8377907). | ||
| */ | ||
| if ((fds[0] == -1 && pipeSafely(in) < 0) || | ||
| (fds[1] == -1 && pipeSafely(out) < 0) || | ||
| (fds[2] == -1 && pipeSafely(err) < 0) || | ||
| (pipeSafely(childenv) < 0) || | ||
| (pipeSafely(fail) < 0)) { | ||
| throwInternalIOException(env, errno, "Bad file descriptor", mode); | ||
| goto Catch; | ||
| } | ||
|
Comment on lines
+762
to
+769
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like the only difference between this and the one in the |
||
| } else { | ||
| if ((fds[0] == -1 && pipe(in) < 0) || | ||
| (fds[1] == -1 && pipe(out) < 0) || | ||
| (fds[2] == -1 && pipe(err) < 0) || | ||
| (pipe(childenv) < 0) || | ||
| (pipe(fail) < 0)) { | ||
| throwInternalIOException(env, errno, "Bad file descriptor", mode); | ||
| goto Catch; | ||
| } | ||
| } | ||
|
|
||
| c->fds[0] = fds[0]; | ||
| c->fds[1] = fds[1]; | ||
| c->fds[2] = fds[2]; | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,118 @@ | ||||||
| /* | ||||||
| * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. | ||||||
| * Copyright (c) 2026, IBM Corp. | ||||||
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||||||
| * | ||||||
| * This code is free software; you can redistribute it and/or modify it | ||||||
| * under the terms of the GNU General Public License version 2 only, as | ||||||
| * published by the Free Software Foundation. | ||||||
| * | ||||||
| * This code is distributed in the hope that it will be useful, but WITHOUT | ||||||
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||||||
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||||||
| * version 2 for more details (a copy is included in the LICENSE file that | ||||||
| * accompanied this code). | ||||||
| * | ||||||
| * You should have received a copy of the GNU General Public License version | ||||||
| * 2 along with this work; if not, write to the Free Software Foundation, | ||||||
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
| * | ||||||
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||||||
| * or visit www.oracle.com if you need additional information or have any | ||||||
| * questions. | ||||||
| */ | ||||||
|
|
||||||
| /* | ||||||
| * @test id=FORK | ||||||
| * @bug 8377907 | ||||||
| * @summary Test that demonstrates the hanging-parent-on-native-concurrent-forks problem | ||||||
| * @requires os.family != "windows" | ||||||
| * @requires vm.flagless | ||||||
| * @library /test/lib | ||||||
| * @run main/othervm/manual -Djdk.lang.Process.launchMechanism=FORK ConcNativeForkTest | ||||||
| */ | ||||||
|
|
||||||
| /* | ||||||
| * @test id=VFORK | ||||||
| * @bug 8377907 | ||||||
| * @summary Test that demonstrates the hanging-parent-on-native-concurrent-forks problem | ||||||
| * @requires os.family == "linux" | ||||||
| * @requires vm.flagless | ||||||
| * @library /test/lib | ||||||
| * @run main/othervm/manual -Djdk.lang.Process.launchMechanism=VFORK ConcNativeForkTest | ||||||
| */ | ||||||
|
|
||||||
| public class ConcNativeForkTest { | ||||||
|
|
||||||
| // How this works: | ||||||
| // - We start a child process via ProcessBuilder. Does not matter what, we just call "/bin/true". | ||||||
| // - Concurrently, we continuously (up to a limit) fork natively; these forks will all exec "sleep 30". | ||||||
| // - If the natively forked child process forks off at the right (wrong) moment, it will catch the open pipe from | ||||||
| // the "/bin/true" child process, and forcing the parent process (this test) to wait in ProcessBuilder.start() | ||||||
| // (inside forkAndExec()) until the natively forked child releases the pipe file descriptors it inherited. | ||||||
|
|
||||||
| // Notes: | ||||||
| // | ||||||
| // Obviously, this is racy and depends on scheduler timings of the underlying OS. The test succeeding is | ||||||
| // no proof the bug does not exist (see PipesCloseOnExecTest as a complimentary test that is more reliable, but | ||||||
| // only works on Linux). | ||||||
| // That said, in tests it reliably reproduces the bug on Linux x64 and MacOS Arm. | ||||||
| // | ||||||
| // This test is not well suited for automatic test execution, since the test essentially | ||||||
| // fork-bombs itself, and that may run into issues in containerized CI/CD environments. | ||||||
|
|
||||||
| native static boolean prepareNativeForkerThread(int numForks); | ||||||
| native static void releaseNativeForkerThread(); | ||||||
| native static void stopNativeForkerThread(); | ||||||
|
|
||||||
| private static final int numIterations = 20; | ||||||
|
|
||||||
| public static void main(String[] args) throws Exception { | ||||||
|
|
||||||
| System.out.println("jdk.lang.Process.launchMechanism=" + | ||||||
| System.getProperty("jdk.lang.Process.launchMechanism")); | ||||||
|
|
||||||
| System.loadLibrary("ConcNativeFork"); | ||||||
|
|
||||||
| // A very simple program returning immediately (/bin/true) | ||||||
| ProcessBuilder pb = new ProcessBuilder("true").inheritIO(); | ||||||
| final int numJavaProcesses = 10; | ||||||
| final int numNativeProcesses = 250; | ||||||
| Process[] processes = new Process[numJavaProcesses]; | ||||||
|
|
||||||
| for (int iteration = 0; iteration < numIterations; iteration ++) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit:
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably not in a backport, though. |
||||||
|
|
||||||
| if (!prepareNativeForkerThread(numNativeProcesses)) { | ||||||
| throw new RuntimeException("Failed to start native forker thread (see stdout)"); | ||||||
| } | ||||||
|
|
||||||
| long[] durations = new long[numJavaProcesses]; | ||||||
|
|
||||||
| releaseNativeForkerThread(); | ||||||
|
|
||||||
| for (int np = 0; np < numJavaProcesses; np ++) { | ||||||
| long t1 = System.currentTimeMillis(); | ||||||
| Process p = pb.start(); | ||||||
| durations[np] = System.currentTimeMillis() - t1; | ||||||
| processes[np] = p; | ||||||
| } | ||||||
|
|
||||||
| stopNativeForkerThread(); | ||||||
|
|
||||||
| long longestDuration = 0; | ||||||
| for (int np = 0; np < numJavaProcesses; np ++) { | ||||||
| processes[np].waitFor(); | ||||||
| System.out.printf("Duration: %dms%n", durations[np]); | ||||||
| longestDuration = Math.max(durations[np], longestDuration); | ||||||
| } | ||||||
|
|
||||||
| System.out.printf("Longest startup time: %dms%n", longestDuration); | ||||||
|
|
||||||
| if (longestDuration >= 30000) { | ||||||
| throw new RuntimeException("Looks like we blocked on native fork"); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| } | ||||||
|
|
||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The JDK head patch has
assert(fdIsCloexec(fd[0]));here. Intentionally omitted?