From 477e923ea3adcd488417cc5107f7aac2bfb44ddc Mon Sep 17 00:00:00 2001 From: Kwon Byungchang Date: Thu, 21 May 2026 14:06:42 +0900 Subject: [PATCH 1/2] YARN-11960. Fix NodeManager OOM caused by large client.json --- .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++ .../yarn/util/DockerClientConfigHandler.java | 38 +++++++--- .../src/main/resources/yarn-default.xml | 10 +++ .../runtime/DockerLinuxContainerRuntime.java | 2 +- .../runtime/TestDockerContainerRuntime.java | 71 +++++++++++++++++++ 5 files changed, 120 insertions(+), 10 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6064967ad15d6b..773776377fd2fc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -2726,6 +2726,15 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_DOCKER_DEFAULT_TMPFS_MOUNTS = DOCKER_CONTAINER_RUNTIME_PREFIX + "default-tmpfs-mounts"; + /** + * Maximum allowed file size in bytes for the Docker client configuration + * file (config.json). If the file size exceeds this value, the container + * launch will be rejected. See yarn-default.xml for the default value. + */ + public static final String NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES = + DOCKER_CONTAINER_RUNTIME_PREFIX + "client-config-file-max-size-bytes"; + public static final int DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES = 1024; + /** The mode in which the Java Container Sandbox should run detailed by * the JavaSandboxLinuxContainerRuntime. 
*/ public static final String YARN_CONTAINER_SANDBOX = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java index 6351cb69c82e78..0af78e92059359 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java @@ -19,6 +19,7 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.commons.io.IOUtils; @@ -27,6 +28,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.DockerCredentialTokenIdentifier; import com.fasterxml.jackson.core.JsonFactory; @@ -80,19 +82,37 @@ private DockerClientConfigHandler() { } */ public static Credentials readCredentialsFromConfigFile(Path configFile, Configuration conf, String applicationId) throws IOException { - // Read the config file - String contents = null; + + int configFileMaxSizeByte = conf.getInt( + YarnConfiguration.NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES, + YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES); + if (configFileMaxSizeByte <= 0) { + LOG.warn("Invalid value {} for {}, falling back to default {} bytes.", + configFileMaxSizeByte, + YarnConfiguration.NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES, + YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES); + configFileMaxSizeByte = + 
YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; + } + + // Read the config file. Check the file size up front so + // a large config.json cannot OOM the NodeManager via IOUtils.toString. configFile = new Path(configFile.toUri()); FileSystem fs = configFile.getFileSystem(conf); - if (fs != null) { - FSDataInputStream fileHandle = fs.open(configFile); - if (fileHandle != null) { - contents = IOUtils.toString(fileHandle, StandardCharsets.UTF_8); - } + FileStatus status = fs.getFileStatus(configFile); + if (status.getLen() > configFileMaxSizeByte) { + throw new IOException("Docker client config " + configFile + " (" + + status.getLen() + " bytes) is larger than maximum allowed size (" + + configFileMaxSizeByte + " bytes)"); + } + + String contents; + try (FSDataInputStream fileHandle = fs.open(configFile)) { + contents = IOUtils.toString(fileHandle, StandardCharsets.UTF_8); } if (contents == null) { - throw new IOException("Failed to read Docker client configuration: " - + configFile); + throw new IOException( + "Failed to read Docker client configuration: " + configFile); } // Parse the JSON and create the Tokens/Credentials. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b128d0f0ef14f9..52fe9426d686b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2336,6 +2336,16 @@ + <property> + <description> + Maximum allowed file size in bytes for the Docker client configuration file + (config.json). If the file size exceeds this value, the container launch + will be rejected. Default is 1024 bytes. + </description> + <name>yarn.nodemanager.runtime.linux.docker.client-config-file-max-size-bytes</name> + <value>1024</value> + </property> + <property> <description>The runC image tag to manifest plugin class to be used. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 14f5ffeefe0d3f..e38dd1632c11ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -910,7 +910,7 @@ private Credentials getAdditionalDockerClientCredentials(String clientConfig, containerIdStr); } catch (IOException e) { throw new RuntimeException( - "Fail to read additional docker client config file from " + clientConfig); + "Failed to read additional docker client config file: " + clientConfig, e); } } return additionalDockerCredentials; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index 84f262438341f4..a6da8273535aa5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -118,7 +118,9 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.OCIContainerRuntime.CONTAINER_PID_NAMESPACE_SUFFIX; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.OCIContainerRuntime.RUN_PRIVILEGED_CONTAINER_SUFFIX; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.OCIContainerRuntime.formatOciEnvKey; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -2512,6 +2514,75 @@ public void testLaunchContainerWithAdditionalDockerClientConfig(boolean pHttps) testLaunchContainer(null, getDockerClientConfigFile()); } + @ParameterizedTest(name = "https={0}") + @MethodSource("data") + public void testLaunchContainerWithBigDockerClientConfig(boolean pHttps) throws Exception { + initHttps(pHttps); + int maxSize = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; + int invalidSize = maxSize + 1; + + File file = new File(tmpPath, "docker-client-config-big"); + try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { + bw.write("x".repeat(invalidSize)); + } + initHttps(pHttps); + RuntimeException e = assertThrows(RuntimeException.class, + () -> testLaunchContainer(null, file)); + assertNotNull(e.getCause(), "Wrapped IOException cause expected"); + assertThat(e.getCause().getMessage()) + .contains("(" + invalidSize + " bytes) is larger than maximum allowed size (" + + maxSize + " bytes)"); + } + + @ParameterizedTest(name = "https={0}") + 
@MethodSource("data") + public void testLaunchContainerWithDockerClientConfigAtMaxSize(boolean pHttps) throws Exception { + initHttps(pHttps); + int maxSize = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; + + // Exactly maxSize bytes must be accepted (boundary check). The content + // does not need to be valid JSON: readCredentialsFromConfigFile parses + // JSON only after the size gate, so a JSON-parse failure here would + // already prove the size gate let the file through. + File file = new File(tmpPath, "docker-client-config-boundary"); + try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { + bw.write(TestDockerClientConfigHandler.JSON); + bw.write("x".repeat(maxSize - TestDockerClientConfigHandler.JSON.length())); + } + // The handler should not reject on size. Any failure must come from + // downstream container launch wiring, not the size guard. + try { + testLaunchContainer(null, file); + } catch (RuntimeException re) { + if (re.getCause() != null && re.getCause().getMessage() != null) { + assertThat(re.getCause().getMessage()) + .doesNotContain("is larger than maximum allowed size"); + } + } + } + + @ParameterizedTest(name = "https={0}") + @MethodSource("data") + public void testLaunchContainerWithInvalidMaxSizeFallsBackToDefault(boolean pHttps) + throws Exception { + initHttps(pHttps); + + conf.setInt(YarnConfiguration.NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES, -1); + int defaultMax = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; + int invalidSize = defaultMax + 1; + + File file = new File(tmpPath, "docker-client-neg-max"); + try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { + bw.write("x".repeat(invalidSize)); + } + RuntimeException e = assertThrows(RuntimeException.class, + () -> testLaunchContainer(null, file)); + assertNotNull(e.getCause(), "Wrapped IOException cause expected"); + assertThat(e.getCause().getMessage()) + .contains("(" + invalidSize + " bytes) is larger than maximum 
allowed size (" + + defaultMax + " bytes)"); + } + public void testLaunchContainer(ByteBuffer tokens, File dockerConfigFile) throws ContainerExecutionException, PrivilegedOperationException, IOException { From ddc8912b9bc17a7a974d0fbb55a5dd3abe541a43 Mon Sep 17 00:00:00 2001 From: Kwon Byungchang Date: Thu, 21 May 2026 14:33:46 +0900 Subject: [PATCH 2/2] changed default value --- .../hadoop/yarn/conf/YarnConfiguration.java | 5 +- .../yarn/util/DockerClientConfigHandler.java | 2 +- .../src/main/resources/yarn-default.xml | 4 +- .../runtime/TestDockerContainerRuntime.java | 56 +++++++++---------- 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 773776377fd2fc..ff30a1397d1237 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -2729,11 +2729,12 @@ public static boolean isAclEnabled(Configuration conf) { /** * Maximum allowed file size in bytes for the Docker client configuration * file (config.json). If the file size exceeds this value, the container - * launch will be rejected. See yarn-default.xml for the default value. + * launch will be rejected. */ public static final String NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES = DOCKER_CONTAINER_RUNTIME_PREFIX + "client-config-file-max-size-bytes"; - public static final int DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES = 1024; + public static final int DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES = + 16 * 1024; /** The mode in which the Java Container Sandbox should run detailed by * the JavaSandboxLinuxContainerRuntime. 
*/ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java index 0af78e92059359..0e49ade6f0acea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java @@ -110,7 +110,7 @@ public static Credentials readCredentialsFromConfigFile(Path configFile, try (FSDataInputStream fileHandle = fs.open(configFile)) { contents = IOUtils.toString(fileHandle, StandardCharsets.UTF_8); } - if (contents == null) { + if (contents == null || contents.trim().isEmpty()) { throw new IOException( "Failed to read Docker client configuration: " + configFile); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 52fe9426d686b3..bccef0aded330b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2340,10 +2340,10 @@ <description> Maximum allowed file size in bytes for the Docker client configuration file (config.json). If the file size exceeds this value, the container launch - will be rejected. Default is 1024 bytes. + will be rejected. Default is 16 KiB. </description> 
<name>yarn.nodemanager.runtime.linux.docker.client-config-file-max-size-bytes</name> - <value>1024</value> + <value>16384</value> </property> <property> diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index a6da8273535aa5..67e2d8423c6925 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -2514,10 +2515,9 @@ public void testLaunchContainerWithAdditionalDockerClientConfig(boolean pHttps) testLaunchContainer(null, getDockerClientConfigFile()); } - @ParameterizedTest(name = "https={0}") - @MethodSource("data") - public void testLaunchContainerWithBigDockerClientConfig(boolean pHttps) throws Exception { - initHttps(pHttps); + @Test + public void testLaunchContainerWithBigDockerClientConfig() throws Exception { + initHttps(false); int maxSize = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; int invalidSize 
= maxSize + 1; @@ -2525,7 +2525,6 @@ public void 
testLaunchContainerWithBigDockerClientConfig(boolean pHttps) throws try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { bw.write("x".repeat(invalidSize)); } - initHttps(pHttps); RuntimeException e = assertThrows(RuntimeException.class, () -> testLaunchContainer(null, file)); assertNotNull(e.getCause(), "Wrapped IOException cause expected"); @@ -2534,38 +2533,37 @@ public void testLaunchContainerWithBigDockerClientConfig(boolean pHttps) throws + maxSize + " bytes)"); } - @ParameterizedTest(name = "https={0}") - @MethodSource("data") - public void testLaunchContainerWithDockerClientConfigAtMaxSize(boolean pHttps) throws Exception { - initHttps(pHttps); + @Test + public void testLaunchContainerWithDockerClientConfigAtMaxSize() throws Exception { + initHttps(false); int maxSize = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES; - // Exactly maxSize bytes must be accepted (boundary check). The content - // does not need to be valid JSON: readCredentialsFromConfigFile parses - // JSON only after the size gate, so a JSON-parse failure here would - // already prove the size gate let the file through. + // Build a valid Docker client config JSON of exactly maxSize bytes by + // padding whitespace after the opening brace (JSON ignores whitespace + // between tokens). This verifies the boundary is accepted by the size + // gate and that the resulting content still parses as JSON downstream. 
+ String base = TestDockerClientConfigHandler.JSON; + int padLen = maxSize - base.length(); + assertTrue(padLen >= 0, + "Base JSON (" + base.length() + " bytes) must fit within maxSize (" + + maxSize + " bytes) for this test to be meaningful"); + String content = "{" + " ".repeat(padLen) + base.substring(1); + assertEquals(maxSize, content.getBytes(StandardCharsets.UTF_8).length, + "Padded JSON must be exactly maxSize bytes"); + File file = new File(tmpPath, "docker-client-config-boundary"); try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { - bw.write(TestDockerClientConfigHandler.JSON); - bw.write("x".repeat(maxSize - TestDockerClientConfigHandler.JSON.length())); - } - // The handler should not reject on size. Any failure must come from - // downstream container launch wiring, not the size guard. - try { - testLaunchContainer(null, file); - } catch (RuntimeException re) { - if (re.getCause() != null && re.getCause().getMessage() != null) { - assertThat(re.getCause().getMessage()) - .doesNotContain("is larger than maximum allowed size"); - } + bw.write(content); } + // Must NOT throw the size-gate error. Container launch should proceed + // through the normal path with this boundary-sized valid config. + testLaunchContainer(null, file); } - @ParameterizedTest(name = "https={0}") - @MethodSource("data") - public void testLaunchContainerWithInvalidMaxSizeFallsBackToDefault(boolean pHttps) + @Test + public void testLaunchContainerWithInvalidMaxSizeFallsBackToDefault() throws Exception { - initHttps(pHttps); + initHttps(false); conf.setInt(YarnConfiguration.NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES, -1); int defaultMax = YarnConfiguration.DEFAULT_NM_DOCKER_CLIENT_CONFIG_MAX_SIZE_BYTES;