Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions apache/livy/ExposedUI/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Setup Apache Livy with Docker Compose

## Vulnerable (Exposed UI — no authentication)
```bash
docker compose build spark-master
docker compose up
```

### Access the Livy UI and execute PySpark code
```bash
curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions
# {"id":6,"name":null,"appId":null,"owner":null,"proxyUser":null,"state":"starting",...}

# replace $id below with the "id" value from the previous response
curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(os.getcwd())"}' http://localhost:8998/sessions/$id/statements
# wait ~30 sec for the session to reach the "idle" state (check with: curl http://localhost:8998/sessions/$id)

# replace $statements_id below with the "id" value from the statement response
curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id
# output.data is the stdout
```
#### alternative approach
Instead, you can send a callback address to the `batches` endpoint to check whether the Apache Livy instance is exposed:
```bash
curl -X POST -H "Content-Type: application/json" -d '{"file":"callback_address"}' http://localhost:8998/batches
```

---

## Secured (Custom Authentication Filter)

```bash
# Build and start the secured stack
docker compose -f docker-compose-secure.yml build
docker compose -f docker-compose-secure.yml up
```

### Test authentication
```bash
# Without token → 401
curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions

# With valid token → 200
curl -X POST -H "Authorization: Bearer changeme-use-a-strong-secret" -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions

```
16 changes: 16 additions & 0 deletions apache/livy/ExposedUI/apache-livy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Livy build/packaging reference:
# https://github.com/apache/incubator-livy?tab=readme-ov-file#building-livy
# Reuse the same image built for Spark Master/Worker
FROM mounirbs-local/spark-python3-java11:3.5.4
USER root
# key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV LIVY_HOME=/opt/livy
WORKDIR /opt/
# Get livy binaries from: https://livy.apache.org/download/
#  - curl -f fails the build on an HTTP error instead of saving the error page
#    as the .zip; -L follows redirects; -sS hides the progress meter but keeps errors.
#  - the apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends unzip \
    && rm -rf /var/lib/apt/lists/* \
    && curl -fsSL -O "https://dlcdn.apache.org/incubator/livy/0.8.0-incubating/apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && unzip -q "apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && rm -f "apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && mv "apache-livy-0.8.0-incubating_2.12-bin" "$LIVY_HOME" \
    && mkdir "$LIVY_HOME/logs" \
    && chown -R spark:spark "$LIVY_HOME"

# Drop back to the unprivileged user that owns $LIVY_HOME.
USER spark
27 changes: 27 additions & 0 deletions apache/livy/ExposedUI/apache-livy/Dockerfile.secure
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reuse the same image built for Spark Master/Worker
FROM mounirbs-local/spark-python3-java11:3.5.4
USER root
# key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV LIVY_HOME=/opt/livy
WORKDIR /opt/

# Install Livy
#  - curl -f fails the build on an HTTP error instead of saving the error page
#    as the .zip; -L follows redirects; -sS hides the progress meter but keeps errors.
#  - the apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends unzip \
    && rm -rf /var/lib/apt/lists/* \
    && curl -fsSL -O "https://dlcdn.apache.org/incubator/livy/0.8.0-incubating/apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && unzip -q "apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && rm -f "apache-livy-0.8.0-incubating_2.12-bin.zip" \
    && mv "apache-livy-0.8.0-incubating_2.12-bin" "$LIVY_HOME" \
    && mkdir "$LIVY_HOME/logs" \
    && chown -R spark:spark "$LIVY_HOME"

# Compile and install the custom authentication filter:
# TokenAuthFilter.java is compiled against the jars in $LIVY_HOME/jars, packaged
# as livy-custom-auth.jar in that same directory, and the build scratch
# directories are removed afterwards.
COPY custom-auth/src /tmp/custom-auth-src
RUN mkdir -p /tmp/custom-auth-classes \
    && javac -cp "$LIVY_HOME/jars/*" \
        -d /tmp/custom-auth-classes \
        /tmp/custom-auth-src/com/livy/auth/TokenAuthFilter.java \
    && jar cf "$LIVY_HOME/jars/livy-custom-auth.jar" \
        -C /tmp/custom-auth-classes . \
    && rm -rf /tmp/custom-auth-src /tmp/custom-auth-classes \
    && chown spark:spark "$LIVY_HOME/jars/livy-custom-auth.jar"

# Drop back to the unprivileged user that owns $LIVY_HOME.
USER spark
108 changes: 108 additions & 0 deletions apache/livy/ExposedUI/apache-livy/conf/livy-client.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Configurations for a Livy Client, any configurations set here will override any
# livy or spark-default configurations.
#
# Before a Livy Client is able to load these configurations the folder containing
# this file must be added to the application classpath
#

#
# Configurations for Livy HTTPClient
#

# HTTP Request configurations
# How long before a request times out
# livy.client.http.connection.timeout = 10s
# How long between data packets before a request times out
# livy.client.http.connection.socket.timeout = 5m
# Whether content is compressed
# livy.client.http.content.compress.enable = true

# How long before idle connections are closed
# livy.client.http.connection.idle.timeout = 10m

# Initial interval before polling for Job results
# livy.client.http.job.initial-poll-interval = 100ms
# Maximum interval between successive polls
# livy.client.http.job.max-poll-interval = 5s

#
# Configurations for Livy RSCClient
#

# Configurations for registering a client with the rpc server
# Unique client id for connections to the rpc server
# livy.rsc.client.auth.id =
# Secret value for authenticating client connections with server
# livy.rsc.client.auth.secret =

# Timeout when stopping a rsc client
# livy.rsc.client.shutdown-timeout = 10s

# Class of the rsc driver to use
# livy.rsc.driver-class =
# The kind of rsc session. Examples: pyspark or sparkr
# livy.rsc.session.kind =

# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
# directory every time a session is started. By caching these files in HDFS, for example, startup
# time of sessions on YARN can be reduced.
# livy.rsc.jars =
# Location of the SparkR package for running sparkr
# livy.rsc.sparkr.package =
# Location of the PySpark package for running pyspark
# livy.rsc.pyspark.archives =

# Address for the RSC driver to connect back with its connection info.
# livy.rsc.launcher.address =

# Port range on which RPC will launch. The range is inclusive of the start and end ports.
livy.rsc.launcher.port.range = 10000~10010

# How long will the RSC wait for a connection for a Livy server before shutting itself down.
livy.rsc.server.idle-timeout = 10m

# The user that should be impersonated when requesting a Livy session
# livy.rsc.proxy-user =

# Host or IP address of the rpc server

#livy.rsc.rpc.server.address = livy-server
# How long the rsc client will wait when attempting to connect to the Livy server
#livy.rsc.server.connect.timeout = 90s

# The logging level for the rpc channel. Possible values: TRACE, DEBUG, INFO, WARN, or ERROR
livy.rsc.channel.log.level = ERROR

# SASL configurations for authentication
# SASL mechanism used for authentication
# livy.rsc.rpc.sasl.mechanisms = DIGEST-MD5
# SASL qop used for authentication
# livy.rsc.rpc.sasl.qop =

# Time between status checks for a cancelled Job
# livy.rsc.job-cancel.trigger-interval = 100ms
# Time before a cancelled Job is forced into a Cancelled state
# livy.rsc.job-cancel.timeout = 30s

# Number of statements kept in driver's memory
# livy.rsc.retained-statements = 100
#
livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar
38 changes: 38 additions & 0 deletions apache/livy/ExposedUI/apache-livy/conf/livy-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# LIVY ENVIRONMENT VARIABLES
#
# - JAVA_HOME Java runtime to use. By default use "java" from PATH.
# - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use.
# - SPARK_HOME Spark which you would like to use in Livy.
# - SPARK_CONF_DIR Optional directory where the Spark configuration lives.
# (Default: $SPARK_HOME/conf)
# - LIVY_LOG_DIR Where log files are stored. (Default: ${LIVY_HOME}/logs)
# - LIVY_PID_DIR Where the pid file is stored. (Default: /tmp)
# - LIVY_SERVER_JAVA_OPTS Java Opts for running livy server (You can set jvm related setting here,
# like jvm memory/gc algorithm and etc.)
# - LIVY_IDENT_STRING A name that identifies the Livy server instance, used to generate log file
# names. (Default: name of the user starting Livy).
# - LIVY_MAX_LOG_FILES Max number of log file to keep in the log directory. (Default: 5.)
# - LIVY_NICENESS Niceness of the Livy server process when running in the background. (Default: 0.)
# - LIVY_CLASSPATH Override if the additional classpath is required.

# Values for the variables described in the header above, assigned first
# and then exported together.
JAVA_HOME=/opt/java/openjdk
SPARK_HOME=/opt/spark
SPARK_CONF_DIR=/opt/spark/conf
LIVY_LOG_DIR=/opt/livy/logs
export JAVA_HOME SPARK_HOME SPARK_CONF_DIR LIVY_LOG_DIR
59 changes: 59 additions & 0 deletions apache/livy/ExposedUI/apache-livy/conf/livy-secure.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# What host address to start the server on.
livy.server.host = 0.0.0.0

# What port to start the server on.
livy.server.port = 8998

# What spark master Livy sessions should use.
livy.spark.master = spark://spark-master:7077

# What spark deploy mode Livy sessions should use.
livy.spark.deploy-mode = client

# If livy should impersonate the requesting users when creating a new session.
livy.impersonation.enabled = true

# List of local directories from where files are allowed to be added to user sessions.
livy.file.local-dir-whitelist = /target/

# If the Livy Web UI should be included in the Livy Server.
livy.ui.enabled = true

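# Whether CSRF protection is enabled. It is disabled here; when enabled, clients must send
# an X-Requested-By header on POST/DELETE/PUT/PATCH requests.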
livy.server.csrf-protection.enabled = false

# ============================================================
# Custom Authentication Filter Configuration
# ============================================================
# Use a custom token-based authentication filter.
# All requests must include: Authorization: Bearer <token>
livy.server.auth.type = token
livy.server.auth.token.class = com.livy.auth.TokenAuthFilter
livy.server.auth.token.param.token = changeme-use-a-strong-secret

# ============================================================
# Access Control
# ============================================================
livy.server.access-control.enabled = true
livy.server.access-control.allowed-users = livy-user

# Comma-separated list of Livy REPL jars; each jar is listed once.
livy.repl.jars = /opt/livy/jars/livy-client-common-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/commons-codec-1.9.jar, /opt/livy/repl_2.12-jars/minlog-1.3.0.jar, /opt/livy/repl_2.12-jars/kryo-shaded-4.0.2.jar, /opt/livy/repl_2.12-jars/livy-repl_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/objenesis-2.5.1.jar

livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar
Loading