From c04b306551e04bb5205246bdd1345e47d9f57793 Mon Sep 17 00:00:00 2001 From: joernNNN Date: Fri, 8 Aug 2025 19:36:31 +0400 Subject: [PATCH 1/7] apache livy exposed UI RCE --- apache/livy/ExposedUI/README.md | 22 + apache/livy/ExposedUI/apache-livy/Dockerfile | 16 + .../apache-livy/conf/livy-client.conf | 108 + .../ExposedUI/apache-livy/conf/livy-env.sh | 38 + .../livy/ExposedUI/apache-livy/conf/livy.conf | 198 ++ .../apache-livy/conf/log4j.properties | 24 + .../apache-livy/conf/spark-blacklist | 35 + .../spark/conf/spark-defaults.conf | 35 + apache/livy/ExposedUI/docker-compose.yml | 78 + .../livy/ExposedUI/python/fabric/swagger.json | 1753 +++++++++++++++++ .../livy/ExposedUI/python/fabric/swagger.yaml | 1250 ++++++++++++ .../python/fabric/test_fabric..ipynb | 296 +++ .../ExposedUI/python/fabric/test_fabric.py | 45 + .../ExposedUI/python/livy/delete_session.py | 5 + .../python/livy/init_java_gateway.py | 26 + apache/livy/ExposedUI/python/livy/run_code.py | 15 + .../python/livy/run_code_external_file.py | 16 + .../python/livy/src/external_file.py | 12 + .../ExposedUI/python/livy/start_session.py | 11 + .../ExposedUI/python/livy/wait_for_idle.py | 5 + .../python/spark-submit/test_pandas.py | 20 + .../python/spark-submit/test_spark.py | 11 + apache/livy/ExposedUI/requirements.txt | 4 + apache/livy/ExposedUI/spark/Dockerfile | 23 + 24 files changed, 4046 insertions(+) create mode 100644 apache/livy/ExposedUI/README.md create mode 100644 apache/livy/ExposedUI/apache-livy/Dockerfile create mode 100644 apache/livy/ExposedUI/apache-livy/conf/livy-client.conf create mode 100644 apache/livy/ExposedUI/apache-livy/conf/livy-env.sh create mode 100644 apache/livy/ExposedUI/apache-livy/conf/livy.conf create mode 100644 apache/livy/ExposedUI/apache-livy/conf/log4j.properties create mode 100644 apache/livy/ExposedUI/apache-livy/conf/spark-blacklist create mode 100644 apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf create mode 100644 
apache/livy/ExposedUI/docker-compose.yml create mode 100644 apache/livy/ExposedUI/python/fabric/swagger.json create mode 100644 apache/livy/ExposedUI/python/fabric/swagger.yaml create mode 100644 apache/livy/ExposedUI/python/fabric/test_fabric..ipynb create mode 100644 apache/livy/ExposedUI/python/fabric/test_fabric.py create mode 100644 apache/livy/ExposedUI/python/livy/delete_session.py create mode 100644 apache/livy/ExposedUI/python/livy/init_java_gateway.py create mode 100644 apache/livy/ExposedUI/python/livy/run_code.py create mode 100644 apache/livy/ExposedUI/python/livy/run_code_external_file.py create mode 100644 apache/livy/ExposedUI/python/livy/src/external_file.py create mode 100644 apache/livy/ExposedUI/python/livy/start_session.py create mode 100644 apache/livy/ExposedUI/python/livy/wait_for_idle.py create mode 100644 apache/livy/ExposedUI/python/spark-submit/test_pandas.py create mode 100644 apache/livy/ExposedUI/python/spark-submit/test_spark.py create mode 100644 apache/livy/ExposedUI/requirements.txt create mode 100644 apache/livy/ExposedUI/spark/Dockerfile diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md new file mode 100644 index 00000000..f388e153 --- /dev/null +++ b/apache/livy/ExposedUI/README.md @@ -0,0 +1,22 @@ +# Setup Apache Livy with Docker Compose + +```bash + +docker compose up --build +``` +# Access the Livy UI and execute PySpark code +```bash +curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions +# {"id":6,"name":null,"appId":null,"owner":null,"proxyUser":null,"state":"starting","kind":"pyspark","appInfo":{"driverLogUrl":null,"sparkUiUrl":null},"log":["stdout: ","\nstderr: "],"ttl":null,"driverMemory":null,"driverCores":0,"executorMemory":null,"executorCores":0,"conf":{},"archives":[],"files":[],"heartbeatTimeoutInSecond":0,"jars":[],"numExecutors":0,"pyFiles":[],"queue":null} + +# replace id from last response with $id +curl -X POST -H "Content-Type: 
application/json" -d '{"code":"import os\nprint(os.getcwd())"}' http://localhost:8998/sessions/$id/statements +# "java.lang.IllegalStateException: Session is in state starting" +# wait 30sec +# {"id":0,"code":"import os\nprint(os.getcwd())","state":"waiting","output":null,"progress":0.0,"started":0,"completed":0} + +# replace id from last response with $statements_id +curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id +# output.data is the stdout +# {"id":0,"code":"import os\nprint(os.getcwd())","state":"available","output":{"status":"ok","execution_count":0,"data":{"text/plain":"/opt"}},"progress":1.0,"started":1754515902001,"completed":1754515902003} +``` \ No newline at end of file diff --git a/apache/livy/ExposedUI/apache-livy/Dockerfile b/apache/livy/ExposedUI/apache-livy/Dockerfile new file mode 100644 index 00000000..c458ee0f --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/Dockerfile @@ -0,0 +1,16 @@ +#https://github.com/apache/incubator-livy?tab=readme-ov-file#building-livy +# Reuse the same image built for Spark Master/Worker +FROM mounirbs-local/spark-python3-java11:3.5.4 +USER root +ENV LIVY_HOME /opt/livy +WORKDIR /opt/ +# Get livy binaries from: https://livy.apache.org/download/ +RUN apt-get update && apt-get install -y unzip \ + && curl "https://dlcdn.apache.org/incubator/livy/0.8.0-incubating/apache-livy-0.8.0-incubating_2.12-bin.zip" -O \ + && unzip "apache-livy-0.8.0-incubating_2.12-bin" \ + && rm -rf "apache-livy-0.8.0-incubating_2.12-bin.zip" \ + && mv "apache-livy-0.8.0-incubating_2.12-bin" $LIVY_HOME \ + && mkdir $LIVY_HOME/logs \ + && chown -R spark:spark $LIVY_HOME + +USER spark diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf b/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf new file mode 100644 index 00000000..97147729 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf @@ -0,0 +1,108 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Configurations for a Livy Client, any configurations set here will override any +# livy or spark-default configurations. +# +# Before a Livy Client is able to load these configurations the folder containing +# this file must be added to the application classpath +# + +# +# Configurations for Livy HTTPClient +# + +# HTTP Request configurations +# How long before a request times out +# livy.client.http.connection.timeout = 10s +# How long between data packets before a request times out +# livy.client.http.connection.socket.timeout = 5m +# Whether content is compressed +# livy.client.http.content.compress.enable = true + +# How long before idle connections are closed +# livy.client.http.connection.idle.timeout = 10m + +# Initial interval before polling for Job results +# livy.client.http.job.initial-poll-interval = 100ms +# Maximum interval between successive polls +# livy.client.http.job.max-poll-interval = 5s + +# +# Configurations for Livy RSCClient +# + +# Configurations for registering a client with the rpc server +# Unique client id for connections to the rpc server +# livy.rsc.client.auth.id = +# Secret value for authenticating client connections with server +# livy.rsc.client.auth.secret = + +# Timeout when stopping a rsc client +# 
livy.rsc.client.shutdown-timeout = 10s + +# Class of the rsc driver to use +# livy.rsc.driver-class = +# The kind of rsc session. Examples: pyspark or sparkr +# livy.rsc.session.kind = + +# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation +# directory every time a session is started. By caching these files in HDFS, for example, startup +# time of sessions on YARN can be reduced. +# livy.rsc.jars = +# Location of the SparkR package for running sparkr +# livy.rsc.sparkr.package = +# Location of the PySpark package for running pyspark +# livy.rsc.pyspark.archives = + +# Address for the RSC driver to connect back with its connection info. +# livy.rsc.launcher.address = + +# Port range on which RPC will launch. Port range is inclusive of start and end port. +livy.rsc.launcher.port.range = 10000~10010 + +# How long will the RSC wait for a connection for a Livy server before shutting itself down. +livy.rsc.server.idle-timeout = 10m + +# The user that should be impersonated when requesting a Livy session +# livy.rsc.proxy-user = + +# Host or IP address of the rpc server + +#livy.rsc.rpc.server.address = livy-server +# How long the rsc client will wait when attempting to connect to the Livy server +#livy.rsc.server.connect.timeout = 90s + +# The logging level for the rpc channel. 
Possible values: TRACE, DEBUG, INFO, WARN, or ERROR +livy.rsc.channel.log.level = ERROR + +# SASL configurations for authentication +# SASL mechanism used for authentication +# livy.rsc.rpc.sasl.mechanisms = DIGEST-MD5 +# SASL qop used for authentication +# livy.rsc.rpc.sasl.qop = + +# Time between status checks for cancelled a Job +# livy.rsc.job-cancel.trigger-interval = 100ms +# Time before a cancelled a Job is forced into a Cancelled state +# livy.rsc.job-cancel.timeout = 30s + +# Number of statements kept in driver's memory +# livy.rsc.retained-statements = 100 +# +livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh b/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh new file mode 100644 index 00000000..04796887 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# LIVY ENVIRONMENT VARIABLES +# +# - JAVA_HOME Java runtime to use. By default use "java" from PATH. +# - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use. 
+# - SPARK_HOME Spark which you would like to use in Livy. +# - SPARK_CONF_DIR Optional directory where the Spark configuration lives. +# (Default: $SPARK_HOME/conf) +# - LIVY_LOG_DIR Where log files are stored. (Default: ${LIVY_HOME}/logs) +# - LIVY_PID_DIR Where the pid file is stored. (Default: /tmp) +# - LIVY_SERVER_JAVA_OPTS Java Opts for running livy server (You can set jvm related setting here, +# like jvm memory/gc algorithm and etc.) +# - LIVY_IDENT_STRING A name that identifies the Livy server instance, used to generate log file +# names. (Default: name of the user starting Livy). +# - LIVY_MAX_LOG_FILES Max number of log file to keep in the log directory. (Default: 5.) +# - LIVY_NICENESS Niceness of the Livy server process when running in the background. (Default: 0.) +# - LIVY_CLASSPATH Override if the additional classpath is required. + +export JAVA_HOME=/opt/java/openjdk +export SPARK_HOME=/opt/spark +export LIVY_LOG_DIR=/opt/livy/logs +export SPARK_CONF_DIR=/opt/spark/conf \ No newline at end of file diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy.conf b/apache/livy/ExposedUI/apache-livy/conf/livy.conf new file mode 100644 index 00000000..4f289ef2 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/livy.conf @@ -0,0 +1,198 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# Use this keystore for the SSL certificate and key. +# livy.keystore = + +# Specify the keystore password. +# livy.keystore.password = +# +# Specify the key password. +# livy.key-password = + +# Hadoop Credential Provider Path to get "livy.keystore.password" and "livy.key-password". +# Credential Provider can be created using command as follow: +# hadoop credential create "livy.keystore.password" -value "secret" -provider jceks://hdfs/path/to/livy.jceks +# livy.hadoop.security.credential.provider.path = + +# What host address to start the server on. By default, Livy will bind to all network interfaces. +livy.server.host = 0.0.0.0 + +# What port to start the server on. +livy.server.port = 8998 + +# What base path ui should work on. By default UI is mounted on "/". +# E.g.: livy.ui.basePath = /my_livy - result in mounting UI on /my_livy/ +# livy.ui.basePath = "" + +# What spark master Livy sessions should use. +livy.spark.master = spark://spark-master:7077 + +# What spark deploy mode Livy sessions should use. +livy.spark.deploy-mode = client + +# Configure Livy server http request and response header size. +# livy.server.request-header.size = 131072 +# livy.server.response-header.size = 131072 + +# Whether or not to send server version in http response. +# livy.server.send-server-version = false + +# Enabled to check whether timeout Livy sessions should be stopped. +#livy.server.session.timeout-check = true +# +# Whether or not to skip timeout check for a busy session +#livy.server.session.timeout-check.skip-busy = false + +# Time in milliseconds on how long Livy will wait before timing out an inactive session. +# Note that the inactive session could be busy running jobs. +#livy.server.session.timeout = 5m +# +# How long a finished session state should be kept in LivyServer for query. 
+#livy.server.session.state-retain.sec = 120s + +# If livy should impersonate the requesting users when creating a new session. +livy.impersonation.enabled = true + +# Logs size livy can cache for each session/batch. 0 means don't cache the logs. +# livy.cache-log.size = 200 + +# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation +# directory every time a session is started. By caching these files in HDFS, for example, startup +# time of sessions on YARN can be reduced. +# livy.rsc.jars = + +# Comma-separated list of Livy REPL jars. By default Livy will upload jars from its installation +# directory every time a session is started. By caching these files in HDFS, for example, startup +# time of sessions on YARN can be reduced. Please list all the repl dependencies including +# Scala version-specific livy-repl jars, Livy will automatically pick the right dependencies +# during session creation. +# livy.repl.jars = + +# Location of PySpark archives. By default Livy will upload the file from SPARK_HOME, but +# by caching the file in HDFS, startup time of PySpark sessions on YARN can be reduced. +# livy.pyspark.archives = + +# Location of the SparkR package. By default Livy will upload the file from SPARK_HOME, but +# by caching the file in HDFS, startup time of R sessions on YARN can be reduced. +# livy.sparkr.package = + +# List of local directories from where files are allowed to be added to user sessions. By +# default it's empty, meaning users can only reference remote URIs when starting their +# sessions. +livy.file.local-dir-whitelist = /target/ + +# Whether to enable csrf protection, by default it is false. If it is enabled, client should add +# http-header "X-Requested-By" in request if the http method is POST/DELETE/PUT/PATCH. 
+# livy.server.csrf-protection.enabled = + +# Whether to enable HiveContext in livy interpreter, if it is true hive-site.xml will be detected +# on user request and then livy server classpath automatically. +# livy.repl.enable-hive-context = + +# Recovery mode of Livy. Possible values: +# off: Default. Turn off recovery. Every time Livy shuts down, it stops and forgets all sessions. +# recovery: Livy persists session info to the state store. When Livy restarts, it recovers +# previous sessions from the state store. +# Must set livy.server.recovery.state-store and livy.server.recovery.state-store.url to +# configure the state store. +# livy.server.recovery.mode = off +# Zookeeper address used for HA and state store. e.g. host1:port1, host2:port2 +# livy.server.zookeeper.url = + +# Where Livy should store state to for recovery. Possible values: +# : Default. State store disabled. +# filesystem: Store state on a file system. +# zookeeper: Store state in a Zookeeper instance. +# livy.server.recovery.state-store = + +# For filesystem state store, the path of the state store directory. Please don't use a filesystem +# that doesn't support atomic rename (e.g. S3). e.g. file:///tmp/livy or hdfs:///. +# For zookeeper, the address to the Zookeeper servers. e.g. host1:port1,host2:port2 +# If livy.server.recovery.state-store is zookeeper, this config is for back-compatibility, +# so if both this config and livy.server.zookeeper.url exist, +# livy uses livy.server.zookeeper.url first. +# livy.server.recovery.state-store.url = + +# The policy of curator connecting to zookeeper. +# For example, m, n means retry m times and the interval of retry is n milliseconds. +# Please use the new config: livy.server.zk.retry-policy. +# Keep this config for back-compatibility. +# If both this config and livy.server.zk.retry-policy exist, +# livy uses livy.server.zk.retry-policy first. 
+# livy.server.recovery.zk-state-store.retry-policy = 5,100 + +# The policy of curator connecting to zookeeper. +# For example, m, n means retry m times and the interval of retry is n milliseconds +# livy.server.zk.retry-policy = + +# The dir in zk to store the data about session. +# livy.server.recovery.zk-state-store.key-prefix = livy + +# If Livy can't find the yarn app within this time, consider it lost. +# livy.server.yarn.app-lookup-timeout = 120s +# When the cluster is busy, we may fail to launch yarn app in app-lookup-timeout, then it would +# cause session leakage, so we need to check session leakage. +# How long to check livy session leakage +# livy.server.yarn.app-leakage.check-timeout = 600s +# how often to check livy session leakage +# livy.server.yarn.app-leakage.check-interval = 60s + +# How often Livy polls YARN to refresh YARN app state. +# livy.server.yarn.poll-interval = 5s +# +# Days to keep Livy server request logs. +# livy.server.request-log-retain.days = 5 + +# If the Livy Web UI should be included in the Livy Server. Enabled by default. +livy.ui.enabled = true + +# Whether to enable Livy server access control, if it is true then all the income requests will +# be checked if the requested user has permission. +# livy.server.access-control.enabled = false + +# Allowed users to access Livy, by default any user is allowed to access Livy. If user want to +# limit who could access Livy, user should list all the permitted users with comma separated. +# livy.server.access-control.allowed-users = * + +# A list of users with comma separated has the permission to change other user's submitted +# session, like submitting statements, deleting session. +# livy.server.access-control.modify-users = + +# A list of users with comma separated has the permission to view other user's infomation, like +# submitted session state, statement results. 
+# livy.server.access-control.view-users = +# +# Authentication support for Livy server +# Livy has a built-in SPnego authentication support for HTTP requests with below configurations. +# livy.server.auth.type = kerberos +# livy.server.auth.kerberos.principal = +# livy.server.auth.kerberos.keytab = +# livy.server.auth.kerberos.name-rules = DEFAULT +# +# If user wants to use custom authentication filter, configurations are: +# livy.server.auth.type = +# livy.server.auth..class = +# livy.server.auth..param. = +# livy.server.auth..param. = + +# Enable to allow custom classpath by proxy user in cluster mode +# The below configuration parameter is disabled by default. +# livy.server.session.allow-custom-classpath = true + +livy.repl.jars = /opt/livy/jars/livy-client-common-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/commons-codec-1.9.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/minlog-1.3.0.jar, /opt/livy/repl_2.12-jars/kryo-shaded-4.0.2.jar, /opt/livy/repl_2.12-jars/livy-repl_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/objenesis-2.5.1.jar + +livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar diff --git a/apache/livy/ExposedUI/apache-livy/conf/log4j.properties b/apache/livy/ExposedUI/apache-livy/conf/log4j.properties new file mode 100644 index 00000000..70b67a6d --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/log4j.properties @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# The default Livy logging configuration. +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +log4j.logger.org.eclipse.jetty=WARN diff --git a/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist b/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist new file mode 100644 index 00000000..e371ed22 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# Configuration override / blacklist. Defines a list of properties that users are not allowed +# to override when starting Spark sessions. 
+# +# This file takes a list of property names (one per line). Empty lines and lines starting with "#" +# are ignored. +# + +# Disallow overriding the master and the deploy mode. +spark.master +spark.submit.deployMode + +# Disallow overriding the location of Spark cached jars. +spark.yarn.jar +spark.yarn.jars +spark.yarn.archive + +# Don't allow users to override the RSC timeout. +livy.rsc.server.idle-timeout diff --git a/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf b/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf new file mode 100644 index 00000000..25721bbf --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. 
+ +# Example: +spark.master spark://spark-master:7077 +spark.driver.host apache-livy +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 2g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" + +# Dynamic Allocation +# Livy considers a dynamic executor one full worker (no concept of cores). So if a worker has 4 cores, one executor for Apache Livy will contain 4 cores +spark.dynamicAllocation.enabled true +spark.dynamicAllocation.minExecutors 1 +spark.dynamicAllocation.maxExecutors 4 +spark.dynamicAllocation.initialExecutors 1 diff --git a/apache/livy/ExposedUI/docker-compose.yml b/apache/livy/ExposedUI/docker-compose.yml new file mode 100644 index 00000000..f00b4646 --- /dev/null +++ b/apache/livy/ExposedUI/docker-compose.yml @@ -0,0 +1,78 @@ +services: + spark-master: + container_name: spark-master + hostname: spark-master + build: + context: ./ + dockerfile: ./spark/Dockerfile + image: mounirbs-local/spark-python3-java11:3.5.4 + ports: + - "8080:8080" + - "7077:7077" + - "6066:6066" + labels: + kompose.service.expose: true + kompose.service.type: headless + environment: + - SPARK_MASTER_HOST=spark-master + - SPARK_MASTER_PORT=7077 + - SPARK_MASTER_WEBUI_PORT=8080 + - SPARK_DAEMON_MEMORY=2g + - SPARK_MASTER_OPTS="-Dspark.master.rest.enabled=true" + - PYSPARK_PYTHON=python3 + entrypoint: + - "bash" + - "-c" + - "/opt/spark/sbin/start-master.sh && tail -f /dev/null" + volumes: + - ./python:/python + + spark-worker: + # reuse the image built for the spark-master + image: mounirbs-local/spark-python3-java11:3.5.4 + ports: + - "8081:8081" + labels: + kompose.service.expose: true + kompose.service.type: headless + container_name: spark-worker + hostname: spark-worker + environment: + - SPARK_WORKER_CORES=1 + - SPARK_WORKER_MEMORY=2g + - PYSPARK_PYTHON=python3 + depends_on: + - spark-master + entrypoint: + - "bash" 
+ - "-c" + - "/opt/spark/sbin/start-worker.sh spark://spark-master:7077 && tail -f /dev/null" + volumes: + - ./python:/python + + apache-livy: + container_name: apache-livy + hostname: apache-livy + environment: + - PYSPARK_PYTHON=python3 + build: ./apache-livy/ + image: mounirbs-local/livy-spark3.5.4-python3-java11:0.8 + command: ["sh", "-c", "/opt/livy/bin/livy-server"] + user: root + volumes: + - ./apache-livy/conf/:/opt/livy/conf/ + - ./apache-livy/spark/conf/:/opt/spark/conf/ + ports: + - '8998:8998' + labels: + kompose.service.expose: true + kompose.service.type: headless + depends_on: + - spark-master + - spark-worker + + deploy: + resources: + limits: + cpus: '1' + memory: 2g diff --git a/apache/livy/ExposedUI/python/fabric/swagger.json b/apache/livy/ExposedUI/python/fabric/swagger.json new file mode 100644 index 00000000..3d533742 --- /dev/null +++ b/apache/livy/ExposedUI/python/fabric/swagger.json @@ -0,0 +1,1753 @@ +{ + "swagger": "2.0", + "info": { + "version": "v1", + "title": "Livy Public API" + }, + "host": "api.fabric.microsoft.com", + "schemes": [ + "https" + ], + "paths": { + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches": { + "post": { + "tags": [ + "LivyApiBatch" + ], + "summary": "Executes a batch.", + "operationId": "LivyApiBatch_ExecuteBatchAsync", + "consumes": [ + "application/json", + "text/json" + ], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "payload", + "in": "body", + "description": "", + "required": true, + "schema": { + "$ref": "#/definitions/BatchRequest" + } + } + ], + "responses": { + 
"202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/BatchResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiBatch" + ], + "summary": "List Batch Jobs.", + "operationId": "LivyApiBatch_ListBatchJobsAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "$top", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$skip", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$count", + "in": "query", + "type": "boolean", + "description": "" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/LivySparkActivityList" + } + }, + "default": { + "description": "Other Status", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}": { + "get": { + "tags": [ + "LivyApiBatch" + ], + "summary": "Gets a Batch Details.", + "operationId": "LivyApiBatch_GetBatchAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "batchId", + "in": 
"path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/BatchResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "delete": { + "tags": [ + "LivyApiBatch" + ], + "summary": "Cancels a Batch Execution.", + "operationId": "LivyApiBatch_CancelBatchAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "batchId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Acquire a Spark Session.", + "operationId": "LivyApiSession_AcquireSparkSessionAsync", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "payload", + "in": "body", + 
"description": "CreateSessionRequest.", + "required": true, + "schema": { + "$ref": "#/definitions/SessionRequest" + } + } + ], + "responses": { + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status code", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "List spark sessions.", + "operationId": "LivyApiSession_ListSparkSessionsAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "$top", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$skip", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$count", + "in": "query", + "type": "boolean", + "description": "" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}": { + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "Get details of a spark session.", + "operationId": "LivyApiSession_GetSparkSessionAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": 
"lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "delete": { + "tags": [ + "LivyApiSession" + ], + "summary": "Stops and deletes a spark session.", + "operationId": "LivyApiSession_DeleteSparkSessionAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Execute a statement on a spark session.", + "operationId": "LivyApiSession_ExecuteSparkSessionStatementAsync", + "consumes": [ + "application/json", + "text/json" + ], + "produces": [ + "application/json", + "text/json" + ], + "parameters": 
[ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "payload", + "in": "body", + "description": "", + "required": true, + "schema": { + "$ref": "#/definitions/StatementRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "List statements in an active session.", + "operationId": "LivyApiSession_ListSparkSessionStatementsAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementsResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + 
"/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}": { + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "Gets a spark statement from a spark session.", + "operationId": "LivyApiSession_GetSparkSessionStatementAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "statementId", + "in": "path", + "description": "Statement Id.", + "required": true, + "type": "integer", + "format": "int32" + }, + { + "name": "from", + "in": "query", + "description": "Offset (in byte) which the output should begin from.", + "type": "integer", + "format": "int32" + }, + { + "name": "size", + "in": "query", + "description": "Size (in byte) of the returned output.", + "type": "integer", + "format": "int32" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Cancels a statement execution.", + "operationId": "LivyApiSession_CancelSparkStatementAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": 
"path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "statementId", + "in": "path", + "description": "", + "required": true, + "type": "integer", + "format": "int32" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementCancellationResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Resets the timeout time of a session.", + "operationId": "LivyApiSession_ResetSparkSessionTimeoutAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "204": { + "description": "OK" + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + } + }, + "definitions": { + 
"StatementResponse": {
+      "description": "Statement Response.",
+      "type": "object",
+      "properties": {
+        "id": {
+          "format": "int32",
+          "type": "integer"
+        },
+        "code": {
+          "type": "string"
+        },
+        "state": {
+          "enum": [
+            "waiting",
+            "running",
+            "available",
+            "Error",
+            "cancelling",
+            "cancelled"
+          ],
+          "type": "string"
+        },
+        "sourceId": {
+          "type": "string"
+        },
+        "output": {
+          "$ref": "#/definitions/StatementOutput"
+        }
+      }
+    },
+    "StatementOutput": {
+      "description": "Statement Output.",
+      "type": "object",
+      "properties": {
+        "status": {
+          "type": "string"
+        },
+        "execution_count": {
+          "format": "int32",
+          "type": "integer"
+        },
+        "data": {
+          "type": "object"
+        },
+        "ename": {
+          "type": "string"
+        },
+        "evalue": {
+          "type": "string"
+        },
+        "traceback": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "BatchRequest": {
+      "description": "Request for running batch job.",
+      "type": "object",
+      "allOf": [
+        {
+          "$ref": "#/definitions/SessionRequest"
+        },
+        {
+          "type": "object"
+        }
+      ],
+      "properties": {
+        "file": {
+          "type": "string"
+        },
+        "className": {
+          "type": "string"
+        },
+        "args": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "jars": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "files": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "pyFiles": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "archives": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "SessionRequest": {
+      "description": "Request for acquiring a Session.",
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "archives": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "conf": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "tags": {
+          "description": "Gets or sets the optional tags.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "driverMemory": {
+          "type": "string"
+        },
+        
"driverCores": { + "format": "int32", + "type": "integer" + }, + "executorMemory": { + "type": "string" + }, + "executorCores": { + "format": "int32", + "type": "integer" + }, + "numExecutors": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementRequest": { + "description": "Statement Request Body.", + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "sourceId": { + "type": "string" + } + } + }, + "LivySessionStateInformation": { + "description": "Livy Session State Information.", + "type": "object", + "properties": { + "notStartedAt": { + "format": "date-time", + "type": "string" + }, + "startingAt": { + "format": "date-time", + "type": "string" + }, + "idleAt": { + "format": "date-time", + "type": "string" + }, + "deadAt": { + "format": "date-time", + "type": "string" + }, + "shuttingDownAt": { + "format": "date-time", + "type": "string" + }, + "killedAt": { + "format": "date-time", + "type": "string" + }, + "recoveringAt": { + "format": "date-time", + "type": "string" + }, + "busyAt": { + "format": "date-time", + "type": "string" + }, + "errorAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "type": "string" + }, + "jobCreationRequest": { + "$ref": "#/definitions/SessionRequest" + } + } + }, + "SchedulerInformation": { + "description": "Scheduler Information.", + "type": "object", + "properties": { + "submittedAt": { + "format": "date-time", + "type": "string" + }, + "queuedAt": { + "format": "date-time", + "type": "string" + }, + "scheduledAt": { + "format": "date-time", + "type": "string" + }, + "endedAt": { + "format": "date-time", + "type": "string" + }, + "cancellationRequestedAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "enum": [ + "Queued", + "Scheduled", + "Ended" + ], + "type": "string" + } + } + }, + "SparkServicePluginInformation": { + "description": "Spark Service Plugin Information.", + "type": "object", + 
"properties": { + "preparationStartedAt": { + "format": "date-time", + "type": "string" + }, + "resourceAcquisitionStartedAt": { + "format": "date-time", + "type": "string" + }, + "submissionStartedAt": { + "format": "date-time", + "type": "string" + }, + "monitoringStartedAt": { + "format": "date-time", + "type": "string" + }, + "cleanupStartedAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "enum": [ + "Preparation", + "ResourceAcquisition", + "Queued", + "Submission", + "Monitoring", + "Cleanup", + "Ended" + ], + "type": "string" + } + } + }, + "LivyRequestBase": { + "description": "Livy Request Base.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "file": { + "type": "string" + }, + "className": { + "type": "string" + }, + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "jars": { + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "type": "array", + "items": { + "type": "string" + } + }, + "pyFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "archives": { + "type": "array", + "items": { + "type": "string" + } + }, + "conf": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "driverMemory": { + "type": "string" + }, + "driverCores": { + "format": "int32", + "type": "integer" + }, + "executorMemory": { + "type": "string" + }, + "executorCores": { + "format": "int32", + "type": "integer" + }, + "numExecutors": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementsResponse": { + "description": "Livy Statement Response Body.", + "type": "object", + "properties": { + "statements": { + "type": "array", + "items": { + "$ref": "#/definitions/StatementResponse" + } + }, + "total_statements": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementCancellationResponse": { + "description": "Livy Statement CancellationResponse.", + "type": "object", + "properties": { + "msg": { + "type": 
"string" + } + } + }, + "BatchStateInformation": { + "description": "Batch State Information.", + "type": "object", + "properties": { + "notStartedAt": { + "format": "date-time", + "type": "string" + }, + "startingAt": { + "format": "date-time", + "type": "string" + }, + "runningAt": { + "format": "date-time", + "type": "string" + }, + "deadAt": { + "format": "date-time", + "type": "string" + }, + "successAt": { + "format": "date-time", + "type": "string" + }, + "killedAt": { + "format": "date-time", + "type": "string" + }, + "recoveringAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "type": "string" + }, + "jobCreationRequest": { + "$ref": "#/definitions/LivyRequestBase" + } + } + }, + "ErrorResponse": { + "description": "The error response.", + "required": [ + "message", + "errorCode" + ], + "allOf": [ + { + "$ref": "#/definitions/ErrorResponseDetails" + } + ], + "properties": { + "requestId": { + "type": "string", + "description": "ID of the request associated with the error.", + "readOnly": true + }, + "moreDetails": { + "description": "List of additional error details.", + "type": "array", + "items": { + "$ref": "#/definitions/ErrorResponseDetails" + }, + "readOnly": true + } + }, + "readOnly": true + }, + "ErrorResponseDetails": { + "description": "The error response details.", + "required": [ + "message", + "errorCode" + ], + "properties": { + "errorCode": { + "type": "string", + "description": "A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users." + }, + "message": { + "type": "string", + "description": "A human readable representation of the error." 
+ }, + "relatedResource": { + "$ref": "#/definitions/ErrorRelatedResource" + } + } + }, + "ErrorRelatedResource": { + "description": "The error related resource details object.", + "required": [ + "resourceId", + "resourceType" + ], + "properties": { + "resourceId": { + "type": "string", + "description": "Resource ID involved in the error." + }, + "resourceType": { + "type": "string", + "description": "Resource type involved in the error." + } + } + }, + "LivySparkActivityList": { + "description": "List of batches or sessions.", + "type": "object", + "properties": { + "items": { + "description": "List of items.", + "type": "array", + "items": { + "$ref": "#/definitions/LivySparkActivity" + } + }, + "totalCountOfMatchedItems": { + "format": "int32", + "description": "Total count of matched items.", + "type": "integer" + }, + "pageSize": { + "format": "int32", + "description": "Page size.", + "type": "integer" + } + } + }, + "LivySparkActivity": { + "description": "Batch or Session description when getting a list.", + "type": "object", + "properties": { + "id": { + "description": "Activity ID.", + "type": "string" + }, + "appId": { + "description": "Spark application ID.", + "type": "string" + }, + "name": { + "description": "Batch or Session Name.", + "type": "string" + }, + "workspaceId": { + "description": "Workspace ID.", + "type": "string" + }, + "submitterId": { + "description": "Submitter ID.", + "type": "string" + }, + "submitterName": { + "description": "Submitter name.", + "type": "string" + }, + "artifactId": { + "description": "Artifact ID.", + "type": "string" + }, + "cancellationReason": { + "description": "Cancellation reason.", + "type": "string" + }, + "result": { + "description": "Job result.", + "enum": [ + "Uncertain", + "Succeeded", + "Failed", + "Cancelled" + ], + "type": "string" + }, + "submittedAt": { + "format": "date-time", + "description": "Submitted at time.", + "type": "string" + }, + "startedAt": { + "format": "date-time", + 
"description": "Started at time.", + "type": "string" + }, + "endedAt": { + "format": "date-time", + "description": "Ended at time.", + "type": "string" + }, + "errorSource": { + "description": "Error source.", + "enum": [ + "System", + "User", + "Unknown", + "Dependency" + ], + "type": "string" + }, + "errorCode": { + "description": "Error Code.", + "type": "string" + }, + "tags": { + "description": "Optional tags.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "schedulerState": { + "description": "Scheduler state.", + "enum": [ + "Queued", + "Scheduled", + "Ended" + ], + "type": "string" + }, + "pluginState": { + "description": "Plugin state.", + "enum": [ + "Preparation", + "ResourceAcquisition", + "Queued", + "Submission", + "Monitoring", + "Cleanup", + "Ended" + ], + "type": "string" + }, + "livyState": { + "description": "Gets or sets livy state.", + "type": "string" + }, + "isJobTimedOut": { + "description": "If job is timed out.", + "type": "boolean" + } + } + }, + "BatchResponse": { + "description": "Class that represents livy responses for batch.", + "type": "object", + "properties": { + "livyInfo": { + "$ref": "#/definitions/BatchStateInformation" + }, + "fabricBatchStateInfo": { + "$ref": "#/definitions/BatchStateInfo" + }, + "name": { + "description": "Name of the batch.", + "type": "string" + }, + "id": { + "description": "ID created for the batch.", + "type": "string" + }, + "appId": { + "description": "Application id of this batch.", + "type": "string" + }, + "appInfo": { + "description": "Detailed application info.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "artifactId": { + "description": "Hosting artifact Id.", + "type": "string" + }, + "errorInfo": { + "description": "Detailed error information.", + "type": "array", + "items": { + "$ref": "#/definitions/ErrorInformation" + } + }, + "jobType": { + "description": "Spark job type.", + "enum": [ + "SparkBatch", + "SparkSession", + 
"ScopeBatch",
+            "JupyterEnvironment"
+          ],
+          "type": "string"
+        },
+        "submitterId": {
+          "description": "ID of the user who submitted the session.",
+          "type": "string"
+        },
+        "submitterName": {
+          "description": "Name of the user who submitted the session.",
+          "type": "string"
+        },
+        "log": {
+          "description": "Log lines.",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "pluginInfo": {
+          "$ref": "#/definitions/SparkServicePluginInformation"
+        },
+        "schedulerInfo": {
+          "$ref": "#/definitions/SchedulerInformation"
+        },
+        "state": {
+          "description": "State of the batch or session.",
+          "enum": [
+            "starting",
+            "running",
+            "dead",
+            "success",
+            "killed",
+            "idle",
+            "error",
+            "shutting_down",
+            "not_started",
+            "busy",
+            "recovering",
+            "submitting",
+            "not_submitted"
+          ],
+          "type": "string"
+        },
+        "tags": {
+          "description": "Optional tags.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "result": {
+          "enum": [
+            "Uncertain",
+            "Succeeded",
+            "Failed",
+            "Cancelled"
+          ],
+          "type": "string"
+        },
+        "cancellationReason": {
+          "description": "Cancellation reason.",
+          "type": "string"
+        }
+      }
+    },
+    "BatchStateInfo": {
+      "description": "Batch state info.",
+      "type": "object",
+      "properties": {
+        "state": {
+          "description": "State of the batch acquisition.",
+          "enum": [
+            "unknown",
+            "expired",
+            "queued",
+            "libraryPackaging",
+            "submitting",
+            "cancelling",
+            "cancelled",
+            "error"
+          ],
+          "type": "string"
+        },
+        "errorMessage": {
+          "description": "Error message if the state is in \"error\".",
+          "type": "string"
+        }
+      }
+    },
+    "ErrorInformation": {
+      "description": "Error Information.",
+      "type": "object",
+      "properties": {
+        "message": {
+          "type": "string"
+        },
+        "errorCode": {
+          "type": "string"
+        },
+        "source": {
+          "enum": [
+            "System",
+            "User",
+            "Unknown",
+            "Dependency"
+          ],
+          "type": "string"
+        }
+      }
+    },
+    "SessionResponse": {
+      "type": "object",
+      "properties": {
+        "fabricSessionStateInfo": {
+          "$ref": 
"#/definitions/SessionStateInfo"
+        },
+        "livyInfo": {
+          "$ref": "#/definitions/LivySessionStateInformation"
+        },
+        "name": {
+          "description": "Name of the session.",
+          "type": "string"
+        },
+        "id": {
+          "description": "Session ID created for the session.",
+          "type": "string"
+        },
+        "appId": {
+          "description": "The application id of this session.",
+          "type": "string"
+        },
+        "appInfo": {
+          "description": "Detailed application info.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "artifactId": {
+          "description": "Hosting artifact ID.",
+          "type": "string"
+        },
+        "errorInfo": {
+          "description": "Detailed error information.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/ErrorInformation"
+          }
+        },
+        "jobType": {
+          "description": "Spark job type.",
+          "enum": [
+            "SparkBatch",
+            "SparkSession",
+            "ScopeBatch",
+            "JupyterEnvironment"
+          ],
+          "type": "string"
+        },
+        "submitterId": {
+          "description": "ID of the user who submitted the session.",
+          "type": "string"
+        },
+        "submitterName": {
+          "description": "Name of the user who submitted the session.",
+          "type": "string"
+        },
+        "log": {
+          "description": "Log lines.",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "pluginInfo": {
+          "$ref": "#/definitions/SparkServicePluginInformation"
+        },
+        "schedulerInfo": {
+          "$ref": "#/definitions/SchedulerInformation"
+        },
+        "state": {
+          "description": "Gets or sets the session state.",
+          "enum": [
+            "starting",
+            "running",
+            "dead",
+            "success",
+            "killed",
+            "idle",
+            "error",
+            "shutting_down",
+            "not_started",
+            "busy",
+            "recovering",
+            "submitting",
+            "not_submitted"
+          ],
+          "type": "string"
+        },
+        "tags": {
+          "description": "Optional tags.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "result": {
+          "enum": [
+            "Uncertain",
+            "Succeeded",
+            "Failed",
+            "Cancelled"
+          ],
+          "type": "string"
+        },
+        "cancellationReason": {
+          "description": "Cancellation reason.",
+          "type": "string"
+        }
+      }
+    },
+    
"SessionStateInfo": {
+      "description": "Session state info.",
+      "type": "object",
+      "properties": {
+        "state": {
+          "description": "Session acquisition state.",
+          "enum": [
+            "queued",
+            "libraryPackaging",
+            "acquiringSession",
+            "cancelling",
+            "cancelled",
+            "error",
+            "unknown"
+          ],
+          "type": "string"
+        },
+        "errorMessage": {
+          "description": "Error message when in \"error\" state.",
+          "type": "string"
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/apache/livy/ExposedUI/python/fabric/swagger.yaml b/apache/livy/ExposedUI/python/fabric/swagger.yaml
new file mode 100644
index 00000000..f447f338
--- /dev/null
+++ b/apache/livy/ExposedUI/python/fabric/swagger.yaml
@@ -0,0 +1,1250 @@
+swagger: '2.0'
+info:
+  version: v1
+  title: Livy Public API
+host: api.fabric.microsoft.com
+schemes:
+  - https
+paths:
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches:
+    post:
+      tags:
+        - LivyApiBatch
+      summary: Executes a batch.
+      operationId: LivyApiBatch_ExecuteBatchAsync
+      consumes:
+        - application/json
+        - text/json
+      produces:
+        - application/json
+        - text/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: payload
+          in: body
+          description: ''
+          required: true
+          schema:
+            $ref: '#/definitions/BatchRequest'
+      responses:
+        '202':
+          description: Accepted
+          schema:
+            $ref: '#/definitions/BatchResponse'
+        default:
+          description: Other status codes
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+    get:
+      tags:
+        - LivyApiBatch
+      summary: List Batch Jobs.
+ operationId: LivyApiBatch_ListBatchJobsAsync + consumes: [] + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: '' + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: '' + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: $top + in: query + type: string + description: '' + - name: $skip + in: query + type: string + description: '' + - name: $count + in: query + type: boolean + description: '' + responses: + '200': + description: OK + schema: + $ref: '#/definitions/LivySparkActivityList' + default: + description: Other Status + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}: + get: + tags: + - LivyApiBatch + summary: Gets a Batch Details. + operationId: LivyApiBatch_GetBatchAsync + consumes: [] + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: '' + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: '' + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: batchId + in: path + description: '' + required: true + type: string + format: uuid + responses: + '200': + description: OK + schema: + $ref: '#/definitions/BatchResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + delete: + tags: + - LivyApiBatch + summary: Cancels a Batch Execution. 
+ operationId: LivyApiBatch_CancelBatchAsync + consumes: [] + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: '' + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: '' + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: batchId + in: path + description: '' + required: true + type: string + format: uuid + responses: + '200': + description: OK + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions: + post: + tags: + - LivyApiSession + summary: Acquire a Spark Session. + operationId: LivyApiSession_AcquireSparkSessionAsync + consumes: + - application/json + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: payload + in: body + description: CreateSessionRequest. + required: true + schema: + $ref: '#/definitions/SessionRequest' + responses: + '202': + description: Accepted + schema: + $ref: '#/definitions/SessionResponse' + default: + description: Other status code + schema: + $ref: '#/definitions/ErrorResponse' + get: + tags: + - LivyApiSession + summary: List spark sessions. + operationId: LivyApiSession_ListSparkSessionsAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace name. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. 
+ required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: $top + in: query + type: string + description: '' + - name: $skip + in: query + type: string + description: '' + - name: $count + in: query + type: boolean + description: '' + responses: + '200': + description: OK + schema: + $ref: '#/definitions/SessionResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}: + get: + tags: + - LivyApiSession + summary: Get details of a spark session. + operationId: LivyApiSession_GetSparkSessionAsync + consumes: [] + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: Workspace name. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Spark session Id. + required: true + type: string + format: uuid + responses: + '200': + description: OK + schema: + $ref: '#/definitions/SessionResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + delete: + tags: + - LivyApiSession + summary: Stops and deletes a spark session. + operationId: LivyApiSession_DeleteSparkSessionAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Session Id. 
+ required: true + type: string + format: uuid + responses: + '200': + description: OK + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements: + post: + tags: + - LivyApiSession + summary: Execute a statement on a spark session. + operationId: LivyApiSession_ExecuteSparkSessionStatementAsync + consumes: + - application/json + - text/json + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Spark Session Id. + required: true + type: string + format: uuid + - name: payload + in: body + description: '' + required: true + schema: + $ref: '#/definitions/StatementRequest' + responses: + '200': + description: OK + schema: + $ref: '#/definitions/StatementResponse' + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' + get: + tags: + - LivyApiSession + summary: List statements in an active session. + operationId: LivyApiSession_ListSparkSessionStatementsAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace name. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. 
+ required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: '' + required: true + type: string + format: uuid + responses: + '200': + description: OK + schema: + $ref: '#/definitions/StatementsResponse' + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}: + get: + tags: + - LivyApiSession + summary: Gets a spark statement from a spark session. + operationId: LivyApiSession_GetSparkSessionStatementAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: sessionId + in: path + description: Spark Session Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: statementId + in: path + description: Statement Id. + required: true + type: integer + format: int32 + - name: from + in: query + description: Offset (in byte) which the output should begin from. + type: integer + format: int32 + - name: size + in: query + description: Size (in byte) of the returned output. + type: integer + format: int32 + responses: + '200': + description: OK + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel: + post: + tags: + - LivyApiSession + summary: Cancels a statement execution. 
+ operationId: LivyApiSession_CancelSparkStatementAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Session Id. + required: true + type: string + format: uuid + - name: statementId + in: path + description: '' + required: true + type: integer + format: int32 + + responses: + '200': + description: OK + schema: + $ref: '#/definitions/StatementCancellationResponse' + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout: + post: + tags: + - LivyApiSession + summary: Resets the timeout time of a session. + operationId: LivyApiSession_ResetSparkSessionTimeoutAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Session Id. + required: true + type: string + format: uuid + responses: + '204': + description: OK + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' +definitions: + StatementResponse: + description: Statement Response. 
+ type: object + properties: + id: + format: int32 + type: integer + code: + type: string + state: + enum: + - waiting + - running + - available + - Error + - cancelling + - cancelled + type: string + sourceId: + type: string + output: + $ref: '#/definitions/StatementOutput' + StatementOutput: + description: Statement Output. + type: object + properties: + status: + type: string + execution_count: + format: int32 + type: integer + data: + type: object + ename: + type: string + evalue: + type: string + traceback: + type: array + items: + type: string + BatchRequest: + description: Request for running batch job. + type: object + allOf: + - $ref: '#/definitions/SessionRequest' + - type: object + properties: + file: + type: string + className: + type: string + args: + type: array + items: + type: string + jars: + type: array + items: + type: string + files: + type: array + items: + type: string + pyFiles: + type: array + items: + type: string + archives: + type: array + items: + type: string + SessionRequest: + description: Request for acquiring a Session. + type: object + properties: + name: + type: string + archives: + type: array + items: + type: string + conf: + type: object + additionalProperties: + type: string + tags: + description: Gets or sets the optional tags. + type: object + additionalProperties: + type: string + driverMemory: + type: string + driverCores: + format: int32 + type: integer + executorMemory: + type: string + executorCores: + format: int32 + type: integer + numExecutors: + format: int32 + type: integer + StatementRequest: + description: Statement Request Body. + type: object + properties: + code: + type: string + kind: + type: string + sourceId: + type: string + LivySessionStateInformation: + description: Livy Session State Information. 
+ type: object + properties: + notStartedAt: + format: date-time + type: string + startingAt: + format: date-time + type: string + idleAt: + format: date-time + type: string + deadAt: + format: date-time + type: string + shuttingDownAt: + format: date-time + type: string + killedAt: + format: date-time + type: string + recoveringAt: + format: date-time + type: string + busyAt: + format: date-time + type: string + errorAt: + format: date-time + type: string + currentState: + type: string + jobCreationRequest: + $ref: '#/definitions/SessionRequest' + SchedulerInformation: + description: Scheduler Information. + type: object + properties: + submittedAt: + format: date-time + type: string + queuedAt: + format: date-time + type: string + scheduledAt: + format: date-time + type: string + endedAt: + format: date-time + type: string + cancellationRequestedAt: + format: date-time + type: string + currentState: + enum: + - Queued + - Scheduled + - Ended + type: string + SparkServicePluginInformation: + description: Spark Service Plugin Information. + type: object + properties: + preparationStartedAt: + format: date-time + type: string + resourceAcquisitionStartedAt: + format: date-time + type: string + submissionStartedAt: + format: date-time + type: string + monitoringStartedAt: + format: date-time + type: string + cleanupStartedAt: + format: date-time + type: string + currentState: + enum: + - Preparation + - ResourceAcquisition + - Queued + - Submission + - Monitoring + - Cleanup + - Ended + type: string + LivyRequestBase: + description: Livy Request Base. 
+ type: object + properties: + name: + type: string + file: + type: string + className: + type: string + args: + type: array + items: + type: string + jars: + type: array + items: + type: string + files: + type: array + items: + type: string + pyFiles: + type: array + items: + type: string + archives: + type: array + items: + type: string + conf: + type: object + additionalProperties: + type: string + driverMemory: + type: string + driverCores: + format: int32 + type: integer + executorMemory: + type: string + executorCores: + format: int32 + type: integer + numExecutors: + format: int32 + type: integer + StatementsResponse: + description: Livy Statement Response Body. + type: object + properties: + statements: + type: array + items: + $ref: '#/definitions/StatementResponse' + total_statements: + format: int32 + type: integer + StatementCancellationResponse: + description: Livy Statement CancellationResponse. + type: object + properties: + msg: + type: string + BatchStateInformation: + description: Batch State Information. + type: object + properties: + notStartedAt: + format: date-time + type: string + startingAt: + format: date-time + type: string + runningAt: + format: date-time + type: string + deadAt: + format: date-time + type: string + successAt: + format: date-time + type: string + killedAt: + format: date-time + type: string + recoveringAt: + format: date-time + type: string + currentState: + type: string + jobCreationRequest: + $ref: '#/definitions/LivyRequestBase' + ErrorResponse: + description: The error response. + required: + - message + - errorCode + allOf: + - $ref: '#/definitions/ErrorResponseDetails' + properties: + requestId: + type: string + description: ID of the request associated with the error. + readOnly: true + moreDetails: + description: List of additional error details. + type: array + items: + $ref: '#/definitions/ErrorResponseDetails' + readOnly: true + readOnly: true + ErrorResponseDetails: + description: The error response details. 
+ required: + - message + - errorCode + properties: + errorCode: + type: string + description: A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users. + message: + type: string + description: A human readable representation of the error. + relatedResource: + $ref: '#/definitions/ErrorRelatedResource' + ErrorRelatedResource: + description: The error related resource details object. + required: + - resourceId + - resourceType + properties: + resourceId: + type: string + description: Resource ID involved in the error. + resourceType: + type: string + description: Resource type involved in the error. + LivySparkActivityList: + description: List of batches or sessions. + type: object + properties: + items: + description: List of items. + type: array + items: + $ref: '#/definitions/LivySparkActivity' + totalCountOfMatchedItems: + format: int32 + description: Total count of matched items. + type: integer + pageSize: + format: int32 + description: Page size. + type: integer + LivySparkActivity: + description: Batch or Session description when getting a list. + type: object + properties: + id: + description: Activity ID. + type: string + appId: + description: Spark application ID. + type: string + name: + description: Batch or Session Name. + type: string + workspaceId: + description: Workspace ID. + type: string + submitterId: + description: Submitter ID. + type: string + submitterName: + description: Submitter name. + type: string + artifactId: + description: Artifact ID. + type: string + cancellationReason: + description: Cancellation reason. + type: string + result: + description: Job result. + enum: + - Uncertain + - Succeeded + - Failed + - Cancelled + type: string + submittedAt: + format: date-time + description: Submitted at time. + type: string + startedAt: + format: date-time + description: Started at time. 
+ type: string + endedAt: + format: date-time + description: Ended at time. + type: string + errorSource: + description: Error source. + enum: + - System + - User + - Unknown + - Dependency + type: string + errorCode: + description: Error Code. + type: string + tags: + description: Optional tags. + type: object + additionalProperties: + type: string + schedulerState: + description: Scheduler state. + enum: + - Queued + - Scheduled + - Ended + type: string + pluginState: + description: Plugin state. + enum: + - Preparation + - ResourceAcquisition + - Queued + - Submission + - Monitoring + - Cleanup + - Ended + type: string + livyState: + description: Gets or sets livy state. + type: string + isJobTimedOut: + description: If job is timed out. + type: boolean + BatchResponse: + description: Class that represents livy responses for batch. + type: object + properties: + livyInfo: + $ref: '#/definitions/BatchStateInformation' + fabricBatchStateInfo: + $ref: '#/definitions/BatchStateInfo' + name: + description: Name of the batch. + type: string + id: + description: ID created for the batch. + type: string + appId: + description: Application id of this batch. + type: string + appInfo: + description: Detailed application info. + type: object + additionalProperties: + type: string + artifactId: + description: Hosting artifact Id. + type: string + errorInfo: + description: Detailed error information. + type: array + items: + $ref: '#/definitions/ErrorInformation' + jobType: + description: Spark job type. + enum: + - SparkBatch + - SparkSession + - ScopeBatch + - JupyterEnvironment + type: string + submitterId: + description: ID of the user who submitted the session. + type: string + submitterName: + description: Name of the user who submitted the session. + type: string + log: + description: Log lines. 
+ type: array + items: + type: string + pluginInfo: + $ref: '#/definitions/SparkServicePluginInformation' + schedulerInfo: + $ref: '#/definitions/SchedulerInformation' + state: + description: State of the batch or session. + enum: + - starting + - running + - dead + - success + - killed + - idle + - error + - shutting_down + - not_started + - busy + - recovering + - submitting + - not_submitted + type: string + tags: + description: Optional tags. + type: object + additionalProperties: + type: string + result: + enum: + - Uncertain + - Succeeded + - Failed + - Cancelled + type: string + cancellationReason: + description: Cancellation reason. + type: string + BatchStateInfo: + description: Batch state info. + type: object + properties: + state: + description: State of the batch acquisition. + enum: + - unknown + - expired + - queued + - libraryPackaging + - submitting + - cancelling + - cancelled + - error + type: string + errorMessage: + description: Error message if the state is in "error". + type: string + ErrorInformation: + description: Error Information. + type: object + properties: + message: + type: string + errorCode: + type: string + source: + enum: + - System + - User + - Unknown + - Dependency + type: string + SessionResponse: + type: object + properties: + fabricSessionStateInfo: + $ref: '#/definitions/SessionStateInfo' + livyInfo: + $ref: '#/definitions/LivySessionStateInformation' + name: + description: Name of the session. + type: string + id: + description: Session ID created for the session. + type: string + appId: + description: The application id of this session. + type: string + appInfo: + description: Detailed application info. + type: object + additionalProperties: + type: string + artifactId: + description: Hosting artifact ID. + type: string + errorInfo: + description: Detailed error information. + type: array + items: + $ref: '#/definitions/ErrorInformation' + jobType: + description: Spark job type. 
+ enum: + - SparkBatch + - SparkSession + - ScopeBatch + - JupyterEnvironment + type: string + submitterId: + description: ID of the user who submitted the session. + type: string + submitterName: + description: Name of the user who submitted the session. + type: string + log: + description: Log lines. + type: array + items: + type: string + pluginInfo: + $ref: '#/definitions/SparkServicePluginInformation' + schedulerInfo: + $ref: '#/definitions/SchedulerInformation' + state: + description: Gets or sets the session state. + enum: + - starting + - running + - dead + - success + - killed + - idle + - error + - shutting_down + - not_started + - busy + - recovering + - submitting + - not_submitted + type: string + tags: + description: Optional tags. + type: object + additionalProperties: + type: string + result: + enum: + - Uncertain + - Succeeded + - Failed + - Cancelled + type: string + cancellationReason: + description: Cancellation reason. + type: string + SessionStateInfo: + description: Session state info. + type: object + properties: + state: + description: Session acquisition state. + enum: + - queued + - libraryPackaging + - acquiringSession + - cancelling + - cancelled + - error + - unknown + type: string + errorMessage: + description: Error message when in "error" state. 
+ type: string diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb b/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb new file mode 100644 index 00000000..d713ff1b --- /dev/null +++ b/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Authenticate and Request a token" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from msal import PublicClientApplication\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import requests\n", + "import time\n", + "\n", + "# Load environment variables from .env file\n", + "load_dotenv()\n", + "tenant_id = os.getenv('TENANT_ID')\n", + "client_id = os.getenv('CLIENT_ID')\n", + "workspace_id = os.getenv('WORKSPACE_ID')\n", + "lakehouse_id = os.getenv('LAKEHOUSE_ID')\n", + "redirect_url_port = os.getenv('REDIRECT_URL_PORT')\n", + "api_version = os.getenv('API_VERSION')\n", + "\n", + "app = PublicClientApplication(\n", + " client_id,\n", + " authority= f\"https://login.microsoftonline.com/{tenant_id}\", \n", + ")\n", + "\n", + "result = None\n", + "\n", + " # If no cached tokens or user interaction needed, acquire tokens interactively\n", + "if not result:\n", + " result = app.acquire_token_interactive(scopes=[\"https://api.fabric.microsoft.com/Lakehouse.Execute.All\", \"https://api.fabric.microsoft.com/Lakehouse.Read.All\", \"https://api.fabric.microsoft.com/Item.ReadWrite.All\", \n", + " \"https://api.fabric.microsoft.com/Workspace.ReadWrite.All\", \"https://api.fabric.microsoft.com/Code.AccessStorage.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All\", \n", + " \"https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All\", \"https://api.fabric.microsoft.com/Code.AccessFabric.All\"],\n", + " port=f\"{redirect_url_port}\")\n", + "\n", + "# Get 
the access token\n", + "if \"access_token\" in result:\n", + " access_token = result[\"access_token\"]\n", + "else:\n", + " print(result.get(\"error\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(access_token)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Request a Livy Session" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The request to create the Livy session is submitted:{'id': '462fe66a-9858-408c-813c-b848da3d9e60', 'artifactId': '0db8ae59-a739-4b50-844c-ee2be3519871'}\n", + "462fe66a-9858-408c-813c-b848da3d9e60\n" + ] + } + ], + "source": [ + "if access_token:\n", + " api_base_url_mist='https://api.fabric.microsoft.com/v1'\n", + " livy_base_url = api_base_url_mist + \"/workspaces/\"+workspace_id+\"/lakehouses/\"+lakehouse_id +\"/livyApi/versions/\"+api_version+\"/sessions\"\n", + " headers = {\"Authorization\": \"Bearer \" + access_token}\n", + "\n", + "# Create a Livy session\n", + "create_livy_session = requests.post(livy_base_url, headers=headers, json={\n", + " \"name\": \"test pyspark session from python code\",\n", + " \"archives\": [],\n", + " \"conf\": { \n", + " },\n", + " \"tags\": {\n", + " },\n", + " \"driverMemory\": \"7g\",\n", + " \"driverCores\": 1,\n", + " \"executorMemory\": \"7g\",\n", + " \"executorCores\": 1,\n", + " \"numExecutors\": 2\n", + "})\n", + "print('The request to create the Livy session is submitted:' + str(create_livy_session.json()))\n", + "\n", + "livy_session_id = create_livy_session.json()['id']\n", + "print(livy_session_id)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## List Livy Sessions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "livy_session_url = livy_base_url\n", + "get_sessions_response = 
requests.get(livy_session_url, headers=headers)\n", + "print(get_sessions_response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get details of a Livy Session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = requests.get(livy_session_url, headers=headers)\n", + " print(get_session_response.json())\n", + " \n", + "print(get_session_response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute a statement on a Spark session - Local dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# call get session API\n", + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "print(get_session_response.json())\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = requests.get(livy_session_url, headers=headers)\n", + "\n", + "execute_statement = livy_session_url + \"/statements\"\n", + "code =\"\"\"\n", + "df = spark.createDataFrame([{\"id\": 1, \"name\": \"Mounir\"}])\n", + "df.show()\n", + "\"\"\"\n", + "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n", + " \"code\": f\"{code}\",\n", + " \"kind\": \"pyspark\"\n", + " })\n", + "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n", + "\n", + "statement_id = str(execute_statement_response.json()['id'])\n", + "get_statement = livy_session_url+ \"/statements/\" + statement_id\n", + "get_statement_response = 
requests.get(get_statement, headers=headers)\n", + "\n", + "while get_statement_response.json()[\"state\"] != \"available\":\n", + " # Sleep for 5 seconds before making the next request\n", + " time.sleep(5)\n", + " print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n", + "\n", + " # Make the next request\n", + " get_statement_response = requests.get(get_statement, headers=headers)\n", + "\n", + "rst = get_statement_response.json()['output']['data']['text/plain']\n", + "print(rst)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute a statement on a Spark session - Data on the LakeHouse" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# call get session API\n", + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "print(get_session_response.json())\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = requests.get(livy_session_url, headers=headers)\n", + "\n", + "execute_statement = livy_session_url + \"/statements\"\n", + "code =\"\"\"\n", + "df = spark.sql(\"SELECT count(*) as Total, AGE FROM person GROUP BY AGE\").show()\n", + "df.show()\n", + "\"\"\"\n", + "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n", + " \"code\": f\"{code}\",\n", + " \"kind\": \"pyspark\"\n", + " })\n", + "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n", + "\n", + "statement_id = str(execute_statement_response.json()['id'])\n", + "get_statement = livy_session_url+ \"/statements/\" + statement_id\n", + "get_statement_response = requests.get(get_statement, headers=headers)\n", + "\n", + "while get_statement_response.json()[\"state\"] != \"available\":\n", + " # Sleep for 5 seconds before making the next 
request\n", + " time.sleep(5)\n", + " print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n", + "\n", + " # Make the next request\n", + " get_statement_response = requests.get(get_statement, headers=headers)\n", + "\n", + "rst = get_statement_response.json()['output']['data']['text/plain']\n", + "print(rst)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop and delete a Livy Session" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "\n", + "delete_session_response = requests.delete(livy_session_url, headers=headers)\n", + "print(delete_session_response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3.10.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric.py b/apache/livy/ExposedUI/python/fabric/test_fabric.py new file mode 100644 index 00000000..0bdcb651 --- /dev/null +++ b/apache/livy/ExposedUI/python/fabric/test_fabric.py @@ -0,0 +1,45 @@ + +from msal import PublicClientApplication +from dotenv import load_dotenv +import os +import requests +import time + +# Load environment variables from .env file +load_dotenv() +tenant_id = os.getenv('TENANT_ID') +client_id = os.getenv('CLIENT_ID') +workspace_id = os.getenv('WORKSPACE_ID') +lakehouse_id = os.getenv('LAKEHOUSE_ID') +redirect_url_port = os.getenv('REDIRECT_URL_PORT') +api_version = os.getenv('API_VERSION') + +app = PublicClientApplication( + 
client_id, + authority= f"https://login.microsoftonline.com/{tenant_id}", +) + +result = None + + # If no cached tokens or user interaction needed, acquire tokens interactively +if not result: + result = app.acquire_token_interactive(scopes=["https://api.fabric.microsoft.com/Lakehouse.Execute.All", "https://api.fabric.microsoft.com/Lakehouse.Read.All", "https://api.fabric.microsoft.com/Item.ReadWrite.All", + "https://api.fabric.microsoft.com/Workspace.ReadWrite.All", "https://api.fabric.microsoft.com/Code.AccessStorage.All", "https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All", + "https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All", "https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All", "https://api.fabric.microsoft.com/Code.AccessFabric.All"], + port=f"{redirect_url_port}") + +# Get the access token +if "access_token" in result: + access_token = result["access_token"] +else: + print(result.get("error")) + +if access_token: + api_base_url_mist='https://api.fabric.microsoft.com/v1' + livy_base_url = api_base_url_mist + "/workspaces/"+workspace_id+"/lakehouses/"+lakehouse_id +"/livyApi/versions/"+api_version+"/sessions" + headers = {"Authorization": "Bearer " + access_token} + +# List Livy sessions +livy_session_url = livy_base_url +get_sessions_response = requests.get(livy_session_url, headers=headers) +print(get_sessions_response.json()) diff --git a/apache/livy/ExposedUI/python/livy/delete_session.py b/apache/livy/ExposedUI/python/livy/delete_session.py new file mode 100644 index 00000000..b0c5047b --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/delete_session.py @@ -0,0 +1,5 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' + +r = requests.delete(host + '/sessions/1') +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/init_java_gateway.py b/apache/livy/ExposedUI/python/livy/init_java_gateway.py new file mode 100644 index 00000000..2304f509 --- /dev/null +++ 
b/apache/livy/ExposedUI/python/livy/init_java_gateway.py @@ -0,0 +1,26 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' +headers = {'Content-Type': 'application/json'} +statements_url = host + '/sessions/1/statements' + +data = { + 'code': textwrap.dedent(""" + # from https://stackoverflow.com/questions/65713299/javapackage-object-is-not-callable-error-executing-explain-in-pyspark-3-0 + # from https://github.com/apache/spark/blob/87bf6b0ea4ca0618c8604895d05037edce8b7cb0/python/pyspark/java_gateway.py#L153 + + from py4j.java_gateway import java_import + java_import(spark._sc._jvm, "org.apache.spark.SparkConf") + java_import(spark._sc._jvm, "org.apache.spark.api.java.*") + java_import(spark._sc._jvm, "org.apache.spark.api.python.*") + java_import(spark._sc._jvm, "org.apache.spark.ml.python.*") + java_import(spark._sc._jvm, "org.apache.spark.mllib.api.python.*") + java_import(spark._sc._jvm, "org.apache.spark.resource.*") + + java_import(spark._sc._jvm, "org.apache.spark.sql.*") + java_import(spark._sc._jvm, "org.apache.spark.sql.api.python.*") + java_import(spark._sc._jvm, "org.apache.spark.sql.hive.*") + """) +} + +r = requests.post(statements_url, data=json.dumps(data), headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/run_code.py b/apache/livy/ExposedUI/python/livy/run_code.py new file mode 100644 index 00000000..464da289 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/run_code.py @@ -0,0 +1,15 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' +headers = {'Content-Type': 'application/json'} +statements_url = host + '/sessions/1/statements' + +data = { + 'code': textwrap.dedent(""" + df = spark.createDataFrame([{"id": 1, "name": "Mounir"}]) + + df.show() + """) +} + +r = requests.post(statements_url, data=json.dumps(data), headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/run_code_external_file.py 
b/apache/livy/ExposedUI/python/livy/run_code_external_file.py new file mode 100644 index 00000000..53085659 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/run_code_external_file.py @@ -0,0 +1,16 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' +headers = {'Content-Type': 'application/json'} +statements_url = host + '/sessions/1/statements' + +external_python_file = './src/external_file.py' + +with open(external_python_file, 'r') as file: + pyspark_code = file.read() + +data = { + 'code': pyspark_code +} + +r = requests.post(statements_url, data=json.dumps(data), headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/src/external_file.py b/apache/livy/ExposedUI/python/livy/src/external_file.py new file mode 100644 index 00000000..def07fb9 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/src/external_file.py @@ -0,0 +1,12 @@ +# running the Apache Livy example (converted into Python3 syntax) https://livy.incubator.apache.org/examples/ +# The code is running from an external python file + +import random +NUM_SAMPLES = 100000 + +def sample(p): + x, y = random.random(), random.random() + return 1 if x*x + y*y < 1 else 0 + +count = sc.parallelize(range(0, NUM_SAMPLES)).map(sample).reduce(lambda a, b: a + b) +print("Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)) diff --git a/apache/livy/ExposedUI/python/livy/start_session.py b/apache/livy/ExposedUI/python/livy/start_session.py new file mode 100644 index 00000000..b810a4fe --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/start_session.py @@ -0,0 +1,11 @@ +# from https://livy.apache.org/examples/ + +# requires pip install requests +import json, pprint, requests, textwrap +host = 'http://localhost:8998' +headers = {'Content-Type': 'application/json'} + +data = {'kind': 'pyspark', 'name': 'test pyspark session from python code', 'proxyUser': 'Mounir', 'executorMemory': '2g'} + +r = requests.post(host + '/sessions', data=json.dumps(data), 
headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/wait_for_idle.py b/apache/livy/ExposedUI/python/livy/wait_for_idle.py new file mode 100644 index 00000000..32b9bd66 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/wait_for_idle.py @@ -0,0 +1,5 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' + +r = requests.get(host + '/sessions/1') +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/spark-submit/test_pandas.py b/apache/livy/ExposedUI/python/spark-submit/test_pandas.py new file mode 100644 index 00000000..bf8de60b --- /dev/null +++ b/apache/livy/ExposedUI/python/spark-submit/test_pandas.py @@ -0,0 +1,20 @@ +import pandas as pd + +from pyspark.sql import SparkSession + +app_name = "simple-app-pandas" + +spark = SparkSession.builder.appName(app_name).getOrCreate() + +# Creating a DataFrame from a dictionary +data = { + 'Name': ['Alice', 'Bob', 'Charlie'], + 'Age': [25, 30, 35], + 'City': ['New York', 'Los Angeles', 'Chicago'] +} + +df = pd.DataFrame(data) +print(df) + +spark.stop() + diff --git a/apache/livy/ExposedUI/python/spark-submit/test_spark.py b/apache/livy/ExposedUI/python/spark-submit/test_spark.py new file mode 100644 index 00000000..5c6f6e95 --- /dev/null +++ b/apache/livy/ExposedUI/python/spark-submit/test_spark.py @@ -0,0 +1,11 @@ +from pyspark.sql import SparkSession + +app_name = "simple-app" + +spark = SparkSession.builder.appName(app_name).getOrCreate() + +df = spark.createDataFrame([{"id": 1, "name": "Mounir"}]) + +df.show() + +spark.stop() \ No newline at end of file diff --git a/apache/livy/ExposedUI/requirements.txt b/apache/livy/ExposedUI/requirements.txt new file mode 100644 index 00000000..d9350b29 --- /dev/null +++ b/apache/livy/ExposedUI/requirements.txt @@ -0,0 +1,4 @@ +pandas +requests +msal +python-dotenv \ No newline at end of file diff --git a/apache/livy/ExposedUI/spark/Dockerfile b/apache/livy/ExposedUI/spark/Dockerfile new file mode 100644 index 
00000000..803d3f1c --- /dev/null +++ b/apache/livy/ExposedUI/spark/Dockerfile @@ -0,0 +1,23 @@ +# https://github.com/apache/spark-docker/tree/master +# https://hub.docker.com/_/spark/tags +# Simulating Microsoft Fabric Runtime 1.3(with Apache Livy), Spark 3.5, Java 11 +FROM spark:3.5.4-python3 +USER root +RUN apt-get update && apt-get install -y curl + +# Install Python Dependencies +COPY ./requirements.txt /opt/ +RUN pip install -r /opt/requirements.txt + +# Apache Livy exception using Java 17 +# Exception in thread "main" java.util.concurrent.ExecutionException: javax.security.sasl.SaslException: Client closed before SASL negotiation finished. + +# Using Java 11 +# https://jdk.java.net/archive/ +WORKDIR /opt/java/ +RUN curl "https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz" -o openjdk-update.tar.gz \ + && tar -xzf "openjdk-update.tar.gz" \ + && rm -rf openjdk openjdk-update.tar.gz \ + && mv jdk-* openjdk + +USER spark From a86eda34484f36bb87cc7c1ff2c5034a5225d143 Mon Sep 17 00:00:00 2001 From: joernNNN Date: Wed, 13 Aug 2025 19:31:57 +0400 Subject: [PATCH 2/7] fix readme.md --- apache/livy/ExposedUI/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md index f388e153..84c52f17 100644 --- a/apache/livy/ExposedUI/README.md +++ b/apache/livy/ExposedUI/README.md @@ -1,8 +1,8 @@ # Setup Apache Livy with Docker Compose ```bash - -docker compose up --build +docker compose build spark-master +docker compose up ``` # Access the Livy UI and execute PySpark code ```bash From 8672a24fafd8c06cbf58d38ab08fc0b92ae2973d Mon Sep 17 00:00:00 2001 From: joernNNN Date: Thu, 12 Feb 2026 21:05:02 +0400 Subject: [PATCH 3/7] typo fix --- apache/livy/ExposedUI/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md index 84c52f17..67c9c9f3 100644 --- 
a/apache/livy/ExposedUI/README.md +++ b/apache/livy/ExposedUI/README.md @@ -15,7 +15,7 @@ curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(o # wait 30sec # {"id":0,"code":"import os\nprint(os.getcwd())","state":"waiting","output":null,"progress":0.0,"started":0,"completed":0} -# replace id from last reseponse with #statements_id +# replace id from last response with #statements_id curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id # output.data is the stdout # {"id":0,"code":"import os\nprint(os.getcwd())","state":"available","output":{"status":"ok","execution_count":0,"data":{"text/plain":"/opt"}},"progress":1.0,"started":1754515902001,"completed":1754515902003} From e9511879d04ac4262d227da50d3987d45daf0c61 Mon Sep 17 00:00:00 2001 From: joernNNN Date: Thu, 12 Feb 2026 21:26:49 +0400 Subject: [PATCH 4/7] secure livy instance setup is added now --- apache/livy/ExposedUI/README.md | 6 ++++++ apache/livy/ExposedUI/caddyfile | 6 ++++++ apache/livy/ExposedUI/docker-compose.yml | 11 ++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 apache/livy/ExposedUI/caddyfile diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md index 67c9c9f3..1e3f2429 100644 --- a/apache/livy/ExposedUI/README.md +++ b/apache/livy/ExposedUI/README.md @@ -19,4 +19,10 @@ curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(o curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id # output.data is the stdout # {"id":0,"code":"import os\nprint(os.getcwd())","state":"available","output":{"status":"ok","execution_count":0,"data":{"text/plain":"/opt"}},"progress":1.0,"started":1754515902001,"completed":1754515902003} +``` + +# Secured Livy instance +By running the following command, against a secured Livy instance, we receive `401 Unauthorized` response from the server: +```bash +curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' 
http://localhost:8999/sessions -v ``` \ No newline at end of file diff --git a/apache/livy/ExposedUI/caddyfile b/apache/livy/ExposedUI/caddyfile new file mode 100644 index 00000000..80044ac6 --- /dev/null +++ b/apache/livy/ExposedUI/caddyfile @@ -0,0 +1,6 @@ +:8999 { + basicauth * { + admin $2a$14$jHFWxuhiqIAvbDQ/JJl/huo80ecDIvULPt2TvWII1oYbY2HpTqxMu + } + reverse_proxy apache-livy:8998 +} diff --git a/apache/livy/ExposedUI/docker-compose.yml b/apache/livy/ExposedUI/docker-compose.yml index f00b4646..b5700dd0 100644 --- a/apache/livy/ExposedUI/docker-compose.yml +++ b/apache/livy/ExposedUI/docker-compose.yml @@ -70,9 +70,18 @@ services: depends_on: - spark-master - spark-worker - deploy: resources: limits: cpus: '1' memory: 2g + + livy-auth-proxy: + image: caddy:2.9-alpine + container_name: livy-auth-proxy + ports: + - "8999:8999" + volumes: + - ./caddyfile:/etc/caddy/Caddyfile:ro + depends_on: + - apache-livy \ No newline at end of file From de98a6edfeddbc5e5c0b271819a0aa3695a40d7a Mon Sep 17 00:00:00 2001 From: joernNNN Date: Fri, 13 Feb 2026 15:51:01 +0400 Subject: [PATCH 5/7] add actual secure apache livy server --- apache/livy/ExposedUI/README.md | 35 +++++--- .../ExposedUI/apache-livy/Dockerfile.secure | 27 ++++++ .../apache-livy/conf/livy-secure.conf | 59 +++++++++++++ .../src/com/livy/auth/TokenAuthFilter.java | 83 +++++++++++++++++++ apache/livy/ExposedUI/caddyfile | 6 -- .../livy/ExposedUI/docker-compose-secure.yml | 74 +++++++++++++++++ apache/livy/ExposedUI/docker-compose.yml | 12 +-- 7 files changed, 268 insertions(+), 28 deletions(-) create mode 100644 apache/livy/ExposedUI/apache-livy/Dockerfile.secure create mode 100644 apache/livy/ExposedUI/apache-livy/conf/livy-secure.conf create mode 100644 apache/livy/ExposedUI/apache-livy/custom-auth/src/com/livy/auth/TokenAuthFilter.java delete mode 100644 apache/livy/ExposedUI/caddyfile create mode 100644 apache/livy/ExposedUI/docker-compose-secure.yml diff --git a/apache/livy/ExposedUI/README.md 
b/apache/livy/ExposedUI/README.md index 1e3f2429..ac75f779 100644 --- a/apache/livy/ExposedUI/README.md +++ b/apache/livy/ExposedUI/README.md @@ -1,28 +1,41 @@ # Setup Apache Livy with Docker Compose +## Vulnerable (Exposed UI — no authentication) ```bash docker compose build spark-master docker compose up ``` -# Access the Livy UI and execute PySpark code + +### Access the Livy UI and execute PySpark code ```bash curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions -# {"id":6,"name":null,"appId":null,"owner":null,"proxyUser":null,"state":"starting","kind":"pyspark","appInfo":{"driverLogUrl":null,"sparkUiUrl":null},"log":["stdout: ","\nstderr: "],"ttl":null,"driverMemory":null,"driverCores":0,"executorMemory":null,"executorCores":0,"conf":{},"archives":[],"files":[],"heartbeatTimeoutInSecond":0,"jars":[],"numExecutors":0,"pyFiles":[],"queue":null} +# {"id":6,"name":null,"appId":null,"owner":null,"proxyUser":null,"state":"starting",...} # replace id from last response with $id curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(os.getcwd())"}' http://localhost:8998/sessions/$id/statements -# "java.lang.IllegalStateException: Session is in state starting" -# wait 30sec -# {"id":0,"code":"import os\nprint(os.getcwd())","state":"waiting","output":null,"progress":0.0,"started":0,"completed":0} +# wait ~30sec for session to become idle -# replace id from last response with #statements_id +# replace id from last response with $statements_id curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id # output.data is the stdout -# {"id":0,"code":"import os\nprint(os.getcwd())","state":"available","output":{"status":"ok","execution_count":0,"data":{"text/plain":"/opt"}},"progress":1.0,"started":1754515902001,"completed":1754515902003} ``` -# Secured Livy instance -By running the following command, against a secured Livy instance, we receive `401 Unauthorized` response from the server: +--- + 
+## Secured (Custom Authentication Filter) + +```bash +# Build and start the secured stack +docker compose -f docker-compose-secure.yml build +docker compose -f docker-compose-secure.yml up +``` + +### Test authentication ```bash -curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8999/sessions -v -``` \ No newline at end of file +# Without token → 401 +curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions + +# With valid token → 200 +curl -X POST -H "Authorization: Bearer changeme-use-a-strong-secret" -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions + +``` diff --git a/apache/livy/ExposedUI/apache-livy/Dockerfile.secure b/apache/livy/ExposedUI/apache-livy/Dockerfile.secure new file mode 100644 index 00000000..66f4927f --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/Dockerfile.secure @@ -0,0 +1,27 @@ +# Reuse the same image built for Spark Master/Worker +FROM mounirbs-local/spark-python3-java11:3.5.4 +USER root +ENV LIVY_HOME /opt/livy +WORKDIR /opt/ + +# Install Livy +RUN apt-get update && apt-get install -y unzip \ + && curl "https://dlcdn.apache.org/incubator/livy/0.8.0-incubating/apache-livy-0.8.0-incubating_2.12-bin.zip" -O \ + && unzip "apache-livy-0.8.0-incubating_2.12-bin" \ + && rm -rf "apache-livy-0.8.0-incubating_2.12-bin.zip" \ + && mv "apache-livy-0.8.0-incubating_2.12-bin" $LIVY_HOME \ + && mkdir $LIVY_HOME/logs \ + && chown -R spark:spark $LIVY_HOME + +# Compile and install the custom authentication filter +COPY custom-auth/src /tmp/custom-auth-src +RUN mkdir -p /tmp/custom-auth-classes \ + && javac -cp "$LIVY_HOME/jars/*" \ + -d /tmp/custom-auth-classes \ + /tmp/custom-auth-src/com/livy/auth/TokenAuthFilter.java \ + && jar cf $LIVY_HOME/jars/livy-custom-auth.jar \ + -C /tmp/custom-auth-classes . 
\ + && rm -rf /tmp/custom-auth-src /tmp/custom-auth-classes \ + && chown spark:spark $LIVY_HOME/jars/livy-custom-auth.jar + +USER spark diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy-secure.conf b/apache/livy/ExposedUI/apache-livy/conf/livy-secure.conf new file mode 100644 index 00000000..1eda2589 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/livy-secure.conf @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# What host address to start the server on. +livy.server.host = 0.0.0.0 + +# What port to start the server on. +livy.server.port = 8998 + +# What spark master Livy sessions should use. +livy.spark.master = spark://spark-master:7077 + +# What spark deploy mode Livy sessions should use. +livy.spark.deploy-mode = client + +# If livy should impersonate the requesting users when creating a new session. +livy.impersonation.enabled = true + +# List of local directories from where files are allowed to be added to user sessions. +livy.file.local-dir-whitelist = /target/ + +# If the Livy Web UI should be included in the Livy Server. 
+livy.ui.enabled = true + +# Enable CSRF protection +livy.server.csrf-protection.enabled = false + +# ============================================================ +# Custom Authentication Filter Configuration +# ============================================================ +# Use a custom token-based authentication filter. +# All requests must include: Authorization: Bearer +livy.server.auth.type = token +livy.server.auth.token.class = com.livy.auth.TokenAuthFilter +livy.server.auth.token.param.token = changeme-use-a-strong-secret + +# ============================================================ +# Access Control +# ============================================================ +livy.server.access-control.enabled = true +livy.server.access-control.allowed-users = livy-user + +livy.repl.jars = /opt/livy/jars/livy-client-common-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/commons-codec-1.9.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/minlog-1.3.0.jar, /opt/livy/repl_2.12-jars/kryo-shaded-4.0.2.jar, /opt/livy/repl_2.12-jars/livy-repl_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/objenesis-2.5.1.jar + +livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar diff --git a/apache/livy/ExposedUI/apache-livy/custom-auth/src/com/livy/auth/TokenAuthFilter.java b/apache/livy/ExposedUI/apache-livy/custom-auth/src/com/livy/auth/TokenAuthFilter.java new file mode 100644 index 00000000..5fc6c46d --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/custom-auth/src/com/livy/auth/TokenAuthFilter.java @@ -0,0 +1,83 @@ +package com.livy.auth; + +import javax.servlet.*; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.security.Principal; + +/** + * Custom authentication filter for Apache Livy. 
+ * + * Validates requests using a static bearer token passed via the "Authorization" header. + * The expected token is configured through the Livy config property: + * livy.server.auth.token.param.token = <secret-value> + * + * Requests without a valid "Authorization: Bearer <token>" header receive a 401 response. + */ +public class TokenAuthFilter implements Filter { + + private String expectedToken; + + @Override + public void init(FilterConfig filterConfig) throws ServletException { + expectedToken = filterConfig.getInitParameter("token"); + if (expectedToken == null || expectedToken.isEmpty()) { + throw new ServletException( + "TokenAuthFilter requires 'token' init parameter. " + + "Set livy.server.auth.token.param.token in livy.conf"); + } + } + + @Override + public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) + throws IOException, ServletException { + + HttpServletRequest httpReq = (HttpServletRequest) request; + HttpServletResponse httpResp = (HttpServletResponse) response; + + String authHeader = httpReq.getHeader("Authorization"); + + if (authHeader != null && authHeader.startsWith("Bearer ")) { + String token = authHeader.substring("Bearer ".length()).trim(); + if (token.equals(expectedToken)) { + // Wrap request so Livy sees an authenticated principal + HttpServletRequest wrappedRequest = new AuthenticatedRequestWrapper(httpReq, "livy-user"); + chain.doFilter(wrappedRequest, response); + return; + } + } + + httpResp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); + httpResp.setContentType("application/json"); + httpResp.getWriter().write("{\"error\": \"Unauthorized. Provide a valid Authorization: Bearer header.\"}"); + } + + @Override + public void destroy() { + // no-op + } + + /** + * Wrapper that exposes the authenticated user as a Principal so Livy's + * access-control layer can identify the caller. 
+ */ + private static class AuthenticatedRequestWrapper extends javax.servlet.http.HttpServletRequestWrapper { + private final String username; + + AuthenticatedRequestWrapper(HttpServletRequest request, String username) { + super(request); + this.username = username; + } + + @Override + public Principal getUserPrincipal() { + return () -> username; + } + + @Override + public String getRemoteUser() { + return username; + } + } +} diff --git a/apache/livy/ExposedUI/caddyfile b/apache/livy/ExposedUI/caddyfile deleted file mode 100644 index 80044ac6..00000000 --- a/apache/livy/ExposedUI/caddyfile +++ /dev/null @@ -1,6 +0,0 @@ -:8999 { - basicauth * { - admin $2a$14$jHFWxuhiqIAvbDQ/JJl/huo80ecDIvULPt2TvWII1oYbY2HpTqxMu - } - reverse_proxy apache-livy:8998 -} diff --git a/apache/livy/ExposedUI/docker-compose-secure.yml b/apache/livy/ExposedUI/docker-compose-secure.yml new file mode 100644 index 00000000..d674750f --- /dev/null +++ b/apache/livy/ExposedUI/docker-compose-secure.yml @@ -0,0 +1,74 @@ +services: + spark-master: + container_name: spark-master + hostname: spark-master + build: + context: ./ + dockerfile: ./spark/Dockerfile + image: mounirbs-local/spark-python3-java11:3.5.4 + ports: + - "8080:8080" + - "7077:7077" + - "6066:6066" + environment: + - SPARK_MASTER_HOST=spark-master + - SPARK_MASTER_PORT=7077 + - SPARK_MASTER_WEBUI_PORT=8080 + - SPARK_DAEMON_MEMORY=2g + - SPARK_MASTER_OPTS=-Dspark.master.rest.enabled=true + - PYSPARK_PYTHON=python3 + entrypoint: + - "bash" + - "-c" + - "/opt/spark/sbin/start-master.sh && tail -f /dev/null" + volumes: + - ./python:/python + + spark-worker: + image: mounirbs-local/spark-python3-java11:3.5.4 + ports: + - "8081:8081" + container_name: spark-worker + hostname: spark-worker + environment: + - SPARK_WORKER_CORES=1 + - SPARK_WORKER_MEMORY=2g + - PYSPARK_PYTHON=python3 + depends_on: + - spark-master + entrypoint: + - "bash" + - "-c" + - "/opt/spark/sbin/start-worker.sh spark://spark-master:7077 && tail -f /dev/null" + 
volumes: + - ./python:/python + + apache-livy: + container_name: apache-livy + hostname: apache-livy + environment: + - PYSPARK_PYTHON=python3 + build: + context: ./apache-livy/ + dockerfile: Dockerfile.secure + image: mounirbs-local/livy-spark3.5.4-python3-java11:0.8-secure + command: ["sh", "-c", "/opt/livy/bin/livy-server"] + user: root + volumes: + # Mount the SECURE config that enables the custom auth filter + - ./apache-livy/conf/livy-secure.conf:/opt/livy/conf/livy.conf + - ./apache-livy/conf/livy-client.conf:/opt/livy/conf/livy-client.conf + - ./apache-livy/conf/livy-env.sh:/opt/livy/conf/livy-env.sh + - ./apache-livy/conf/log4j.properties:/opt/livy/conf/log4j.properties + - ./apache-livy/conf/spark-blacklist:/opt/livy/conf/spark-blacklist + - ./apache-livy/spark/conf/:/opt/spark/conf/ + ports: + - '8998:8998' + depends_on: + - spark-master + - spark-worker + deploy: + resources: + limits: + cpus: '1' + memory: 2g diff --git a/apache/livy/ExposedUI/docker-compose.yml b/apache/livy/ExposedUI/docker-compose.yml index b5700dd0..0f9e9825 100644 --- a/apache/livy/ExposedUI/docker-compose.yml +++ b/apache/livy/ExposedUI/docker-compose.yml @@ -74,14 +74,4 @@ services: resources: limits: cpus: '1' - memory: 2g - - livy-auth-proxy: - image: caddy:2.9-alpine - container_name: livy-auth-proxy - ports: - - "8999:8999" - volumes: - - ./caddyfile:/etc/caddy/Caddyfile:ro - depends_on: - - apache-livy \ No newline at end of file + memory: 2g \ No newline at end of file From 2562e346475abe5fca575caaa04e6c9989680777 Mon Sep 17 00:00:00 2001 From: joernNNN Date: Fri, 20 Feb 2026 16:10:57 +0400 Subject: [PATCH 6/7] no need to python directory here --- apache/livy/ExposedUI/docker-compose.yml | 4 - .../livy/ExposedUI/python/fabric/swagger.json | 1753 ----------------- .../livy/ExposedUI/python/fabric/swagger.yaml | 1250 ------------ .../python/fabric/test_fabric..ipynb | 296 --- .../ExposedUI/python/fabric/test_fabric.py | 45 - .../ExposedUI/python/livy/delete_session.py | 5 - 
.../python/livy/init_java_gateway.py | 26 - apache/livy/ExposedUI/python/livy/run_code.py | 15 - .../python/livy/run_code_external_file.py | 16 - .../python/livy/src/external_file.py | 12 - .../ExposedUI/python/livy/start_session.py | 11 - .../ExposedUI/python/livy/wait_for_idle.py | 5 - .../python/spark-submit/test_pandas.py | 20 - .../python/spark-submit/test_spark.py | 11 - 14 files changed, 3469 deletions(-) delete mode 100644 apache/livy/ExposedUI/python/fabric/swagger.json delete mode 100644 apache/livy/ExposedUI/python/fabric/swagger.yaml delete mode 100644 apache/livy/ExposedUI/python/fabric/test_fabric..ipynb delete mode 100644 apache/livy/ExposedUI/python/fabric/test_fabric.py delete mode 100644 apache/livy/ExposedUI/python/livy/delete_session.py delete mode 100644 apache/livy/ExposedUI/python/livy/init_java_gateway.py delete mode 100644 apache/livy/ExposedUI/python/livy/run_code.py delete mode 100644 apache/livy/ExposedUI/python/livy/run_code_external_file.py delete mode 100644 apache/livy/ExposedUI/python/livy/src/external_file.py delete mode 100644 apache/livy/ExposedUI/python/livy/start_session.py delete mode 100644 apache/livy/ExposedUI/python/livy/wait_for_idle.py delete mode 100644 apache/livy/ExposedUI/python/spark-submit/test_pandas.py delete mode 100644 apache/livy/ExposedUI/python/spark-submit/test_spark.py diff --git a/apache/livy/ExposedUI/docker-compose.yml b/apache/livy/ExposedUI/docker-compose.yml index 0f9e9825..4075e544 100644 --- a/apache/livy/ExposedUI/docker-compose.yml +++ b/apache/livy/ExposedUI/docker-compose.yml @@ -24,8 +24,6 @@ services: - "bash" - "-c" - "/opt/spark/sbin/start-master.sh && tail -f /dev/null" - volumes: - - ./python:/python spark-worker: # reuse the image built for the spark-master @@ -47,8 +45,6 @@ services: - "bash" - "-c" - "/opt/spark/sbin/start-worker.sh spark://spark-master:7077 && tail -f /dev/null" - volumes: - - ./python:/python apache-livy: container_name: apache-livy diff --git 
a/apache/livy/ExposedUI/python/fabric/swagger.json b/apache/livy/ExposedUI/python/fabric/swagger.json deleted file mode 100644 index 3d533742..00000000 --- a/apache/livy/ExposedUI/python/fabric/swagger.json +++ /dev/null @@ -1,1753 +0,0 @@ -{ - "swagger": "2.0", - "info": { - "version": "v1", - "title": "Livy Public API" - }, - "host": "api.fabric.microsoft.com", - "schemes": [ - "https" - ], - "paths": { - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches": { - "post": { - "tags": [ - "LivyApiBatch" - ], - "summary": "Executes a batch.", - "operationId": "LivyApiBatch_ExecuteBatchAsync", - "consumes": [ - "application/json", - "text/json" - ], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "payload", - "in": "body", - "description": "", - "required": true, - "schema": { - "$ref": "#/definitions/BatchRequest" - } - } - ], - "responses": { - "202": { - "description": "Accepted", - "schema": { - "$ref": "#/definitions/BatchResponse" - } - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - }, - "get": { - "tags": [ - "LivyApiBatch" - ], - "summary": "List Batch Jobs.", - "operationId": "LivyApiBatch_ListBatchJobsAsync", - "consumes": [], - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - 
"name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "$top", - "in": "query", - "type": "string", - "description": "" - }, - { - "name": "$skip", - "in": "query", - "type": "string", - "description": "" - }, - { - "name": "$count", - "in": "query", - "type": "boolean", - "description": "" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/LivySparkActivityList" - } - }, - "default": { - "description": "Other Status", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}": { - "get": { - "tags": [ - "LivyApiBatch" - ], - "summary": "Gets a Batch Details.", - "operationId": "LivyApiBatch_GetBatchAsync", - "consumes": [], - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "batchId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/BatchResponse" - } - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - }, - "delete": { - "tags": [ - "LivyApiBatch" - ], - "summary": "Cancels a Batch Execution.", - "operationId": "LivyApiBatch_CancelBatchAsync", - "consumes": [], - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": 
"lakehouseId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "batchId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "200": { - "description": "OK" - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions": { - "post": { - "tags": [ - "LivyApiSession" - ], - "summary": "Acquire a Spark Session.", - "operationId": "LivyApiSession_AcquireSparkSessionAsync", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "payload", - "in": "body", - "description": "CreateSessionRequest.", - "required": true, - "schema": { - "$ref": "#/definitions/SessionRequest" - } - } - ], - "responses": { - "202": { - "description": "Accepted", - "schema": { - "$ref": "#/definitions/SessionResponse" - } - }, - "default": { - "description": "Other status code", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - }, - "get": { - "tags": [ - "LivyApiSession" - ], - "summary": "List spark sessions.", - "operationId": "LivyApiSession_ListSparkSessionsAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace name.", - 
"required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "$top", - "in": "query", - "type": "string", - "description": "" - }, - { - "name": "$skip", - "in": "query", - "type": "string", - "description": "" - }, - { - "name": "$count", - "in": "query", - "type": "boolean", - "description": "" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/SessionResponse" - } - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}": { - "get": { - "tags": [ - "LivyApiSession" - ], - "summary": "Get details of a spark session.", - "operationId": "LivyApiSession_GetSparkSessionAsync", - "consumes": [], - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace name.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "Spark session Id.", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/SessionResponse" - } - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - }, - "delete": { - "tags": [ - "LivyApiSession" - ], - "summary": "Stops and deletes 
a spark session.", - "operationId": "LivyApiSession_DeleteSparkSessionAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "Session Id.", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "200": { - "description": "OK" - }, - "default": { - "description": "Other status codes", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements": { - "post": { - "tags": [ - "LivyApiSession" - ], - "summary": "Execute a statement on a spark session.", - "operationId": "LivyApiSession_ExecuteSparkSessionStatementAsync", - "consumes": [ - "application/json", - "text/json" - ], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "Spark Session Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "payload", - "in": "body", - "description": "", - "required": true, - "schema": { - "$ref": "#/definitions/StatementRequest" - } - } - ], - "responses": { 
- "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/StatementResponse" - } - }, - "default": { - "description": "Other status codes.", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - }, - "get": { - "tags": [ - "LivyApiSession" - ], - "summary": "List statements in an active session.", - "operationId": "LivyApiSession_ListSparkSessionStatementsAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace name.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/StatementsResponse" - } - }, - "default": { - "description": "Other status codes.", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}": { - "get": { - "tags": [ - "LivyApiSession" - ], - "summary": "Gets a spark statement from a spark session.", - "operationId": "LivyApiSession_GetSparkSessionStatementAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "sessionId", - "in": "path", - 
"description": "Spark Session Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "statementId", - "in": "path", - "description": "Statement Id.", - "required": true, - "type": "integer", - "format": "int32" - }, - { - "name": "from", - "in": "query", - "description": "Offset (in byte) which the output should begin from.", - "type": "integer", - "format": "int32" - }, - { - "name": "size", - "in": "query", - "description": "Size (in byte) of the returned output.", - "type": "integer", - "format": "int32" - } - ], - "responses": { - "200": { - "description": "OK" - }, - "default": { - "description": "Other status codes.", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel": { - "post": { - "tags": [ - "LivyApiSession" - ], - "summary": "Cancels a statement execution.", - "operationId": "LivyApiSession_CancelSparkStatementAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "Session Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "statementId", - "in": "path", - "description": "", - "required": true, - "type": "integer", - "format": "int32" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "$ref": "#/definitions/StatementCancellationResponse" 
- } - }, - "default": { - "description": "Other status codes.", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - }, - "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout": { - "post": { - "tags": [ - "LivyApiSession" - ], - "summary": "Resets the timeout time of a session.", - "operationId": "LivyApiSession_ResetSparkSessionTimeoutAsync", - "consumes": [], - "produces": [ - "application/json", - "text/json" - ], - "parameters": [ - { - "name": "workspaceId", - "in": "path", - "description": "Workspace Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "lakehouseId", - "in": "path", - "description": "Lakehouse Id.", - "required": true, - "type": "string", - "format": "uuid" - }, - { - "name": "livyApiVersion", - "in": "path", - "required": true, - "type": "string" - }, - { - "name": "sessionId", - "in": "path", - "description": "Session Id.", - "required": true, - "type": "string", - "format": "uuid" - } - ], - "responses": { - "204": { - "description": "OK" - }, - "default": { - "description": "Other status codes.", - "schema": { - "$ref": "#/definitions/ErrorResponse" - } - } - } - } - } - }, - "definitions": { - "StatementResponse": { - "description": "Statement Response.", - "type": "object", - "properties": { - "id": { - "format": "int32", - "type": "integer" - }, - "code": { - "type": "string" - }, - "state": { - "enum": [ - "waiting", - "running", - "available", - "Error", - "cancelling", - "cancelled" - ], - "type": "string" - }, - "sourceId": { - "type": "string" - }, - "output": { - "$ref": "#/definitions/StatementOutput" - } - } - }, - "StatementOutput": { - "description": "Statement Output.", - "type": "object", - "properties": { - "status": { - "type": "string" - }, - "execution_count": { - "format": "int32", - "type": "integer" - }, - "data": { - "type": "object" - }, - "ename": { - "type": "string" - }, - "evalue": { - 
"type": "string" - }, - "traceback": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "BatchRequest": { - "description": "Request for runing batch job.", - "type": "object", - "allOf": [ - { - "$ref": "#/definitions/SessionRequest" - }, - { - "type": "object" - } - ], - "properties": { - "file": { - "type": "string" - }, - "className": { - "type": "string" - }, - "args": { - "type": "array", - "items": { - "type": "string" - } - }, - "jars": { - "type": "array", - "items": { - "type": "string" - } - }, - "files": { - "type": "array", - "items": { - "type": "string" - } - }, - "pyFiles": { - "type": "array", - "items": { - "type": "string" - } - }, - "archives": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "SessionRequest": { - "description": "Request for acquiring a Session.", - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "archives": { - "type": "array", - "items": { - "type": "string" - } - }, - "conf": { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "tags": { - "description": "Gets or sets the optional tags.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "driverMemory": { - "type": "string" - }, - "driverCores": { - "format": "int32", - "type": "integer" - }, - "executorMemory": { - "type": "string" - }, - "executorCores": { - "format": "int32", - "type": "integer" - }, - "numExecutors": { - "format": "int32", - "type": "integer" - } - } - }, - "StatementRequest": { - "description": "Statement Request Body.", - "type": "object", - "properties": { - "code": { - "type": "string" - }, - "kind": { - "type": "string" - }, - "sourceId": { - "type": "string" - } - } - }, - "LivySessionStateInformation": { - "description": "Livy Session State Information.", - "type": "object", - "properties": { - "notStartedAt": { - "format": "date-time", - "type": "string" - }, - "startingAt": { - "format": "date-time", - "type": "string" - }, 
- "idleAt": { - "format": "date-time", - "type": "string" - }, - "deadAt": { - "format": "date-time", - "type": "string" - }, - "shuttingDownAt": { - "format": "date-time", - "type": "string" - }, - "killedAt": { - "format": "date-time", - "type": "string" - }, - "recoveringAt": { - "format": "date-time", - "type": "string" - }, - "busyAt": { - "format": "date-time", - "type": "string" - }, - "errorAt": { - "format": "date-time", - "type": "string" - }, - "currentState": { - "type": "string" - }, - "jobCreationRequest": { - "$ref": "#/definitions/SessionRequest" - } - } - }, - "SchedulerInformation": { - "description": "Scheduler Information.", - "type": "object", - "properties": { - "submittedAt": { - "format": "date-time", - "type": "string" - }, - "queuedAt": { - "format": "date-time", - "type": "string" - }, - "scheduledAt": { - "format": "date-time", - "type": "string" - }, - "endedAt": { - "format": "date-time", - "type": "string" - }, - "cancellationRequestedAt": { - "format": "date-time", - "type": "string" - }, - "currentState": { - "enum": [ - "Queued", - "Scheduled", - "Ended" - ], - "type": "string" - } - } - }, - "SparkServicePluginInformation": { - "description": "Spark Service Plugin Information.", - "type": "object", - "properties": { - "preparationStartedAt": { - "format": "date-time", - "type": "string" - }, - "resourceAcquisitionStartedAt": { - "format": "date-time", - "type": "string" - }, - "submissionStartedAt": { - "format": "date-time", - "type": "string" - }, - "monitoringStartedAt": { - "format": "date-time", - "type": "string" - }, - "cleanupStartedAt": { - "format": "date-time", - "type": "string" - }, - "currentState": { - "enum": [ - "Preparation", - "ResourceAcquisition", - "Queued", - "Submission", - "Monitoring", - "Cleanup", - "Ended" - ], - "type": "string" - } - } - }, - "LivyRequestBase": { - "description": "Livy Request Base.", - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "file": { - "type": 
"string" - }, - "className": { - "type": "string" - }, - "args": { - "type": "array", - "items": { - "type": "string" - } - }, - "jars": { - "type": "array", - "items": { - "type": "string" - } - }, - "files": { - "type": "array", - "items": { - "type": "string" - } - }, - "pyFiles": { - "type": "array", - "items": { - "type": "string" - } - }, - "archives": { - "type": "array", - "items": { - "type": "string" - } - }, - "conf": { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "driverMemory": { - "type": "string" - }, - "driverCores": { - "format": "int32", - "type": "integer" - }, - "executorMemory": { - "type": "string" - }, - "executorCores": { - "format": "int32", - "type": "integer" - }, - "numExecutors": { - "format": "int32", - "type": "integer" - } - } - }, - "StatementsResponse": { - "description": "Livy Statement Response Body.", - "type": "object", - "properties": { - "statements": { - "type": "array", - "items": { - "$ref": "#/definitions/StatementResponse" - } - }, - "total_statements": { - "format": "int32", - "type": "integer" - } - } - }, - "StatementCancellationResponse": { - "description": "Livy Statement CancellationResponse.", - "type": "object", - "properties": { - "msg": { - "type": "string" - } - } - }, - "BatchStateInformation": { - "description": "Batch State Information.", - "type": "object", - "properties": { - "notStartedAt": { - "format": "date-time", - "type": "string" - }, - "startingAt": { - "format": "date-time", - "type": "string" - }, - "runningAt": { - "format": "date-time", - "type": "string" - }, - "deadAt": { - "format": "date-time", - "type": "string" - }, - "successAt": { - "format": "date-time", - "type": "string" - }, - "killedAt": { - "format": "date-time", - "type": "string" - }, - "recoveringAt": { - "format": "date-time", - "type": "string" - }, - "currentState": { - "type": "string" - }, - "jobCreationRequest": { - "$ref": "#/definitions/LivyRequestBase" - } - } - }, - "ErrorResponse": { 
- "description": "The error response.", - "required": [ - "message", - "errorCode" - ], - "allOf": [ - { - "$ref": "#/definitions/ErrorResponseDetails" - } - ], - "properties": { - "requestId": { - "type": "string", - "description": "ID of the request associated with the error.", - "readOnly": true - }, - "moreDetails": { - "description": "List of additional error details.", - "type": "array", - "items": { - "$ref": "#/definitions/ErrorResponseDetails" - }, - "readOnly": true - } - }, - "readOnly": true - }, - "ErrorResponseDetails": { - "description": "The error response details.", - "required": [ - "message", - "errorCode" - ], - "properties": { - "errorCode": { - "type": "string", - "description": "A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users." - }, - "message": { - "type": "string", - "description": "A human readable representation of the error." - }, - "relatedResource": { - "$ref": "#/definitions/ErrorRelatedResource" - } - } - }, - "ErrorRelatedResource": { - "description": "The error related resource details object.", - "required": [ - "resourceId", - "resourceType" - ], - "properties": { - "resourceId": { - "type": "string", - "description": "Resource ID involved in the error." - }, - "resourceType": { - "type": "string", - "description": "Resource type involved in the error." 
- } - } - }, - "LivySparkActivityList": { - "description": "List of batches or sessions.", - "type": "object", - "properties": { - "items": { - "description": "List of items.", - "type": "array", - "items": { - "$ref": "#/definitions/LivySparkActivity" - } - }, - "totalCountOfMatchedItems": { - "format": "int32", - "description": "Total count of matched items.", - "type": "integer" - }, - "pageSize": { - "format": "int32", - "description": "Page size.", - "type": "integer" - } - } - }, - "LivySparkActivity": { - "description": "Batch or Session description when getting a list.", - "type": "object", - "properties": { - "id": { - "description": "Activity ID.", - "type": "string" - }, - "appId": { - "description": "Spark application ID.", - "type": "string" - }, - "name": { - "description": "Batch or Session Name.", - "type": "string" - }, - "workspaceId": { - "description": "Workspace ID.", - "type": "string" - }, - "submitterId": { - "description": "Submitter ID.", - "type": "string" - }, - "submitterName": { - "description": "Submitter name.", - "type": "string" - }, - "artifactId": { - "description": "Artifact ID.", - "type": "string" - }, - "cancellationReason": { - "description": "Cancellation reason.", - "type": "string" - }, - "result": { - "description": "Job result.", - "enum": [ - "Uncertain", - "Succeeded", - "Failed", - "Cancelled" - ], - "type": "string" - }, - "submittedAt": { - "format": "date-time", - "description": "Submitted at time.", - "type": "string" - }, - "startedAt": { - "format": "date-time", - "description": "Started at time.", - "type": "string" - }, - "endedAt": { - "format": "date-time", - "description": "Ended at time.", - "type": "string" - }, - "errorSource": { - "description": "Error source.", - "enum": [ - "System", - "User", - "Unknown", - "Dependency" - ], - "type": "string" - }, - "errorCode": { - "description": "Error Code.", - "type": "string" - }, - "tags": { - "description": "Optional tags.", - "type": "object", - 
"additionalProperties": { - "type": "string" - } - }, - "schedulerState": { - "description": "Scheduler state.", - "enum": [ - "Queued", - "Scheduled", - "Ended" - ], - "type": "string" - }, - "pluginState": { - "description": "Plugin state.", - "enum": [ - "Preparation", - "ResourceAcquisition", - "Queued", - "Submission", - "Monitoring", - "Cleanup", - "Ended" - ], - "type": "string" - }, - "livyState": { - "description": "Gets or sets livy state.", - "type": "string" - }, - "isJobTimedOut": { - "description": "If job is timed out.", - "type": "boolean" - } - } - }, - "BatchResponse": { - "description": "Class that represents livy responses for batch.", - "type": "object", - "properties": { - "livyInfo": { - "$ref": "#/definitions/BatchStateInformation" - }, - "fabricBatchStateInfo": { - "$ref": "#/definitions/BatchStateInfo" - }, - "name": { - "description": "Name of the batch.", - "type": "string" - }, - "id": { - "description": "ID created for the batch.", - "type": "string" - }, - "appId": { - "description": "Application id of this batch.", - "type": "string" - }, - "appInfo": { - "description": "Detailed application info.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "artifactId": { - "description": "Hosting artifact Id.", - "type": "string" - }, - "errorInfo": { - "description": "Detailed error information.", - "type": "array", - "items": { - "$ref": "#/definitions/ErrorInformation" - } - }, - "jobType": { - "description": "Spark job type.", - "enum": [ - "SparkBatch", - "SparkSession", - "ScopeBatch", - "JupyterEnvironment" - ], - "type": "string" - }, - "submitterId": { - "description": "ID of e user who submitted the session.", - "type": "string" - }, - "submitterName": { - "description": "Name of the user who submitted the session.", - "type": "string" - }, - "log": { - "description": "Log lines.", - "type": "array", - "items": { - "type": "string" - } - }, - "pluginInfo": { - "$ref": 
"#/definitions/SparkServicePluginInformation" - }, - "schedulerInfo": { - "$ref": "#/definitions/SchedulerInformation" - }, - "state": { - "description": "State of the batch or session.", - "enum": [ - "starting", - "running", - "dead", - "success", - "killed", - "idle", - "error", - "shutting_down", - "not_started", - "busy", - "recovering", - "submitting", - "not_submitted" - ], - "type": "string" - }, - "tags": { - "description": "Optional tags.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "result": { - "enum": [ - "Uncertain", - "Succeeded", - "Failed", - "Cancelled" - ], - "type": "string" - }, - "cancellationReason": { - "description": "Cancellation reason.", - "type": "string" - } - } - }, - "BatchStateInfo": { - "description": "Batch state info.", - "type": "object", - "properties": { - "state": { - "description": "State of the batch acquisition.", - "enum": [ - "unknown", - "expired", - "queued", - "libraryPackaging", - "submitting", - "cancelling", - "cancelled", - "error" - ], - "type": "string" - }, - "errorMessage": { - "description": "Error message if the state is in \"error\".", - "type": "string" - } - } - }, - "ErrorInformation": { - "description": "Error Information.", - "type": "object", - "properties": { - "message": { - "type": "string" - }, - "errorCode": { - "type": "string" - }, - "source": { - "enum": [ - "System", - "User", - "Unknown", - "Dependency" - ], - "type": "string" - } - } - }, - "SessionResponse": { - "type": "object", - "properties": { - "fabricSessionStateInfo": { - "$ref": "#/definitions/SessionStateInfo" - }, - "livyInfo": { - "$ref": "#/definitions/LivySessionStateInformation" - }, - "name": { - "description": "Name of the session.", - "type": "string" - }, - "id": { - "description": "Session ID created for the session.", - "type": "string" - }, - "appId": { - "description": "The application id of this session.", - "type": "string" - }, - "appInfo": { - "description": "Detailed application 
info.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "artifactId": { - "description": "Hosting artifact ID.", - "type": "string" - }, - "errorInfo": { - "description": "Detailed error information.", - "type": "array", - "items": { - "$ref": "#/definitions/ErrorInformation" - } - }, - "jobType": { - "description": "Spark job type.", - "enum": [ - "SparkBatch", - "SparkSession", - "ScopeBatch", - "JupyterEnvironment" - ], - "type": "string" - }, - "submitterId": { - "description": "ID of the user who submitted the sessionn.", - "type": "string" - }, - "submitterName": { - "description": "Name of the user who submitted the session.", - "type": "string" - }, - "log": { - "description": "Log lines.", - "type": "array", - "items": { - "type": "string" - } - }, - "pluginInfo": { - "$ref": "#/definitions/SparkServicePluginInformation" - }, - "schedulerInfo": { - "$ref": "#/definitions/SchedulerInformation" - }, - "state": { - "description": "Gets or sets the session state.", - "enum": [ - "starting", - "running", - "dead", - "success", - "killed", - "idle", - "error", - "shutting_down", - "not_started", - "busy", - "recovering", - "submitting", - "not_submitted" - ], - "type": "string" - }, - "tags": { - "description": "Optional tags.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "result": { - "enum": [ - "Uncertain", - "Succeeded", - "Failed", - "Cancelled" - ], - "type": "string" - }, - "cancellationReason": { - "description": "Cancellation reason.", - "type": "string" - } - } - }, - "SessionStateInfo": { - "description": "Session state info.", - "type": "object", - "properties": { - "state": { - "description": "Session acquisiton state.", - "enum": [ - "queued", - "libraryPackaging", - "acquiringSession", - "cancelling", - "cancelled", - "error", - "unknown" - ], - "type": "string" - }, - "errorMessage": { - "description": "Error message when in \"error\" state.", - "type": "string" - } - } - } - } -} \ 
No newline at end of file diff --git a/apache/livy/ExposedUI/python/fabric/swagger.yaml b/apache/livy/ExposedUI/python/fabric/swagger.yaml deleted file mode 100644 index f447f338..00000000 --- a/apache/livy/ExposedUI/python/fabric/swagger.yaml +++ /dev/null @@ -1,1250 +0,0 @@ -swagger: '2.0' -info: - version: v1 - title: Livy Public API -host: api.fabric.microsoft.com -schemes: - - https -paths: - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches: - post: - tags: - - LivyApiBatch - summary: Executes a batch. - operationId: LivyApiBatch_ExecuteBatchAsync - consumes: - - application/json - - text/json - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: '' - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: '' - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: payload - in: body - description: '' - required: true - schema: - $ref: '#/definitions/BatchRequest' - responses: - '202': - description: Accepted - schema: - $ref: '#/definitions/BatchResponse' - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - get: - tags: - - LivyApiBatch - summary: List Batch Jobs. 
- operationId: LivyApiBatch_ListBatchJobsAsync - consumes: [] - produces: - - application/json - parameters: - - name: workspaceId - in: path - description: '' - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: '' - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: $top - in: query - type: string - description: '' - - name: $skip - in: query - type: string - description: '' - - name: $count - in: query - type: boolean - description: '' - responses: - '200': - description: OK - schema: - $ref: '#/definitions/LivySparkActivityList' - default: - description: Other Status - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}: - get: - tags: - - LivyApiBatch - summary: Gets a Batch Details. - operationId: LivyApiBatch_GetBatchAsync - consumes: [] - produces: - - application/json - parameters: - - name: workspaceId - in: path - description: '' - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: '' - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: batchId - in: path - description: '' - required: true - type: string - format: uuid - responses: - '200': - description: OK - schema: - $ref: '#/definitions/BatchResponse' - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - delete: - tags: - - LivyApiBatch - summary: Cancels a Batch Execution. 
- operationId: LivyApiBatch_CancelBatchAsync - consumes: [] - produces: - - application/json - parameters: - - name: workspaceId - in: path - description: '' - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: '' - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: batchId - in: path - description: '' - required: true - type: string - format: uuid - responses: - '200': - description: OK - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions: - post: - tags: - - LivyApiSession - summary: Acquire a Spark Session. - operationId: LivyApiSession_AcquireSparkSessionAsync - consumes: - - application/json - produces: - - application/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: payload - in: body - description: CreateSessionRequest. - required: true - schema: - $ref: '#/definitions/SessionRequest' - responses: - '202': - description: Accepted - schema: - $ref: '#/definitions/SessionResponse' - default: - description: Other status code - schema: - $ref: '#/definitions/ErrorResponse' - get: - tags: - - LivyApiSession - summary: List spark sessions. - operationId: LivyApiSession_ListSparkSessionsAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace name. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. 
- required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: $top - in: query - type: string - description: '' - - name: $skip - in: query - type: string - description: '' - - name: $count - in: query - type: boolean - description: '' - responses: - '200': - description: OK - schema: - $ref: '#/definitions/SessionResponse' - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}: - get: - tags: - - LivyApiSession - summary: Get details of a spark session. - operationId: LivyApiSession_GetSparkSessionAsync - consumes: [] - produces: - - application/json - parameters: - - name: workspaceId - in: path - description: Workspace name. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: Spark session Id. - required: true - type: string - format: uuid - responses: - '200': - description: OK - schema: - $ref: '#/definitions/SessionResponse' - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - delete: - tags: - - LivyApiSession - summary: Stops and deletes a spark session. - operationId: LivyApiSession_DeleteSparkSessionAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: Session Id. 
- required: true - type: string - format: uuid - responses: - '200': - description: OK - default: - description: Other status codes - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements: - post: - tags: - - LivyApiSession - summary: Execute a statement on a spark session. - operationId: LivyApiSession_ExecuteSparkSessionStatementAsync - consumes: - - application/json - - text/json - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: Spark Session Id. - required: true - type: string - format: uuid - - name: payload - in: body - description: '' - required: true - schema: - $ref: '#/definitions/StatementRequest' - responses: - '200': - description: OK - schema: - $ref: '#/definitions/StatementResponse' - default: - description: Other status codes. - schema: - $ref: '#/definitions/ErrorResponse' - get: - tags: - - LivyApiSession - summary: List statements in an active session. - operationId: LivyApiSession_ListSparkSessionStatementsAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace name. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. 
- required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: '' - required: true - type: string - format: uuid - responses: - '200': - description: OK - schema: - $ref: '#/definitions/StatementsResponse' - default: - description: Other status codes. - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}: - get: - tags: - - LivyApiSession - summary: Gets a spark statement from a spark session. - operationId: LivyApiSession_GetSparkSessionStatementAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: sessionId - in: path - description: Spark Session Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: statementId - in: path - description: Statement Id. - required: true - type: integer - format: int32 - - name: from - in: query - description: Offset (in byte) which the output should begin from. - type: integer - format: int32 - - name: size - in: query - description: Size (in byte) of the returned output. - type: integer - format: int32 - responses: - '200': - description: OK - default: - description: Other status codes. - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel: - post: - tags: - - LivyApiSession - summary: Cancels a statement execution. 
- operationId: LivyApiSession_CancelSparkStatementAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: Session Id. - required: true - type: string - format: uuid - - name: statementId - in: path - description: '' - required: true - type: integer - format: int32 - - responses: - '200': - description: OK - schema: - $ref: '#/definitions/StatementCancellationResponse' - default: - description: Other status codes. - schema: - $ref: '#/definitions/ErrorResponse' - /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout: - post: - tags: - - LivyApiSession - summary: Resets the timeout time of a session. - operationId: LivyApiSession_ResetSparkSessionTimeoutAsync - consumes: [] - produces: - - application/json - - text/json - parameters: - - name: workspaceId - in: path - description: Workspace Id. - required: true - type: string - format: uuid - - name: lakehouseId - in: path - description: Lakehouse Id. - required: true - type: string - format: uuid - - name: livyApiVersion - in: path - required: true - type: string - - name: sessionId - in: path - description: Session Id. - required: true - type: string - format: uuid - responses: - '204': - description: OK - default: - description: Other status codes. - schema: - $ref: '#/definitions/ErrorResponse' -definitions: - StatementResponse: - description: Statement Response. 
- type: object - properties: - id: - format: int32 - type: integer - code: - type: string - state: - enum: - - waiting - - running - - available - - Error - - cancelling - - cancelled - type: string - sourceId: - type: string - output: - $ref: '#/definitions/StatementOutput' - StatementOutput: - description: Statement Output. - type: object - properties: - status: - type: string - execution_count: - format: int32 - type: integer - data: - type: object - ename: - type: string - evalue: - type: string - traceback: - type: array - items: - type: string - BatchRequest: - description: Request for runing batch job. - type: object - allOf: - - $ref: '#/definitions/SessionRequest' - - type: object - properties: - file: - type: string - className: - type: string - args: - type: array - items: - type: string - jars: - type: array - items: - type: string - files: - type: array - items: - type: string - pyFiles: - type: array - items: - type: string - archives: - type: array - items: - type: string - SessionRequest: - description: Request for acquiring a Session. - type: object - properties: - name: - type: string - archives: - type: array - items: - type: string - conf: - type: object - additionalProperties: - type: string - tags: - description: Gets or sets the optional tags. - type: object - additionalProperties: - type: string - driverMemory: - type: string - driverCores: - format: int32 - type: integer - executorMemory: - type: string - executorCores: - format: int32 - type: integer - numExecutors: - format: int32 - type: integer - StatementRequest: - description: Statement Request Body. - type: object - properties: - code: - type: string - kind: - type: string - sourceId: - type: string - LivySessionStateInformation: - description: Livy Session State Information. 
- type: object - properties: - notStartedAt: - format: date-time - type: string - startingAt: - format: date-time - type: string - idleAt: - format: date-time - type: string - deadAt: - format: date-time - type: string - shuttingDownAt: - format: date-time - type: string - killedAt: - format: date-time - type: string - recoveringAt: - format: date-time - type: string - busyAt: - format: date-time - type: string - errorAt: - format: date-time - type: string - currentState: - type: string - jobCreationRequest: - $ref: '#/definitions/SessionRequest' - SchedulerInformation: - description: Scheduler Information. - type: object - properties: - submittedAt: - format: date-time - type: string - queuedAt: - format: date-time - type: string - scheduledAt: - format: date-time - type: string - endedAt: - format: date-time - type: string - cancellationRequestedAt: - format: date-time - type: string - currentState: - enum: - - Queued - - Scheduled - - Ended - type: string - SparkServicePluginInformation: - description: Spark Service Plugin Information. - type: object - properties: - preparationStartedAt: - format: date-time - type: string - resourceAcquisitionStartedAt: - format: date-time - type: string - submissionStartedAt: - format: date-time - type: string - monitoringStartedAt: - format: date-time - type: string - cleanupStartedAt: - format: date-time - type: string - currentState: - enum: - - Preparation - - ResourceAcquisition - - Queued - - Submission - - Monitoring - - Cleanup - - Ended - type: string - LivyRequestBase: - description: Livy Request Base. 
- type: object - properties: - name: - type: string - file: - type: string - className: - type: string - args: - type: array - items: - type: string - jars: - type: array - items: - type: string - files: - type: array - items: - type: string - pyFiles: - type: array - items: - type: string - archives: - type: array - items: - type: string - conf: - type: object - additionalProperties: - type: string - driverMemory: - type: string - driverCores: - format: int32 - type: integer - executorMemory: - type: string - executorCores: - format: int32 - type: integer - numExecutors: - format: int32 - type: integer - StatementsResponse: - description: Livy Statement Response Body. - type: object - properties: - statements: - type: array - items: - $ref: '#/definitions/StatementResponse' - total_statements: - format: int32 - type: integer - StatementCancellationResponse: - description: Livy Statement CancellationResponse. - type: object - properties: - msg: - type: string - BatchStateInformation: - description: Batch State Information. - type: object - properties: - notStartedAt: - format: date-time - type: string - startingAt: - format: date-time - type: string - runningAt: - format: date-time - type: string - deadAt: - format: date-time - type: string - successAt: - format: date-time - type: string - killedAt: - format: date-time - type: string - recoveringAt: - format: date-time - type: string - currentState: - type: string - jobCreationRequest: - $ref: '#/definitions/LivyRequestBase' - ErrorResponse: - description: The error response. - required: - - message - - errorCode - allOf: - - $ref: '#/definitions/ErrorResponseDetails' - properties: - requestId: - type: string - description: ID of the request associated with the error. - readOnly: true - moreDetails: - description: List of additional error details. - type: array - items: - $ref: '#/definitions/ErrorResponseDetails' - readOnly: true - readOnly: true - ErrorResponseDetails: - description: The error response details. 
- required: - - message - - errorCode - properties: - errorCode: - type: string - description: A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users. - message: - type: string - description: A human readable representation of the error. - relatedResource: - $ref: '#/definitions/ErrorRelatedResource' - ErrorRelatedResource: - description: The error related resource details object. - required: - - resourceId - - resourceType - properties: - resourceId: - type: string - description: Resource ID involved in the error. - resourceType: - type: string - description: Resource type involved in the error. - LivySparkActivityList: - description: List of batches or sessions. - type: object - properties: - items: - description: List of items. - type: array - items: - $ref: '#/definitions/LivySparkActivity' - totalCountOfMatchedItems: - format: int32 - description: Total count of matched items. - type: integer - pageSize: - format: int32 - description: Page size. - type: integer - LivySparkActivity: - description: Batch or Session description when getting a list. - type: object - properties: - id: - description: Activity ID. - type: string - appId: - description: Spark application ID. - type: string - name: - description: Batch or Session Name. - type: string - workspaceId: - description: Workspace ID. - type: string - submitterId: - description: Submitter ID. - type: string - submitterName: - description: Submitter name. - type: string - artifactId: - description: Artifact ID. - type: string - cancellationReason: - description: Cancellation reason. - type: string - result: - description: Job result. - enum: - - Uncertain - - Succeeded - - Failed - - Cancelled - type: string - submittedAt: - format: date-time - description: Submitted at time. - type: string - startedAt: - format: date-time - description: Started at time. 
- type: string - endedAt: - format: date-time - description: Ended at time. - type: string - errorSource: - description: Error source. - enum: - - System - - User - - Unknown - - Dependency - type: string - errorCode: - description: Error Code. - type: string - tags: - description: Optional tags. - type: object - additionalProperties: - type: string - schedulerState: - description: Scheduler state. - enum: - - Queued - - Scheduled - - Ended - type: string - pluginState: - description: Plugin state. - enum: - - Preparation - - ResourceAcquisition - - Queued - - Submission - - Monitoring - - Cleanup - - Ended - type: string - livyState: - description: Gets or sets livy state. - type: string - isJobTimedOut: - description: If job is timed out. - type: boolean - BatchResponse: - description: Class that represents livy responses for batch. - type: object - properties: - livyInfo: - $ref: '#/definitions/BatchStateInformation' - fabricBatchStateInfo: - $ref: '#/definitions/BatchStateInfo' - name: - description: Name of the batch. - type: string - id: - description: ID created for the batch. - type: string - appId: - description: Application id of this batch. - type: string - appInfo: - description: Detailed application info. - type: object - additionalProperties: - type: string - artifactId: - description: Hosting artifact Id. - type: string - errorInfo: - description: Detailed error information. - type: array - items: - $ref: '#/definitions/ErrorInformation' - jobType: - description: Spark job type. - enum: - - SparkBatch - - SparkSession - - ScopeBatch - - JupyterEnvironment - type: string - submitterId: - description: ID of e user who submitted the session. - type: string - submitterName: - description: Name of the user who submitted the session. - type: string - log: - description: Log lines. 
- type: array - items: - type: string - pluginInfo: - $ref: '#/definitions/SparkServicePluginInformation' - schedulerInfo: - $ref: '#/definitions/SchedulerInformation' - state: - description: State of the batch or session. - enum: - - starting - - running - - dead - - success - - killed - - idle - - error - - shutting_down - - not_started - - busy - - recovering - - submitting - - not_submitted - type: string - tags: - description: Optional tags. - type: object - additionalProperties: - type: string - result: - enum: - - Uncertain - - Succeeded - - Failed - - Cancelled - type: string - cancellationReason: - description: Cancellation reason. - type: string - BatchStateInfo: - description: Batch state info. - type: object - properties: - state: - description: State of the batch acquisition. - enum: - - unknown - - expired - - queued - - libraryPackaging - - submitting - - cancelling - - cancelled - - error - type: string - errorMessage: - description: Error message if the state is in "error". - type: string - ErrorInformation: - description: Error Information. - type: object - properties: - message: - type: string - errorCode: - type: string - source: - enum: - - System - - User - - Unknown - - Dependency - type: string - SessionResponse: - type: object - properties: - fabricSessionStateInfo: - $ref: '#/definitions/SessionStateInfo' - livyInfo: - $ref: '#/definitions/LivySessionStateInformation' - name: - description: Name of the session. - type: string - id: - description: Session ID created for the session. - type: string - appId: - description: The application id of this session. - type: string - appInfo: - description: Detailed application info. - type: object - additionalProperties: - type: string - artifactId: - description: Hosting artifact ID. - type: string - errorInfo: - description: Detailed error information. - type: array - items: - $ref: '#/definitions/ErrorInformation' - jobType: - description: Spark job type. 
- enum: - - SparkBatch - - SparkSession - - ScopeBatch - - JupyterEnvironment - type: string - submitterId: - description: ID of the user who submitted the sessionn. - type: string - submitterName: - description: Name of the user who submitted the session. - type: string - log: - description: Log lines. - type: array - items: - type: string - pluginInfo: - $ref: '#/definitions/SparkServicePluginInformation' - schedulerInfo: - $ref: '#/definitions/SchedulerInformation' - state: - description: Gets or sets the session state. - enum: - - starting - - running - - dead - - success - - killed - - idle - - error - - shutting_down - - not_started - - busy - - recovering - - submitting - - not_submitted - type: string - tags: - description: Optional tags. - type: object - additionalProperties: - type: string - result: - enum: - - Uncertain - - Succeeded - - Failed - - Cancelled - type: string - cancellationReason: - description: Cancellation reason. - type: string - SessionStateInfo: - description: Session state info. - type: object - properties: - state: - description: Session acquisiton state. - enum: - - queued - - libraryPackaging - - acquiringSession - - cancelling - - cancelled - - error - - unknown - type: string - errorMessage: - description: Error message when in "error" state. 
- type: string diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb b/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb deleted file mode 100644 index d713ff1b..00000000 --- a/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb +++ /dev/null @@ -1,296 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Authenticate and Request a token" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from msal import PublicClientApplication\n", - "from dotenv import load_dotenv\n", - "import os\n", - "import requests\n", - "import time\n", - "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "tenant_id = os.getenv('TENANT_ID')\n", - "client_id = os.getenv('CLIENT_ID')\n", - "workspace_id = os.getenv('WORKSPACE_ID')\n", - "lakehouse_id = os.getenv('LAKEHOUSE_ID')\n", - "redirect_url_port = os.getenv('REDIRECT_URL_PORT')\n", - "api_version = os.getenv('API_VERSION')\n", - "\n", - "app = PublicClientApplication(\n", - " client_id,\n", - " authority= f\"https://login.microsoftonline.com/{tenant_id}\", \n", - ")\n", - "\n", - "result = None\n", - "\n", - " # If no cached tokens or user interaction needed, acquire tokens interactively\n", - "if not result:\n", - " result = app.acquire_token_interactive(scopes=[\"https://api.fabric.microsoft.com/Lakehouse.Execute.All\", \"https://api.fabric.microsoft.com/Lakehouse.Read.All\", \"https://api.fabric.microsoft.com/Item.ReadWrite.All\", \n", - " \"https://api.fabric.microsoft.com/Workspace.ReadWrite.All\", \"https://api.fabric.microsoft.com/Code.AccessStorage.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All\", \n", - " \"https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All\", \"https://api.fabric.microsoft.com/Code.AccessFabric.All\"],\n", - " port=f\"{redirect_url_port}\")\n", - "\n", - "# 
Get the access token\n", - "if \"access_token\" in result:\n", - " access_token = result[\"access_token\"]\n", - "else:\n", - " print(result.get(\"error\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(access_token)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Request a Livy Session" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The request to create the Livy session is submitted:{'id': '462fe66a-9858-408c-813c-b848da3d9e60', 'artifactId': '0db8ae59-a739-4b50-844c-ee2be3519871'}\n", - "462fe66a-9858-408c-813c-b848da3d9e60\n" - ] - } - ], - "source": [ - "if access_token:\n", - " api_base_url_mist='https://api.fabric.microsoft.com/v1'\n", - " livy_base_url = api_base_url_mist + \"/workspaces/\"+workspace_id+\"/lakehouses/\"+lakehouse_id +\"/livyApi/versions/\"+api_version+\"/sessions\"\n", - " headers = {\"Authorization\": \"Bearer \" + access_token}\n", - "\n", - "# Create a Livy session\n", - "create_livy_session = requests.post(livy_base_url, headers=headers, json={\n", - " \"name\": \"test pyspark session from python code\",\n", - " \"archives\": [],\n", - " \"conf\": { \n", - " },\n", - " \"tags\": {\n", - " },\n", - " \"driverMemory\": \"7g\",\n", - " \"driverCores\": 1,\n", - " \"executorMemory\": \"7g\",\n", - " \"executorCores\": 1,\n", - " \"numExecutors\": 2\n", - "})\n", - "print('The request to create the Livy session is submitted:' + str(create_livy_session.json()))\n", - "\n", - "livy_session_id = create_livy_session.json()['id']\n", - "print(livy_session_id)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## List Livy Sessions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "livy_session_url = livy_base_url\n", - "get_sessions_response 
= requests.get(livy_session_url, headers=headers)\n", - "print(get_sessions_response.json())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get details of a Livy Session" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", - "get_session_response = requests.get(livy_session_url, headers=headers)\n", - "\n", - "while get_session_response.json()[\"state\"] != \"idle\":\n", - " time.sleep(5)\n", - " get_session_response = requests.get(livy_session_url, headers=headers)\n", - " print(get_session_response.json())\n", - " \n", - "print(get_session_response.json())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Execute a statement on a Spark session - Local dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# call get session API\n", - "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", - "get_session_response = requests.get(livy_session_url, headers=headers)\n", - "print(get_session_response.json())\n", - "while get_session_response.json()[\"state\"] != \"idle\":\n", - " time.sleep(5)\n", - " get_session_response = requests.get(livy_session_url, headers=headers)\n", - "\n", - "execute_statement = livy_session_url + \"/statements\"\n", - "code =\"\"\"\n", - "df = spark.createDataFrame([{\"id\": 1, \"name\": \"Mounir\"}])\n", - "df.show()\n", - "\"\"\"\n", - "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n", - " \"code\": f\"{code}\",\n", - " \"kind\": \"pyspark\"\n", - " })\n", - "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n", - "\n", - "statement_id = str(execute_statement_response.json()['id'])\n", - "get_statement = livy_session_url+ \"/statements/\" + statement_id\n", - "get_statement_response = 
requests.get(get_statement, headers=headers)\n", - "\n", - "while get_statement_response.json()[\"state\"] != \"available\":\n", - " # Sleep for 5 seconds before making the next request\n", - " time.sleep(5)\n", - " print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n", - "\n", - " # Make the next request\n", - " get_statement_response = requests.get(get_statement, headers=headers)\n", - "\n", - "rst = get_statement_response.json()['output']['data']['text/plain']\n", - "print(rst)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Execute a statement on a Spark session - Data on the LakeHouse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call get session API\n", - "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", - "get_session_response = requests.get(livy_session_url, headers=headers)\n", - "print(get_session_response.json())\n", - "while get_session_response.json()[\"state\"] != \"idle\":\n", - " time.sleep(5)\n", - " get_session_response = requests.get(livy_session_url, headers=headers)\n", - "\n", - "execute_statement = livy_session_url + \"/statements\"\n", - "code =\"\"\"\n", - "df = spark.sql(\"SELECT count(*) as Total, AGE FROM person GROUP BY AGE\").show()\n", - "df.show()\n", - "\"\"\"\n", - "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n", - " \"code\": f\"{code}\",\n", - " \"kind\": \"pyspark\"\n", - " })\n", - "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n", - "\n", - "statement_id = str(execute_statement_response.json()['id'])\n", - "get_statement = livy_session_url+ \"/statements/\" + statement_id\n", - "get_statement_response = requests.get(get_statement, headers=headers)\n", - "\n", - "while get_statement_response.json()[\"state\"] != \"available\":\n", - " # Sleep for 5 seconds before making the next 
request\n", - " time.sleep(5)\n", - " print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n", - "\n", - " # Make the next request\n", - " get_statement_response = requests.get(get_statement, headers=headers)\n", - "\n", - "rst = get_statement_response.json()['output']['data']['text/plain']\n", - "print(rst)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stop and delete a Livy Session" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", - "\n", - "delete_session_response = requests.delete(livy_session_url, headers=headers)\n", - "print(delete_session_response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3.10.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric.py b/apache/livy/ExposedUI/python/fabric/test_fabric.py deleted file mode 100644 index 0bdcb651..00000000 --- a/apache/livy/ExposedUI/python/fabric/test_fabric.py +++ /dev/null @@ -1,45 +0,0 @@ - -from msal import PublicClientApplication -from dotenv import load_dotenv -import os -import requests -import time - -# Load environment variables from .env file -load_dotenv() -tenant_id = os.getenv('TENANT_ID') -client_id = os.getenv('CLIENT_ID') -workspace_id = os.getenv('WORKSPACE_ID') -lakehouse_id = os.getenv('LAKEHOUSE_ID') -redirect_url_port = os.getenv('REDIRECT_URL_PORT') -api_version = os.getenv('API_VERSION') - -app = PublicClientApplication( - 
client_id, - authority= f"https://login.microsoftonline.com/{tenant_id}", -) - -result = None - - # If no cached tokens or user interaction needed, acquire tokens interactively -if not result: - result = app.acquire_token_interactive(scopes=["https://api.fabric.microsoft.com/Lakehouse.Execute.All", "https://api.fabric.microsoft.com/Lakehouse.Read.All", "https://api.fabric.microsoft.com/Item.ReadWrite.All", - "https://api.fabric.microsoft.com/Workspace.ReadWrite.All", "https://api.fabric.microsoft.com/Code.AccessStorage.All", "https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All", - "https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All", "https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All", "https://api.fabric.microsoft.com/Code.AccessFabric.All"], - port=f"{redirect_url_port}") - -# Get the access token -if "access_token" in result: - access_token = result["access_token"] -else: - print(result.get("error")) - -if access_token: - api_base_url_mist='https://api.fabric.microsoft.com/v1' - livy_base_url = api_base_url_mist + "/workspaces/"+workspace_id+"/lakehouses/"+lakehouse_id +"/livyApi/versions/"+api_version+"/sessions" - headers = {"Authorization": "Bearer " + access_token} - -# List Livy essions -livy_session_url = livy_base_url -get_sessions_response = requests.get(livy_session_url, headers=headers) -print(get_sessions_response.json()) diff --git a/apache/livy/ExposedUI/python/livy/delete_session.py b/apache/livy/ExposedUI/python/livy/delete_session.py deleted file mode 100644 index b0c5047b..00000000 --- a/apache/livy/ExposedUI/python/livy/delete_session.py +++ /dev/null @@ -1,5 +0,0 @@ -import json, pprint, requests, textwrap -host = 'http://localhost:8998' - -r = requests.delete(host + '/sessions/1') -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/init_java_gateway.py b/apache/livy/ExposedUI/python/livy/init_java_gateway.py deleted file mode 100644 index 2304f509..00000000 --- 
a/apache/livy/ExposedUI/python/livy/init_java_gateway.py +++ /dev/null @@ -1,26 +0,0 @@ -import json, pprint, requests, textwrap -host = 'http://localhost:8998' -headers = {'Content-Type': 'application/json'} -statements_url = host + '/sessions/1/statements' - -data = { - 'code': textwrap.dedent(""" - # from https://stackoverflow.com/questions/65713299/javapackage-object-is-not-callable-error-executing-explain-in-pyspark-3-0 - # from https://github.com/apache/spark/blob/87bf6b0ea4ca0618c8604895d05037edce8b7cb0/python/pyspark/java_gateway.py#L153 - - from py4j.java_gateway import java_import - java_import(spark._sc._jvm, "org.apache.spark.SparkConf") - java_import(spark._sc._jvm, "org.apache.spark.api.java.*") - java_import(spark._sc._jvm, "org.apache.spark.api.python.*") - java_import(spark._sc._jvm, "org.apache.spark.ml.python.*") - java_import(spark._sc._jvm, "org.apache.spark.mllib.api.python.*") - java_import(spark._sc._jvm, "org.apache.spark.resource.*") - - java_import(spark._sc._jvm, "org.apache.spark.sql.*") - java_import(spark._sc._jvm, "org.apache.spark.sql.api.python.*") - java_import(spark._sc._jvm, "org.apache.spark.sql.hive.*") - """) -} - -r = requests.post(statements_url, data=json.dumps(data), headers=headers) -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/run_code.py b/apache/livy/ExposedUI/python/livy/run_code.py deleted file mode 100644 index 464da289..00000000 --- a/apache/livy/ExposedUI/python/livy/run_code.py +++ /dev/null @@ -1,15 +0,0 @@ -import json, pprint, requests, textwrap -host = 'http://localhost:8998' -headers = {'Content-Type': 'application/json'} -statements_url = host + '/sessions/1/statements' - -data = { - 'code': textwrap.dedent(""" - df = spark.createDataFrame([{"id": 1, "name": "Mounir"}]) - - df.show() - """) -} - -r = requests.post(statements_url, data=json.dumps(data), headers=headers) -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/run_code_external_file.py 
b/apache/livy/ExposedUI/python/livy/run_code_external_file.py deleted file mode 100644 index 53085659..00000000 --- a/apache/livy/ExposedUI/python/livy/run_code_external_file.py +++ /dev/null @@ -1,16 +0,0 @@ -import json, pprint, requests, textwrap -host = 'http://localhost:8998' -headers = {'Content-Type': 'application/json'} -statements_url = host + '/sessions/1/statements' - -external_python_file = './src/external_file.py' - -with open(external_python_file, 'r') as file: - pyspark_code = file.read() - -data = { - 'code': pyspark_code -} - -r = requests.post(statements_url, data=json.dumps(data), headers=headers) -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/src/external_file.py b/apache/livy/ExposedUI/python/livy/src/external_file.py deleted file mode 100644 index def07fb9..00000000 --- a/apache/livy/ExposedUI/python/livy/src/external_file.py +++ /dev/null @@ -1,12 +0,0 @@ -# running the Apache Livy example (converted into Python3 syntax) https://livy.incubator.apache.org/examples/ -# The code is running from an external python file - -import random -NUM_SAMPLES = 100000 - -def sample(p): - x, y = random.random(), random.random() - return 1 if x*x + y*y < 1 else 0 - -count = sc.parallelize(range(0, NUM_SAMPLES)).map(sample).reduce(lambda a, b: a + b) -print("Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)) diff --git a/apache/livy/ExposedUI/python/livy/start_session.py b/apache/livy/ExposedUI/python/livy/start_session.py deleted file mode 100644 index b810a4fe..00000000 --- a/apache/livy/ExposedUI/python/livy/start_session.py +++ /dev/null @@ -1,11 +0,0 @@ -# from https://livy.apache.org/examples/ - -# requires pip install requests -import json, pprint, requests, textwrap -host = 'http://localhost:8998' -headers = {'Content-Type': 'application/json'} - -data = {'kind': 'pyspark', 'name': 'test pyspark session from python code', 'proxyUser': 'Mounir', 'executorMemory': '2g'} - -r = requests.post(host + '/sessions', 
data=json.dumps(data), headers=headers) -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/wait_for_idle.py b/apache/livy/ExposedUI/python/livy/wait_for_idle.py deleted file mode 100644 index 32b9bd66..00000000 --- a/apache/livy/ExposedUI/python/livy/wait_for_idle.py +++ /dev/null @@ -1,5 +0,0 @@ -import json, pprint, requests, textwrap -host = 'http://localhost:8998' - -r = requests.get(host + '/sessions/1') -pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/spark-submit/test_pandas.py b/apache/livy/ExposedUI/python/spark-submit/test_pandas.py deleted file mode 100644 index bf8de60b..00000000 --- a/apache/livy/ExposedUI/python/spark-submit/test_pandas.py +++ /dev/null @@ -1,20 +0,0 @@ -import pandas as pd - -from pyspark.sql import SparkSession - -app_name = "simple-app-pandas" - -spark = SparkSession.builder.appName(app_name).getOrCreate() - -# Creating a DataFrame from a dictionary -data = { - 'Name': ['Alice', 'Bob', 'Charlie'], - 'Age': [25, 30, 35], - 'City': ['New York', 'Los Angeles', 'Chicago'] -} - -df = pd.DataFrame(data) -print(df) - -spark.stop() - diff --git a/apache/livy/ExposedUI/python/spark-submit/test_spark.py b/apache/livy/ExposedUI/python/spark-submit/test_spark.py deleted file mode 100644 index 5c6f6e95..00000000 --- a/apache/livy/ExposedUI/python/spark-submit/test_spark.py +++ /dev/null @@ -1,11 +0,0 @@ -from pyspark.sql import SparkSession - -app_name = "simple-app" - -spark = SparkSession.builder.appName(app_name).getOrCreate() - -df = spark.createDataFrame([{"id": 1, "name": "Mounir"}]) - -df.show() - -spark.stop() \ No newline at end of file From 74cec2c4d3361ea93c7a1f892b72fb147627e80b Mon Sep 17 00:00:00 2001 From: joernNNN Date: Fri, 20 Feb 2026 16:14:51 +0400 Subject: [PATCH 7/7] update readme to include alternative testing approach --- apache/livy/ExposedUI/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md index 
ac75f779..76a77fa8 100644 --- a/apache/livy/ExposedUI/README.md +++ b/apache/livy/ExposedUI/README.md @@ -19,6 +19,11 @@ curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(o curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id # output.data is the stdout ``` +#### Alternative approach +Instead, you can send a callback address to the `batches` endpoint to check whether the Apache Livy instance is exposed +```bash +curl -X POST -H "Content-Type: application/json" -d '{"file":"callback_address"}' http://localhost:8998/batches +``` ---