
Commit a8d331d

Merge pull request #210 from KhiopsML/175-integrationtests-gcs-s3-remote-files
Reactivate the integration tests for remote file access (GCS, S3)
2 parents cbf932c + 292bbbe commit a8d331d

14 files changed: +343 -80 lines changed

.github/workflows/dev-docker.yml

Lines changed: 17 additions & 0 deletions

@@ -5,6 +5,8 @@ env:
   DEFAULT_IMAGE_INCREMENT: 0
   DEFAULT_SERVER_REVISION: main
   DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 3.13
+  DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.11
+  DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.13
 on:
   pull_request:
     paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml]
@@ -34,6 +36,14 @@ on:
         type: string
         default: main
         description: Khiops Server Revision
+      khiops-gcs-driver-revision:
+        type: string
+        default: 0.0.11
+        description: Driver version for Google Cloud Storage remote files
+      khiops-s3-driver-revision:
+        type: string
+        default: 0.0.13
+        description: Driver version for AWS-S3 remote files
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
@@ -55,6 +65,8 @@ jobs:
           echo "KHIOPSDEV_OS_CODENAME=$(echo '${{ matrix.khiopsdev-os }}' | tr -d '0-9.')" >> "$GITHUB_ENV"
           echo "SERVER_REVISION=${{ inputs.server-revision || env.DEFAULT_SERVER_REVISION }}" >> "$GITHUB_ENV"
           echo "IMAGE_URL=ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}" >> "$GITHUB_ENV"
+          echo "KHIOPS_GCS_DRIVER_REVISION=${{ inputs.khiops-gcs-driver-revision || env.DEFAULT_KHIOPS_GCS_DRIVER_REVISION }}" >> "$GITHUB_ENV"
+          echo "KHIOPS_S3_DRIVER_REVISION=${{ inputs.khiops-s3-driver-revision || env.DEFAULT_KHIOPS_S3_DRIVER_REVISION }}" >> "$GITHUB_ENV"
       - name: Checkout khiops-python sources
         uses: actions/checkout@v4
       - name: Set up Docker Buildx
@@ -81,13 +93,18 @@ jobs:
       - name: Build image and push it to GitHub Container Registry
         uses: docker/build-push-action@v5
         with:
+          # Special hostname used by the integration tests for remote file access,
+          # added using inputs because /etc/hosts is read-only for alternate builders (buildx via moby buildkit)
+          add-hosts: s3-bucket.localhost:127.0.0.1
           context: ./packaging/docker/khiopspydev/
           file: ./packaging/docker/khiopspydev/Dockerfile.${{ env.KHIOPSDEV_OS_CODENAME }}
           build-args: |
             "KHIOPS_REVISION=${{ env.KHIOPS_REVISION }}"
             "KHIOPSDEV_OS=${{ matrix.khiopsdev-os }}"
             "SERVER_REVISION=${{ env.SERVER_REVISION }}"
             "PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}"
+            "KHIOPS_GCS_DRIVER_REVISION=${{ env.KHIOPS_GCS_DRIVER_REVISION }}"
+            "KHIOPS_S3_DRIVER_REVISION=${{ env.KHIOPS_S3_DRIVER_REVISION }}"
           tags: ${{ env.DOCKER_IMAGE_TAGS }}
           # Push only on manual request
           push: ${{ inputs.push || false }}

Lines changed: 52 additions & 8 deletions

@@ -1,5 +1,5 @@
 ---
-name: Unit Tests
+name: Tests
 env:
   DEFAULT_SAMPLES_REVISION: 10.2.4
   DEFAULT_KHIOPS_DESKTOP_REVISION: 10.2.4
@@ -15,11 +15,11 @@ on:
       khiops-desktop-revision:
         default: 10.2.4
         description: Khiops Windows Desktop Application Version
-      run-long-tests:
+      run-expensive-tests:
         type: boolean
         required: false
         default: false
-        description: Execute long tests
+        description: Execute expensive tests
   pull_request:
     paths:
       - khiops/**.py
@@ -110,9 +110,43 @@ jobs:
            rm -rf khiops.egg-info
          done
       - name: Prepare Unit Tests Environment
-        if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-long-tests
-        run: echo "UNITTEST_ONLY_SHORT_TESTS=true" >> "$GITHUB_ENV"
-      - name: Run Unit Tests
+        if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-expensive-tests
+        run: echo "SKIP_EXPENSIVE_TESTS=true" >> "$GITHUB_ENV"
+      - name: Prepare Integration Tests on remote files
+        env:
+          AWS_ENDPOINT_URL: http://localhost:4569
+        shell: bash
+        run: |
+          # Prepare AWS-S3 credentials and configuration
+          mkdir -p ${GITHUB_WORKSPACE}/.aws/
+          cat << EOF > ${GITHUB_WORKSPACE}/.aws/credentials
+          [default]
+          aws_access_key_id=KEY
+          aws_secret_access_key=SECRET
+          EOF
+          cat << EOF > ${GITHUB_WORKSPACE}/.aws/configuration
+          [default]
+          endpoint_url=${AWS_ENDPOINT_URL}
+          region=eu-north-1
+          EOF
+          echo "Generated AWS credentials..."
+          cat ${GITHUB_WORKSPACE}/.aws/credentials
+          echo "Generated AWS configuration..."
+          cat ${GITHUB_WORKSPACE}/.aws/configuration
+          /scripts/run_fake_remote_file_servers.sh . # launch the servers in the background
+      - name: Authenticate to GCP using "Workload Identity Federation"
+        # For integration tests on GCS we use a real Google account
+        # Retrieve the Google credentials through "Workload Identity Federation"
+        # see https://github.com/google-github-actions/auth?tab=readme-ov-file#workload-identity-federation-through-a-service-account
+        uses: google-github-actions/auth@v2
+        with:
+          service_account: khiops-gcs-driver-test-sa@ino-olr-dak-ideal-sbx.iam.gserviceaccount.com
+          workload_identity_provider: projects/322269704080/locations/global/workloadIdentityPools/github/providers/my-repo
+          # 'create_credentials_file' is true by default but let's make it explicit
+          # After authentication, the required GOOGLE_APPLICATION_CREDENTIALS environment variable is exported
+          # https://github.com/google-github-actions/auth?tab=readme-ov-file#inputs-miscellaneous
+          create_credentials_file: true
+      - name: Run Unit & Integration Tests
         env:
           KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples
           KHIOPS_DOCKER_RUNNER_URL: https://localhost:11000
@@ -124,6 +158,16 @@
           rmaps_base_oversubscribe: true
           # Oversubscribe for MPI > 4.x
           OMPI_MCA_rmaps_base_oversubscribe: true
+          # for the tests with GCS
+          GCS_BUCKET_NAME: data-test-khiops-driver-gcs/khiops_data
+          GCS_DRIVER_LOGLEVEL: info # set to debug for diagnosis
+          # for the tests with S3
+          S3_DRIVER_LOGLEVEL: info # set to debug for diagnosis
+          S3_BUCKET_NAME: s3-bucket
+          AWS_SHARED_CREDENTIALS_FILE: ${{ github.workspace }}/.aws/credentials
+          AWS_CONFIG_FILE: ${{ github.workspace }}/.aws/configuration
+          # Var for tests with S3
+          no_proxy: localhost
         run: |
           # This is needed so that the Git tag is parsed and the khiops-python
           # version is retrieved
@@ -138,10 +182,10 @@
            $CONDA run --no-capture-output -n "$CONDA_ENV" coverage report -m
            $CONDA run --no-capture-output -n "$CONDA_ENV" coverage xml -o "reports/$CONDA_ENV/py-coverage.xml"
          done
-      - name: Display Unit Test Reports
+      - name: Display Test Reports
        uses: dorny/test-reporter@v1
        with:
-          name: Unit Tests ${{ matrix.python-version }}
+          name: Run Tests ${{ matrix.python-version }}
          path: >-
            reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml,
            reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
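
For the GCS part of these integration tests, the auth step above exports GOOGLE_APPLICATION_CREDENTIALS and GCS_BUCKET_NAME carries both the bucket name and a key prefix. A minimal sketch of how that setup could be checked from Python, assuming google-cloud-storage is installed (the object listing is illustrative only):

    import os

    from google.cloud import storage  # the client reads GOOGLE_APPLICATION_CREDENTIALS automatically

    # GCS_BUCKET_NAME is "<bucket>/<prefix>", e.g. "data-test-khiops-driver-gcs/khiops_data"
    bucket_name, _, prefix = os.environ["GCS_BUCKET_NAME"].partition("/")

    client = storage.Client()
    # List a few objects under the test prefix to confirm that the federated credentials work
    blobs = list(client.list_blobs(bucket_name, prefix=prefix, max_results=5))
    print(f"Found {len(blobs)} object(s) under gs://{bucket_name}/{prefix}")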

khiops/core/internals/filesystems.py

Lines changed: 15 additions & 21 deletions

@@ -23,6 +23,7 @@
 try:
     import boto3
     import boto3.session
+    from boto3.exceptions import S3UploadFailedError
     from botocore.exceptions import ClientError

     boto3_import_error = None
@@ -254,7 +255,7 @@ def copy_from_local(uri_or_path, local_path):
    Raises
    ------
    RuntimeError
-        If there was a problem when removing.
+        If there was a problem when copying.
    """
    create_resource(uri_or_path).copy_from_local(local_path)

@@ -272,7 +273,7 @@ def copy_to_local(uri_or_path, local_path):
    Raises
    ------
    RuntimeError
-        If there was a problem when removing.
+        If there was a problem when copying.
    """
    create_resource(uri_or_path).copy_to_local(local_path)

@@ -668,29 +669,22 @@ def remove(self):
            )

     def copy_from_local(self, local_path):
-        response = self.s3_client.Bucket(self.uri_info.netloc).upload_file(
-            local_path, self.uri_info.path[1:]
-        )
-        status_code = response["ResponseMetadata"]["HTTPStatusCode"]
-        copy_ok = 200 <= status_code <= 299
-        if not copy_ok:
-            raise RuntimeError(
-                f"S3 copy_from_local failed {self.uri} with code {status_code}: "
-                + json.dumps(response)
+        try:
+            self.s3_client.Bucket(self.uri_info.netloc).upload_file(
+                local_path, self.uri_info.path[1:]
             )
+        # normalize the raised exception
+        except S3UploadFailedError as exc:
+            raise RuntimeError(f"S3 copy_from_local failed {self.uri}") from exc

     def copy_to_local(self, local_path):
-        response = self.s3_client.Bucket(self.uri_info.netloc).download_file(
-            local_path, self.uri_info.path[1:]
-        )
-        status_code = response["ResponseMetadata"]["HTTPStatusCode"]
-        copy_ok = 200 <= status_code <= 299
-        if not copy_ok:
-            raise RuntimeError(
-                f"S3 download failed {self.uri} with code {status_code}: "
-                + json.dumps(response)
+        try:
+            self.s3_client.Bucket(self.uri_info.netloc).download_file(
+                self.uri_info.path[1:], local_path
             )
-        return copy_ok
+        # normalize the raised exception
+        except S3UploadFailedError as exc:
+            raise RuntimeError(f"S3 download failed {self.uri}") from exc

     def list_dir(self):
         # Add an extra slash to the path to treat it as a folder
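
In practice, the change above means callers of the module-level helpers see a single exception type for these S3 failures. A minimal sketch, assuming the import path below and a hypothetical s3:// test object:

    from khiops.core.internals import filesystems as fs

    uri = "s3://s3-bucket/khiops_data/Iris.txt"  # hypothetical object, for illustration only
    try:
        fs.copy_to_local(uri, "/tmp/Iris.txt")
        fs.copy_from_local(uri, "/tmp/Iris.txt")
    except RuntimeError as error:
        # Failures intercepted by the S3 resource are re-raised as RuntimeError,
        # with the original boto3 exception chained as the cause
        print(f"Remote copy failed: {error} (cause: {error.__cause__!r})")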

khiops/core/internals/runner.py

Lines changed: 20 additions & 11 deletions

@@ -594,17 +594,26 @@ def _report_exit_status(

         # Create the message reporting the errors and warnings
         error_msg = ""
-        errors, fatal_errors, warning_messages = self._collect_errors(log_file_path)
-        if warning_messages:
-            error_msg += "Warnings in log:\n" + "".join(warning_messages)
-        if errors:
-            if error_msg:
-                error_msg += "\n"
-            error_msg += "Errors in log:\n" + "".join(errors)
-        if fatal_errors:
-            if error_msg:
-                error_msg += "\n"
-            error_msg += "Fatal errors in log:\n" + "".join(fatal_errors)
+        # If the log file exists: Collect the errors and warnings messages
+        if fs.exists(log_file_path):
+            errors, fatal_errors, warning_messages = self._collect_errors(log_file_path)
+            if warning_messages:
+                error_msg += "Warnings in log:\n" + "".join(warning_messages)
+            if errors:
+                if error_msg:
+                    error_msg += "\n"
+                error_msg += "Errors in log:\n" + "".join(errors)
+            if fatal_errors:
+                if error_msg:
+                    error_msg += "\n"
+                error_msg += "Fatal errors in log:\n" + "".join(fatal_errors)
+        # Otherwise warn that the log file is missing
+        else:
+            warnings.warn(
+                f"Log file not found after {tool_name} execution. "
+                f"Path: {log_file_path}"
+            )
+            errors = fatal_errors = []

         # Add stdout to the warning message if non empty
         if stdout:

packaging/conda/meta.yaml

Lines changed: 5 additions & 1 deletion

@@ -30,8 +30,12 @@ requirements:
     - pandas >=0.25.3
     - scikit-learn >=0.22.2
   run_constrained:
-    - boto3 >=1.17.39
+    # do not necessarily use the latest version
+    # to avoid undesired breaking changes
+    - boto3 >=1.17.39,<=1.35.69
     - google-cloud-storage >=1.37.0
+    # an open issue on boto3 (https://github.com/boto/boto3/issues/3585) forces a min version of pyopenssl
+    - pyopenssl>=24.0.0,<25.0.0

 outputs:
   - name: {{ metadata.get('name') }}

packaging/docker/khiopspydev/Dockerfile.ubuntu

Lines changed: 39 additions & 5 deletions

@@ -10,14 +10,15 @@ ARG KHIOPS_REVISION
 RUN true \
     # Install git (for khiops-python version calculation) and pip \
     && apt-get -y update \
-    && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget \
+    && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget ruby-dev \
     # Get Linux distribution codename \
     && if [ -f /etc/os-release ]; then . /etc/os-release; fi \
     # Obtain the Khiops native package \
     && KHIOPS_PKG_FILE=$KHIOPS_REVISION/khiops-core-openmpi_$KHIOPS_REVISION-1-$VERSION_CODENAME.amd64.deb \
     && wget -O KHIOPS_CORE.deb "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_PKG_FILE}" \
-    # Install the Khiops native package \
-    && dpkg -i --force-all KHIOPS_CORE.deb \
+    # Install the Khiops native package: make it always succeed. \
+    # If dpkg fails, it is due to missing dependencies, which apt installs in the next line \
+    && (dpkg -i --force-all KHIOPS_CORE.deb || true) \
     && apt-get -f -y install \
     && rm -f KHIOPS_CORE.deb \
     # Set python to python3 \
@@ -39,6 +40,8 @@
 # set up all the supported Python environments under conda (for the unit tests)
 # relying on a variable containing all the versions
 ARG PYTHON_VERSIONS
+ARG KHIOPS_GCS_DRIVER_REVISION
+ARG KHIOPS_S3_DRIVER_REVISION
 RUN true \
     && export CONDA="/root/miniforge3/bin/conda" \
     && /bin/bash -c 'for version in ${PYTHON_VERSIONS}; \
@@ -48,15 +51,46 @@
     do \
         $CONDA create -y -n $CONDA_ENV python=${version}; \
     done; \
+    # khiops core \
     $CONDA install -y -n py${version}_conda -c khiops-dev khiops-core=$(echo ${KHIOPS_REVISION} | tr -d "-") ; \
+    # remote files drivers installed in the conda environment \
+    $CONDA install -y -n py${version}_conda -c khiops \
+        khiops-driver-s3=${KHIOPS_S3_DRIVER_REVISION} \
+        khiops-driver-gcs=${KHIOPS_GCS_DRIVER_REVISION}; \
     done' \
     && true

 RUN mkdir -p /scripts
-COPY ./run_service.sh /scripts/run_service.sh
-RUN chmod +x /scripts/run_service.sh && \
+COPY ./run_service.sh ./run_fake_remote_file_servers.sh /scripts/
+RUN chmod +x /scripts/run_service.sh /scripts/run_fake_remote_file_servers.sh && \
     useradd -rm -d /home/ubuntu -s /bin/bash -g root -u 1000 ubuntu

+# remote files drivers installed system-wide
+RUN true \
+    # Get Linux distribution codename \
+    && if [ -f /etc/os-release ]; then . /etc/os-release; fi \
+    && wget -O khiops-gcs.deb https://github.com/KhiopsML/khiopsdriver-gcs/releases/download/${KHIOPS_GCS_DRIVER_REVISION}/khiops-driver-gcs_${KHIOPS_GCS_DRIVER_REVISION}-1-${VERSION_CODENAME}.amd64.deb \
+    && wget -O khiops-s3.deb https://github.com/KhiopsML/khiopsdriver-s3/releases/download/${KHIOPS_S3_DRIVER_REVISION}/khiops-driver-s3_${KHIOPS_S3_DRIVER_REVISION}-1-${VERSION_CODENAME}.amd64.deb \
+    && (dpkg -i --force-all khiops-gcs.deb khiops-s3.deb || true) \
+    && apt-get -f -y install \
+    && rm -f khiops-gcs.deb khiops-s3.deb \
+    && true
+
 FROM ghcr.io/khiopsml/khiops-server:${SERVER_REVISION} AS server
+
 FROM khiopsdev AS base
 COPY --from=server /service /usr/bin/service
+
+# S3 fake file server (only in the ubuntu container)
+# Do not use the latest fakes3 version because, starting from 1.3, a licence is required.
+# If fakes3 is no longer compatible, think about switching to an alternative, fully compatible server
+# (https://github.com/jamhall/s3rver:v3.7.1, for example, is not yet fully compatible)
+RUN gem install fakes3:1.2.1 sorted_set
+# Avoid resolving a fake s3-bucket.localhost hostname
+# Alternate builders (buildx via moby buildkit) mount /etc/hosts read-only, so the following command would fail:
+# echo "127.0.0.1 s3-bucket.localhost" >> /etc/hosts
+# You will have to add the `add-hosts` input instead (https://github.com/docker/build-push-action/#inputs)

+# Port on which fakes3 is listening
+EXPOSE 4569

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+#!/bin/bash
+
+ROOT_FOLDER=${1:-.} # defaults to the current folder
+
+# File server for S3 (runs in the background)
+# WARNING:
+# -r: exposes pre-provisioned files (feature not currently used): the direct child folder will be the bucket name
+#     these files were uploaded once because fake-s3 creates metadata
+echo "Launching fakes3 in background..."
+PORT_NUMBER=${AWS_ENDPOINT_URL##*:}
+nohup /usr/local/bin/fakes3 \
+  -r "${ROOT_FOLDER}"/tests/resources/remote-access \
+  -p "${PORT_NUMBER}" > /dev/null < /dev/null 2>&1 & # need to redirect all 3 fds to free the TTY

setup.py

Lines changed: 8 additions & 1 deletion

@@ -49,7 +49,14 @@
     ],
     cmdclass=versioneer.get_cmdclass(),
     extras_require={
-        "s3": ["boto3>=1.17.39"],
+        "s3": [
+            # do not necessarily use the latest version
+            # to avoid undesired breaking changes
+            "boto3>=1.17.39,<=1.35.69",
+            # an open issue on boto3 (https://github.com/boto/boto3/issues/3585)
+            # forces a minimal version of pyopenssl
+            "pyopenssl>=24.0.0,<25.0.0",
+        ],
         "gcs": ["google-cloud-storage>=1.37.0"],
     },
 )

tests/test_helper.py

Lines changed: 4 additions & 4 deletions

@@ -272,10 +272,10 @@ def get_with_subkey(dictionary, subkey):
         return values

     @staticmethod
-    def skip_long_test(test_case):
-        if "UNITTEST_ONLY_SHORT_TESTS" in os.environ:
-            if os.environ["UNITTEST_ONLY_SHORT_TESTS"].lower() == "true":
-                test_case.skipTest("Skipping long test")
+    def skip_expensive_test(test_case):
+        if "SKIP_EXPENSIVE_TESTS" in os.environ:
+            if os.environ["SKIP_EXPENSIVE_TESTS"].lower() == "true":
+                test_case.skipTest("Skipping expensive test")

     @staticmethod
     def create_parameter_trace():
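
A minimal sketch of how a test case can opt in to this skip logic; the helper class name and import path below are assumptions for illustration, only the SKIP_EXPENSIVE_TESTS variable and the skip_expensive_test(test_case) signature come from the change above:

    import unittest

    from tests.test_helper import KhiopsTestHelper  # assumed import path and class name


    class ExpensiveScenarioTest(unittest.TestCase):  # hypothetical test case
        def test_train_on_large_dataset(self):
            # Skipped when CI sets SKIP_EXPENSIVE_TESTS=true (outside dev/main, without run-expensive-tests)
            KhiopsTestHelper.skip_expensive_test(self)
            self.assertTrue(True)  # placeholder for the actual expensive scenario


    if __name__ == "__main__":
        unittest.main()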
