Skip to content

Commit ea20b1b

Browse files
author
Thierry RAMORASOAVINA
committed
Reactivate the integration tests for remote file access (GCS, S3) for non-conda environments
- 2 fake servers (fakes3 and fake-gcs-server) are used locally instead of real S3 and GCS servers. - The fake servers are provisioned with sample files before the tests run. - During the construction of the khiopsdev docker image we need to install the s3/gcs drivers required by khiops. - As these drivers are not released yet as conda packages we must disable the tests in conda environments (except when using a docker runner).
1 parent 6dcd790 commit ea20b1b

File tree

7 files changed

+236
-20
lines changed

7 files changed

+236
-20
lines changed

.github/workflows/dev-docker.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ env:
55
DEFAULT_IMAGE_INCREMENT: 0
66
DEFAULT_SERVER_REVISION: main
77
DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 3.13
8+
DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.10
9+
DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.12
810
on:
911
pull_request:
1012
paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml]
@@ -34,6 +36,14 @@ on:
3436
type: string
3537
default: main
3638
description: Khiops Server Revision
39+
khiops-gcs-driver-revision:
40+
type: string
41+
default: 0.0.10
42+
description: Driver version for Google Cloud Storage remote files
43+
khiops-s3-driver-revision:
44+
type: string
45+
default: 0.0.12
46+
description: Driver version for AWS-S3 remote files
3747
concurrency:
3848
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
3949
cancel-in-progress: true
@@ -55,6 +65,8 @@ jobs:
5565
echo "KHIOPSDEV_OS_CODENAME=$(echo '${{ matrix.khiopsdev-os }}' | tr -d '0-9.')" >> "$GITHUB_ENV"
5666
echo "SERVER_REVISION=${{ inputs.server-revision || env.DEFAULT_SERVER_REVISION }}" >> "$GITHUB_ENV"
5767
echo "IMAGE_URL=ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}" >> "$GITHUB_ENV"
68+
echo "KHIOPS_GCS_DRIVER_REVISION=${{ inputs.khiops-gcs-driver-revision || env.DEFAULT_KHIOPS_GCS_DRIVER_REVISION }}" >> "$GITHUB_ENV"
69+
echo "KHIOPS_S3_DRIVER_REVISION=${{ inputs.khiops-s3-driver-revision || env.DEFAULT_KHIOPS_S3_DRIVER_REVISION }}" >> "$GITHUB_ENV"
5870
- name: Checkout khiops-python sources
5971
uses: actions/checkout@v4
6072
- name: Set up Docker Buildx
@@ -81,13 +93,18 @@ jobs:
8193
- name: Build image and push it to GitHub Container Registry
8294
uses: docker/build-push-action@v5
8395
with:
96+
# Special hostname used by the integration tests for remote file access
97+
# added via the `add-hosts` input because /etc/hosts is read-only for alternate builders (buildx via moby buildkit)
98+
add-hosts: s3-bucket.localhost:127.0.0.1
8499
context: ./packaging/docker/khiopspydev/
85100
file: ./packaging/docker/khiopspydev/Dockerfile.${{ env.KHIOPSDEV_OS_CODENAME }}
86101
build-args: |
87102
"KHIOPS_REVISION=${{ env.KHIOPS_REVISION }}"
88103
"KHIOPSDEV_OS=${{ matrix.khiopsdev-os }}"
89104
"SERVER_REVISION=${{ env.SERVER_REVISION }}"
90105
"PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}"
106+
"KHIOPS_GCS_DRIVER_REVISION=${{ env.KHIOPS_GCS_DRIVER_REVISION }}"
107+
"KHIOPS_S3_DRIVER_REVISION=${{ env.KHIOPS_S3_DRIVER_REVISION }}"
91108
tags: ${{ env.DOCKER_IMAGE_TAGS }}
92109
# Push only on manual request
93110
push: ${{ inputs.push || false }}
Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
name: Unit Tests
2+
name: Tests
33
env:
44
DEFAULT_SAMPLES_REVISION: 10.2.4
55
DEFAULT_KHIOPS_DESKTOP_REVISION: 10.2.4
@@ -112,7 +112,32 @@ jobs:
112112
- name: Prepare Unit Tests Environment
113113
if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-long-tests
114114
run: echo "UNITTEST_ONLY_SHORT_TESTS=true" >> "$GITHUB_ENV"
115-
- name: Run Unit Tests
115+
- name: Prepare Integration Tests on remote files
116+
env:
117+
AWS_ENDPOINT_URL: http://localhost:4569
118+
shell: bash
119+
run: |
120+
# Prepare AWS-S3 credentials and configuration
121+
mkdir -p ${GITHUB_WORKSPACE}/.aws/
122+
cat << EOF > ${GITHUB_WORKSPACE}/.aws/credentials
123+
[default]
124+
aws_access_key_id=KEY
125+
aws_secret_access_key=SECRET
126+
EOF
127+
cat << EOF > ${GITHUB_WORKSPACE}/.aws/configuration
128+
[default]
129+
endpoint_url=${AWS_ENDPOINT_URL}
130+
region=eu-north-1
131+
EOF
132+
echo "Generated AWS credentials..."
133+
cat ${GITHUB_WORKSPACE}/.aws/credentials
134+
echo "Generated AWS configuration..."
135+
cat ${GITHUB_WORKSPACE}/.aws/configuration
136+
137+
# Prepare GCS credentials
138+
touch ${GITHUB_WORKSPACE}/google-credentials.json
139+
/scripts/run_fake_remote_file_servers.sh . # launch the servers in the background
140+
- name: Run Unit & Integration Tests
116141
env:
117142
KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples
118143
KHIOPS_DOCKER_RUNNER_URL: https://localhost:11000
@@ -124,6 +149,21 @@ jobs:
124149
rmaps_base_oversubscribe: true
125150
# Oversubscribe for MPI > 4.x
126151
OMPI_MCA_rmaps_base_oversubscribe: true
152+
# for the tests with GCS
153+
GCS_BUCKET_NAME: gcs-bucket
154+
# we take advantage of the built-in `STORAGE_EMULATOR_HOST` env variable
155+
# that every GCS client can read and lets us use a local fake file server
156+
STORAGE_EMULATOR_HOST: http://localhost:4443
157+
# even in GCS emulation mode, the credentials file must exist
158+
GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/google-credentials.json
159+
GCS_DRIVER_LOGLEVEL: info # set to debug for diagnosis
160+
S3_DRIVER_LOGLEVEL: info # set to debug for diagnosis
161+
# for the tests with S3
162+
S3_BUCKET_NAME: s3-bucket
163+
AWS_SHARED_CREDENTIALS_FILE: ${{ github.workspace }}/.aws/credentials
164+
AWS_CONFIG_FILE: ${{ github.workspace }}/.aws/configuration
165+
# common var for tests with GCS & S3
166+
no_proxy: localhost
127167
run: |
128168
# This is needed so that the Git tag is parsed and the khiops-python
129169
# version is retrieved
@@ -138,10 +178,10 @@ jobs:
138178
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage report -m
139179
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage xml -o "reports/$CONDA_ENV/py-coverage.xml"
140180
done
141-
- name: Display Unit Test Reports
181+
- name: Display Test Reports
142182
uses: dorny/test-reporter@v1
143183
with:
144-
name: Unit Tests ${{ matrix.python-version }}
184+
name: Run Tests ${{ matrix.python-version }}
145185
path: >-
146186
reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml,
147187
reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml

packaging/conda/meta.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,12 @@ requirements:
3030
- pandas >=0.25.3
3131
- scikit-learn >=0.22.2
3232
run_constrained:
33-
- boto3 >=1.17.39
33+
# do not necessarily use the latest version
34+
# to avoid undesired breaking changes
35+
- boto3 >=1.17.39,<=1.35.69
3436
- google-cloud-storage >=1.37.0
37+
# an open issue on boto3 (https://github.com/boto/boto3/issues/3585) forces a minimum version of pyopenssl
38+
- pyopenssl>=24.0.0,<25.0.0
3539

3640
outputs:
3741
- name: {{ metadata.get('name') }}

packaging/docker/khiopspydev/Dockerfile.ubuntu

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@ ARG KHIOPS_REVISION
1010
RUN true \
1111
# Install git (for khiops-python version calculation) and pip \
1212
&& apt-get -y update \
13-
&& apt-get -y --no-install-recommends install git python3-pip zip pandoc wget \
13+
&& apt-get -y --no-install-recommends install git python3-pip zip pandoc wget ruby-dev \
1414
# Get Linux distribution codename \
1515
&& if [ -f /etc/os-release ]; then . /etc/os-release; fi \
1616
# Obtain the Khiops native package \
1717
&& KHIOPS_PKG_FILE=$KHIOPS_REVISION/khiops-core-openmpi_$KHIOPS_REVISION-1-$VERSION_CODENAME.amd64.deb \
1818
&& wget -O KHIOPS_CORE.deb "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_PKG_FILE}" \
19-
# Install the Khiops native package \
20-
&& dpkg -i --force-all KHIOPS_CORE.deb \
19+
# Install the Khiops native package : make it always succeed. \
20+
# If dpkg fails it is due to missing dependencies which will be installed by apt in the next line \
21+
&& (dpkg -i --force-all KHIOPS_CORE.deb || true) \
2122
&& apt-get -f -y install \
2223
&& rm -f KHIOPS_CORE.deb \
2324
# Set python to python3 \
@@ -53,10 +54,36 @@ RUN true \
5354
&& true
5455

5556
RUN mkdir -p /scripts
56-
COPY ./run_service.sh /scripts/run_service.sh
57-
RUN chmod +x /scripts/run_service.sh && \
57+
COPY ./run_service.sh ./run_fake_remote_file_servers.sh /scripts/
58+
RUN chmod +x /scripts/run_service.sh /scripts/run_fake_remote_file_servers.sh && \
5859
useradd -rm -d /home/ubuntu -s /bin/bash -g root -u 1000 ubuntu
5960

61+
# new gcs & s3 drivers (written in C++)
62+
ARG KHIOPS_GCS_DRIVER_REVISION
63+
ARG KHIOPS_S3_DRIVER_REVISION
64+
RUN true \
65+
&& wget -O khiops-gcs.deb https://github.com/KhiopsML/khiopsdriver-gcs/releases/download/${KHIOPS_GCS_DRIVER_REVISION}/khiops-gcs_${KHIOPS_GCS_DRIVER_REVISION}.deb \
66+
&& wget -O khiops-s3.deb https://github.com/KhiopsML/khiopsdriver-s3/releases/download/${KHIOPS_S3_DRIVER_REVISION}/khiops-s3_${KHIOPS_S3_DRIVER_REVISION}.deb \
67+
&& (dpkg -i --force-all khiops-gcs.deb khiops-s3.deb || true) \
68+
&& apt-get -f -y install \
69+
&& rm -f khiops-gcs.deb khiops-s3.deb \
70+
&& true
71+
6072
FROM ghcr.io/khiopsml/khiops-server:${SERVER_REVISION} AS server
73+
6174
FROM khiopsdev AS base
6275
COPY --from=server /service /usr/bin/service
76+
77+
# S3 fake file server (only in the ubuntu container)
78+
# Do not use the latest fakes3 version because starting from 1.3 a licence is required
79+
# if fakes3 is no longer compatible think about switching to an alternative and fully compatible server
80+
# (https://github.com/jamhall/s3rver:v3.7.1 is not yet fully compatible, for example)
81+
RUN gem install fakes3:1.2.1 sorted_set
82+
# Avoid resolving a fake s3-bucket.localhost hostname
83+
# Alternate builders (buildx via moby buildkit) mount /etc/hosts read-only, the following command will fail
84+
# echo "127.0.0.1 s3-bucket.localhost" >> /etc/hosts
85+
# You will have to add the `add-hosts` input instead (https://github.com/docker/build-push-action/#inputs)
86+
87+
# Port on which fakes3 is listening
88+
EXPOSE 4569
89+
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash
2+
3+
ROOT_FOLDER=${1:-.} # defaults to current folder
4+
5+
# File server for S3 (runs in background)
6+
# WARNING :
7+
# -r : exposes pre-provisioned files (not currently used feature) : the direct child folder will be the bucket name
8+
# these files were uploaded once because fake-s3 creates metadata
9+
echo "Launching fakes3 in background..."
10+
PORT_NUMBER=${AWS_ENDPOINT_URL##*:}
11+
nohup /usr/local/bin/fakes3 \
12+
-r "${ROOT_FOLDER}"/tests/resources/remote-access \
13+
-p "${PORT_NUMBER}" > /dev/null < /dev/null 2>&1 & # needs to redirect all the 3 fds to free the TTY

setup.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,14 @@
4949
],
5050
cmdclass=versioneer.get_cmdclass(),
5151
extras_require={
52-
"s3": ["boto3>=1.17.39"],
52+
"s3": [
53+
# do not necessarily use the latest version
54+
# to avoid undesired breaking changes
55+
"boto3>=1.17.39,<=1.35.69",
56+
# an open issue on boto3 (https://github.com/boto/boto3/issues/3585)
57+
# forces a minimum version of pyopenssl
58+
"pyopenssl>=24.0.0,<25.0.0",
59+
],
5360
"gcs": ["google-cloud-storage>=1.37.0"],
5461
},
5562
)

0 commit comments

Comments
 (0)