This repository was archived by the owner on Jul 4, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 182
Feat e2e test cortexso hub #1590
Merged
Merged
Changes from 3 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
2da5f29
feat: e2e testing cortexso model hub
hiento09 98ade84
Merge branch 'dev' into feat/e2e-test-cortexso-hub
hiento09 5dfa1b1
Merge branch 'dev' into feat/e2e-test-cortexso-hub
hiento09 9436ec5
chore: schedule to run models test weekly
hiento09 16e2bfd
chore: resolve warning pytest
hiento09 9cbdea0
chore: use default branch cortexso hub
hiento09 26edc48
Merge branch 'dev' into feat/e2e-test-cortexso-hub
hiento09 7518a7b
Merge branch 'dev' into feat/e2e-test-cortexso-hub
hiento09 6c1ff37
Merge branch 'dev' into feat/e2e-test-cortexso-hub
hiento09 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
# CI workflow: build cortex.cpp and run the e2e test suite against every
# model published on the cortexso Hugging Face hub.
name: Test cortexso Model Hub

on:
  schedule:
    - cron: "0 16 * * *" # every day at 16:00 UTC (23:00 UTC+7)
  workflow_dispatch:

jobs:
  build-and-test:
    runs-on: ${{ matrix.runs-on }}
    # Model downloads + inference over the whole hub can take many hours.
    timeout-minutes: 1440
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "amd64"
            runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub"
            cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
            build-deps-cmake-flags: ""
            ccache-dir: ""
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: use python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install tools on Linux
        run: |
          sudo chown -R runner:runner /home/runner/cortexcpp
          python3 -m pip install awscli

      # Restore the vcpkg binary cache from MinIO to avoid rebuilding deps.
      - name: Download vcpkg cache from s3
        continue-on-error: true
        run: |
          aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

      - name: Configure vcpkg
        run: |
          cd engine
          make configure-vcpkg

      - name: Build
        run: |
          cd engine
          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"

      - name: Run unit tests
        run: |
          cd engine
          make run-unit-tests

      # Inject the HF read token into the cortex config so gated models pull.
      - name: Run setup config for linux
        shell: bash
        run: |
          cd engine
          ./build/cortex --version
          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc

      - name: Run e2e tests
        run: |
          cd engine
          cp build/cortex build/cortex-nightly
          cp build/cortex build/cortex-beta
          python -m pip install --upgrade pip
          python -m pip install -r e2e-test/requirements.txt
          python e2e-test/main.py
          pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py
          rm build/cortex-nightly
          rm build/cortex-beta
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }}

      - name: Pre-package
        run: |
          cd engine
          make pre-package DESTINATION_BINARY_NAME="cortex"

      - name: Package
        run: |
          cd engine
          make package

      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          name: cortex-${{ matrix.os }}-${{ matrix.name }}
          path: ./engine/cortex

      # Persist the (possibly updated) vcpkg cache even when earlier steps fail.
      - name: Upload linux vcpkg cache to s3
        continue-on-error: true
        if: always()
        run: |
          aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }}
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
164 changes: 164 additions & 0 deletions
164
engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| import pytest | ||
| import requests | ||
| import os | ||
|
|
||
| from pathlib import Path | ||
| from test_runner import ( | ||
| run, | ||
| start_server, | ||
| stop_server, | ||
| wait_for_websocket_download_success_event, | ||
| ) | ||
|
|
||
# Hugging Face collection listing the cortexso local models under test.
collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db"

# Read token for the Hugging Face API; fail fast at import if it is missing
# so the test run aborts with a clear message instead of 401s later.
token = os.getenv("HF_TOKEN")
if not token:
    raise ValueError("HF_TOKEN environment variable not set")
|
|
||
def get_repos_in_collection(collection_id, token):
    """Return the IDs of all repos contained in a Hugging Face collection.

    Args:
        collection_id: Collection slug, e.g. "cortexso/local-models-...".
        token: Hugging Face API token with read access.

    Returns:
        List of repo ID strings (e.g. "cortexso/llama3"), or an empty list
        when the API call fails.
    """
    # API endpoint to get list of repos in the collection
    url = f"https://huggingface.co/api/collections/{collection_id}"
    headers = {"Authorization": f"Bearer {token}"}
    # Fix: an explicit timeout so a hung HF API call cannot stall the CI job.
    response = requests.get(url, headers=headers, timeout=60)

    # Check response and retrieve repo IDs if successful
    if response.status_code == 200:
        return [repo["id"] for repo in response.json()["items"]]
    print("Error fetching repos:", response.status_code, response.json())
    return []
|
|
||
def get_repo_branches(repo_id, token):
    """Return the 'gguf' branch of *repo_id* if it exists.

    Args:
        repo_id: Full repo ID, e.g. "cortexso/llama3".
        token: Hugging Face API token with read access.

    Returns:
        ["gguf"] when the repo has a gguf branch, otherwise an empty list
        (including on any API error).
    """
    # API endpoint to get list of branches for each repo
    url = f"https://huggingface.co/api/models/{repo_id}/refs"
    headers = {"Authorization": f"Bearer {token}"}
    # Fix: an explicit timeout so a hung HF API call cannot stall the CI job.
    response = requests.get(url, headers=headers, timeout=60)

    # Check response and keep only the gguf branch
    if response.status_code == 200:
        branches = response.json()["branches"]
        return [branch["name"] for branch in branches if branch["name"] == "gguf"]
    print(f"Error fetching branches for {repo_id}:", response.status_code, response.json())
    return []
|
|
||
def get_all_repos_and_default_branches_gguf(collection_id, token):
    """Pair every repo in the collection with its 'gguf' branch.

    Returns entries formatted as "<repo-name>:<branch>", e.g. "llama3:gguf",
    suitable for use as cortex model identifiers.
    """
    # One entry per (repo, gguf-branch) combination found in the collection.
    return [
        f"{repo_id.split('/')[1]}:{branch_name}"
        for repo_id in get_repos_in_collection(collection_id, token)
        for branch_name in get_repo_branches(repo_id, token)
    ]
|
|
||
# Resolve the model list once at import time so pytest can parametrize on it.
repo_branches = get_all_repos_and_default_branches_gguf(collection_id, token)
|
|
||
class TestCortexsoModels:
    """End-to-end tests: pull, start, infer against, and stop every model
    published on the cortexso hub (one parametrized test per model)."""

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, request):
        """Start the cortex server and guarantee a clean model state
        before and after each test."""
        # Setup
        success = start_server()
        if not success:
            raise Exception("Failed to start server")
        # Delete any model left over from a previous run so each test
        # starts from a clean slate.
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        yield

        # Teardown: remove pulled models and stop the server.
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        stop_server()

    @pytest.mark.parametrize("model_url", repo_branches)
    @pytest.mark.asyncio
    async def test_models_on_cortexso_hub(self, model_url):
        """Full lifecycle for one model: pull -> verify -> list -> install
        engine -> start -> chat completion -> stop -> uninstall engine."""
        # Pull model from cortexso hub
        json_body = {"model": model_url}
        response = requests.post("http://localhost:3928/models/pull", json=json_body)
        assert response.status_code == 200, f"Failed to pull model: {model_url}"

        # Downloads can be large; no timeout on the websocket wait.
        await wait_for_websocket_download_success_event(timeout=None)

        # Check if the model was pulled successfully
        get_model_response = requests.get(
            f"http://127.0.0.1:3928/models/{model_url}"
        )
        assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
        assert (
            get_model_response.json()["model"] == model_url
        ), f"Unexpected model name for: {model_url}"

        # Check if the model is available in the list of models
        response = requests.get("http://localhost:3928/models")
        assert response.status_code == 200
        models = [i["id"] for i in response.json()["data"]]
        assert model_url in models, f"Model not found in list: {model_url}"

        # Install engine (capture=False streams output so long installs stay visible)
        exit_code, output, error = run(
            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture=False
        )
        root = Path.home()
        assert os.path.exists(
            root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt"
        )
        assert exit_code == 0, f"Install engine failed with error: {error}"

        # Start the model
        response = requests.post("http://localhost:3928/models/start", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Send an inference request
        inference_json_body = {
            "frequency_penalty": 0.2,
            "max_tokens": 4096,
            "messages": [
                {
                    "content": "",
                    "role": "user"
                }
            ],
            "model": model_url,
            "presence_penalty": 0.6,
            "stop": [
                "End"
            ],
            "stream": False,
            "temperature": 0.8,
            "top_p": 0.95
        }
        response = requests.post(
            "http://localhost:3928/v1/chat/completions",
            json=inference_json_body,
            headers={"Content-Type": "application/json"},
        )
        assert (
            response.status_code == 200
        ), f"status_code: {response.status_code} response: {response.json()}"

        # Stop the model
        response = requests.post("http://localhost:3928/models/stop", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Uninstall Engine
        exit_code, output, error = run(
            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
        )
        assert "Engine llama-cpp uninstalled successfully!" in output
        # Fix: diagnostic previously said "Install engine failed" on the
        # uninstall step, which would mislead anyone triaging a CI failure.
        assert exit_code == 0, f"Uninstall engine failed with error: {error}"
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.