diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index aa8e18d..6909fc1 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -4,22 +4,19 @@ on:
   workflow_dispatch:
 
 jobs:
-  build:
+  build-local:
     strategy:
       fail-fast: false
       matrix:
         os:
           - ubuntu-22.04
           # - windows-2019
-        python:
-          - "3.9"
-          - "3.10"
-          - "3.11"
-          - "3.12"
-        torch_version:
-          - "2.7.0"
-        cuda_short_version:
-          - "126"
+        python: ['3.10', '3.11', '3.12']
+        torch_version: ['2.7.0', '2.8.0', '2.9.0']
+        cuda_short_version: ['126']
+        exclude:
+          - torch_version: '2.9.0'
+            python: '3.9'
 
     uses: ./.github/workflows/wheels_build.yml
     with:
@@ -28,44 +25,82 @@ jobs:
       torch_version: ${{ matrix.torch_version }}
       cuda_short_version: ${{ matrix.cuda_short_version }}
 
+  build-pypi:
+    # Single canonical build intended for PyPI: no local CUDA/torch suffix
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ['ubuntu-22.04']
+        python: ['3.10', '3.11', '3.12']
+
+    uses: ./.github/workflows/wheels_build.yml
+    with:
+      os: ${{ matrix.os }}
+      python: ${{ matrix.python }}
+      torch_version: '2.9.0'
+      cuda_short_version: '128'
+      append_local_version: '0' # 0 to disable local version suffix
+
   # publish to GitHub Release
-  gh_release:
-    name: gh_release
-    needs: build
-    runs-on: ubuntu-20.04
-
-    timeout-minutes: 360
-    defaults:
-      run:
-        shell: bash
+  # gh_release:
+  #   name: gh_release
+  #   needs: build
+  #   runs-on: ubuntu-20.04
+
+  #   timeout-minutes: 360
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #   steps:
+  #   - uses: actions/download-artifact@v4
+  #     with:
+  #       path: dist
+
+  #   - run: ls -R dist/
+
+  #   # create night release if it's a push to main
+  #   - if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+  #     name: Nightly Release
+  #     uses: andelf/nightly-release@v1
+  #     env:
+  #       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  #     with:
+  #       tag_name: nightly
+  #       name: 'stable-fast Nightly Release $$'
+  #       prerelease: true
+  #       body: 'TODO: Add nightly release notes'
+  #       files: |
+  #         dist/*/*.whl
+
+  #   # create release if it's a tag like vx.y.z
+  #   - if: github.ref_type == 'tag' && startsWith(github.ref, 'refs/tags/v')
+  #     name: Release
+  #     uses: softprops/action-gh-release@v1
+  #     with :
+  #       files: |
+  #         dist/*/*.whl
+
+
+  consolidate-wheels:
+    needs: [build-local, build-pypi]
+    runs-on: ubuntu-latest
     steps:
-    - uses: actions/download-artifact@v4
+    - name: Download all wheel artifacts
+      uses: actions/download-artifact@v4
       with:
        path: dist
 
-    - run: ls -R dist/
+    - name: Consolidate wheels into a single folder
+      run: |
+        mkdir -p consolidated_wheels
+        find dist -name '*.whl' -exec cp {} consolidated_wheels/ \;
+        ls -l consolidated_wheels
 
-    # create night release if it's a push to main
-    - if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-      name: Nightly Release
-      uses: andelf/nightly-release@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    - name: Upload consolidated wheels
+      uses: actions/upload-artifact@v4
       with:
-        tag_name: nightly
-        name: 'stable-fast Nightly Release $$'
-        prerelease: true
-        body: 'TODO: Add nightly release notes'
-        files: |
-          dist/*/*.whl
-
-    # create release if it's a tag like vx.y.z
-    - if: github.ref_type == 'tag' && startsWith(github.ref, 'refs/tags/v')
-      name: Release
-      uses: softprops/action-gh-release@v1
-      with :
-        files: |
-          dist/*/*.whl
+        name: built-wheels
+        path: consolidated_wheels
 
   # upload_pip:
   #   needs: build
diff --git a/.github/workflows/wheels_build.yml b/.github/workflows/wheels_build.yml
index 0872838..8570da9 100644
--- a/.github/workflows/wheels_build.yml
+++ b/.github/workflows/wheels_build.yml
@@ -17,6 +17,11 @@ on:
         required: true
         type: string
         description: "Example: 117 for 11.7"
+      append_local_version:
+        required: false
+        type: string
+        default: '1'
+        description: "Set to '0' to disable SFAST local version suffix"
       cudnn_version_major:
         required: false
         default: '8'
@@ -38,6 +43,11 @@ on:
         required: true
         type: string
         description: "Example: 117 for 11.7"
+      append_local_version:
+        required: false
+        type: string
+        default: '1'
+        description: "Set to '0' to disable SFAST local version suffix"
       cudnn_version_major:
         required: false
         default: '8'
@@ -54,7 +64,7 @@ env:
   TORCH_CUDA_ARCH_LIST: "6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
   MAX_JOBS: 2
   DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
-  SFAST_APPEND_VERSION: 1
+  SFAST_APPEND_VERSION: ${{ inputs.append_local_version }}
   TWINE_USERNAME: __token__
 
 jobs:
@@ -81,12 +91,14 @@ jobs:
         cushort = "${{ inputs.cuda_short_version }}"
         # https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts
         full_version, install_script = {
+            "128": ("12.8.0", "https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run"),
             "126": ("12.6.0", "https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/cuda_12.6.0_560.28.03_linux.run"),
             "118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"),
             "117": ("11.7.1", "https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run"),
             "116": ("11.6.2", "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run"),
         }[cushort]
         cudnn_pypi_package = {
+            "128": "nvidia-cudnn-cu12",
             "126": "nvidia-cudnn-cu12",
             "121": "nvidia-cudnn-cu12",
             "118": "nvidia-cudnn-cu11",
@@ -184,17 +196,9 @@ jobs:
         set -Eeuo pipefail
         git config --global --add safe.directory "*"
         version=`cat version.txt`
-        torch_version_suffix=torch$(echo ${{ inputs.torch_version }} | sed 's/\.//g')
-        cuda_version_suffix=${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}
-        nightly_tag=$([[ ${VERSION_SOURCE} == 'tag' ]] && echo '' || echo '.dev'`date +%Y%m%d`)
-
-        if [[ "${{ inputs.cuda_short_version }}" == "126" ]]; then
-          echo "BUILD_VERSION=${version}" >> ${GITHUB_ENV}
-          echo "BUILD_VERSION=${version}" >> ${GITHUB_OUTPUT}
-        else
-          echo "BUILD_VERSION=${version}+${torch_version_suffix}${cuda_version_suffix}" >> ${GITHUB_ENV}
-          echo "BUILD_VERSION=${version}+${torch_version_suffix}${cuda_version_suffix}" >> ${GITHUB_OUTPUT}
-        fi
+        echo "BUILD_VERSION=${version}" >> ${GITHUB_ENV}
+        echo "BUILD_VERSION=${version}" >> ${GITHUB_OUTPUT}
+
     - run: echo "sfast-${BUILD_VERSION}"
     - run: echo "release version"
       if: ${{ !contains(steps.sfast_version.outputs.BUILD_VERSION, '.dev') }}
@@ -218,7 +222,7 @@ jobs:
         CUDNN_PYPI_PACKAGE: ${{ steps.cuda_info.outputs.CUDNN_PYPI_PACKAGE }}
       run: |
         cudnn_next_version_major=$((${CUDNN_VERSION_MAJOR} + 1))
-        cudnn_package_name="${CUDNN_PYPI_PACKAGE}>=${CUDNN_VERSION_MAJOR}.0.0.0,<9.6.0.0"
+        cudnn_package_name="${CUDNN_PYPI_PACKAGE}>=${CUDNN_VERSION_MAJOR}.0.0.0,<=9.10.2.21"
         $PY -m pip install --upgrade pip
         $PY -m pip install wheel setuptools ninja twine "torch==${{ inputs.torch_version }}" "${cudnn_package_name}" -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cuda_short_version }} --no-cache-dir
 
@@ -229,8 +233,18 @@ jobs:
         PLAT_ARG: ${{ contains(inputs.os, 'ubuntu') && '--plat-name manylinux2014_x86_64' || '' }}
     - run: du -h dist/*
-    - uses: actions/upload-artifact@v4
+
+    - name: Upload artifact (local build)
+      if: ${{ inputs.append_local_version != '0' }}
+      uses: actions/upload-artifact@v4
       with:
         name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}
         path: dist/*.whl
+
+    - name: Upload artifact (pypi build)
+      if: ${{ inputs.append_local_version == '0' }}
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ inputs.os }}-py${{ inputs.python }}
+        path: dist/*.whl
 
 # Note: it might be helpful to have additional steps that test if the built wheels actually work
diff --git a/setup.py b/setup.py
index 9a6e99a..ee6dfe1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,16 @@
 #!/usr/bin/env python
 import glob
+import os
 import platform
 import subprocess
-import os
 
 # import shutil
 from os import path
 
-from setuptools import find_packages, setup
 # from typing import List
 
 import torch
+from setuptools import find_packages, setup
 from torch.utils.cpp_extension import CUDA_HOME, CUDNN_HOME, CppExtension, CUDAExtension
 
 torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
@@ -19,7 +19,36 @@ def fetch_requirements():
     with open("requirements.txt") as f:
-        reqs = f.read().strip().split("\n")
+        lines = f.read().strip().split("\n")
+
+    # Drop empty/comment lines
+    base_reqs = []
+    for line in lines:
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+        base_reqs.append(stripped)
+
+    # Constrain torch to the major.minor of the torch that was used to build
+    # the wheel, e.g. torch 2.7.0 -> "torch>=2.7,<2.8".
+    major, minor = torch_ver[0], torch_ver[1]
+    torch_constraint = f"torch>={major}.{minor},<{major}.{minor + 1}"
+
+    reqs = []
+    torch_added = False
+    for req in base_reqs:
+        # Replace any existing torch specification with our constrained one.
+        if req.split()[0].startswith("torch"):
+            if not torch_added:
+                reqs.append(torch_constraint)
+                torch_added = True
+            # Skip additional torch lines
+        else:
+            reqs.append(req)
+
+    if not torch_added:
+        reqs.append(torch_constraint)
+
     return reqs
 
 
@@ -41,6 +70,42 @@ def get_version():
         date_str = datetime.today().strftime("%y%m%d")
         version = version + ".dev" + date_str
 
+    # Optionally append the CUDA and torch version used to build this
+    # wheel as a local version suffix, e.g. "+cu126.torch2.7".
+    if os.getenv("SFAST_APPEND_VERSION", "0") not in (
+        "0",
+        "",
+        "false",
+        "False",
+    ):
+        torch_version_str = os.getenv("TORCH_VERSION_FOR_BUILD", torch.__version__)
+
+        # torch_version_str is typically like "2.7.0+cu126" or "2.7.0".
+        cuda_token = ""
+        base_torch = torch_version_str
+        if "+" in torch_version_str:
+            base_torch, _, local_torch = torch_version_str.partition("+")
+            # Take the first token from the local version, e.g. "cu126".
+            cuda_token = local_torch.split(".")[0]
+
+        base_parts = base_torch.split(".")
+        if len(base_parts) >= 2:
+            torch_major_minor = ".".join(base_parts[:2])
+        else:
+            torch_major_minor = base_torch
+
+        local_suffix = f"torch{torch_major_minor}"
+        if cuda_token:
+            local_suffix = f"{cuda_token}.{local_suffix}"
+
+        # Ensure we only ever have a single '+' in the version; if there is
+        # already a local segment, extend it with ".cuXXX.torchY.Z".
+        if "+" in version:
+            base_v, _, local_v = version.partition("+")
+            version = f"{base_v}+{local_v}.{local_suffix}"
+        else:
+            version = f"{version}+{local_suffix}"
+
     init_py_path = path.join(this_dir, "src", "sfast", "__init__.py")
     init_py = open(init_py_path, "r").readlines()
     new_init_py = [l for l in init_py if not l.startswith("__version__")]
@@ -52,8 +117,7 @@ def get_version():
 
 def get_cuda_version(cuda_dir) -> int:
     nvcc_bin = "nvcc" if cuda_dir is None else cuda_dir + "/bin/nvcc"
-    raw_output = subprocess.check_output([nvcc_bin, "-V"],
-                                         universal_newlines=True)
+    raw_output = subprocess.check_output([nvcc_bin, "-V"], universal_newlines=True)
     output = raw_output.split()
     release_idx = output.index("release") + 1
     release = output[release_idx].split(".")
@@ -69,13 +133,10 @@ def get_extensions():
 
     extensions_dir = path.join(this_dir, "src", "sfast", "csrc")
 
     include_dirs = [extensions_dir]
-    sources = glob.glob(path.join(extensions_dir, "**", "*.cpp"),
-                        recursive=True)
+    sources = glob.glob(path.join(extensions_dir, "**", "*.cpp"), recursive=True)
     # common code between cuda and rocm platforms, for hipify version [1,0,0] and later.
-    source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu"),
-                            recursive=True)
-    source_cuda_rt = glob.glob(path.join(extensions_dir, "**", "*.cc"),
-                               recursive=True)
+    source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu"), recursive=True)
+    source_cuda_rt = glob.glob(path.join(extensions_dir, "**", "*.cc"), recursive=True)
 
     extension = CppExtension
@@ -89,19 +150,25 @@ def get_extensions():
     # Skip the above useless check as we will always compile with CUDA support,
     # and the CI might be running on CPU-only machines.
     if platform.system() != "Darwin" and os.getenv("WITH_CUDA", "1") != "0":
-        assert CUDA_HOME is not None, "Cannot find CUDA installation. If you want to compile without CUDA, set `WITH_CUDA=0`."
+        assert CUDA_HOME is not None, (
+            "Cannot find CUDA installation. If you want to compile without CUDA, set `WITH_CUDA=0`."
+        )
 
         cutlass_root = os.path.join(this_dir, "third_party", "cutlass")
         cutlass_include = os.path.join(cutlass_root, "include")
-        if not os.path.exists(cutlass_root) or not os.path.exists(
-                cutlass_include):
-            raise RuntimeError("Cannot find cutlass. Please run "
-                               "`git submodule update --init --recursive`.")
+        if not os.path.exists(cutlass_root) or not os.path.exists(cutlass_include):
+            raise RuntimeError(
+                "Cannot find cutlass. Please run "
+                "`git submodule update --init --recursive`."
+            )
         include_dirs.append(cutlass_include)
-        cutlass_tools_util_include = os.path.join(cutlass_root, "tools",
-                                                  "util", "include")
+        cutlass_tools_util_include = os.path.join(
+            cutlass_root, "tools", "util", "include"
+        )
         include_dirs.append(cutlass_tools_util_include)
-        cutlass_examples_dual_gemm = os.path.join(cutlass_root, "examples", "45_dual_gemm")
+        cutlass_examples_dual_gemm = os.path.join(
+            cutlass_root, "examples", "45_dual_gemm"
+        )
         include_dirs.append(cutlass_examples_dual_gemm)
 
         extension = CUDAExtension
@@ -155,6 +222,11 @@ def get_extensions():
             extra_compile_args["nvcc"].append("-ccbin={}".format(CC))
 
         if CUDNN_HOME is None:
+            # Prefer PyTorch's own toolchain for cuDNN / cuBLAS when possible.
+            # For older NVIDIA wheels (where nvidia.cudnn/cublas had a real __file__)
+            # we still support discovering headers via that path, but newer PyTorch
+            # / NVIDIA packaging may expose these as non file-backed modules, in which
+            # case we simply fall back to PyTorch/CUDA's default include/library dirs.
             try:
                 # Try to use the bundled version of CUDNN with PyTorch installation.
                 # This is also used in CI.
@@ -162,8 +234,19 @@ def get_extensions():
             except ImportError:
                 cudnn = None
 
+            cudnn_dir = None
             if cudnn is not None:
-                cudnn_dir = os.path.dirname(cudnn.__file__)
+                # Prefer __file__ for regular packages; fall back to __path__ for
+                # namespace-style packages used by newer NVIDIA wheels.
+                cudnn_file = getattr(cudnn, "__file__", None)
+                if cudnn_file:
+                    cudnn_dir = os.path.dirname(cudnn_file)
+                else:
+                    cudnn_paths = getattr(cudnn, "__path__", None)
+                    if cudnn_paths:
+                        cudnn_dir = list(cudnn_paths)[0]
+
+            if cudnn_dir is not None:
                 print("Using CUDNN from {}".format(cudnn_dir))
                 include_dirs.append(os.path.join(cudnn_dir, "include"))
                 # Hope PyTorch knows how to link it correctly.
@@ -181,8 +264,17 @@ def get_extensions():
             except ImportError:
                 cublas = None
 
+            cublas_dir = None
             if cublas is not None:
-                cublas_dir = os.path.dirname(cublas.__file__)
+                cublas_file = getattr(cublas, "__file__", None)
+                if cublas_file:
+                    cublas_dir = os.path.dirname(cublas_file)
+                else:
+                    cublas_paths = getattr(cublas, "__path__", None)
+                    if cublas_paths:
+                        cublas_dir = list(cublas_paths)[0]
+
+            if cublas_dir is not None:
                 print("Using CUBLAS from {}".format(cublas_dir))
                 include_dirs.append(os.path.join(cublas_dir, "include"))
                 # Hope PyTorch knows how to link it correctly.
@@ -222,13 +314,12 @@ def get_extensions():
     version=get_version(),
     author="Cheng Zeyi",
     url="https://github.com/chengzeyi/stable-fast",
-    description=
-    "Stable Fast is an ultra lightweight performance optimization framework"
+    description="Stable Fast is an ultra lightweight performance optimization framework"
     " for Hugging Fase diffuser pipelines.",
     package_dir={
-        '': 'src',
+        "": "src",
     },
-    packages=find_packages(where='src'),
+    packages=find_packages(where="src"),
     # include submodules in third_party
     python_requires=">=3.7",
     install_requires=fetch_requirements(),
diff --git a/version.txt b/version.txt
index 238d6e8..b0f3d96 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.0.7
+1.0.8