forked from thu-ml/TurboDiffusion
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.py
More file actions
80 lines (71 loc) · 2.34 KB
/
setup.py
File metadata and controls
80 lines (71 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Copyright (c) 2025 by TurboDiffusion team.
Licensed under the Apache License, Version 2.0 (the "License");
Citation (please cite if you use this code):
@article{zhang2025turbodiffusion,
title={TurboDiffusion: Accelerating Video Diffusion Models by 100-200 Times},
author={Zhang, Jintao and Zheng, Kaiwen and Jiang, Kai and Wang, Haoxu and Stoica, Ion and Gonzalez, Joseph E and Chen, Jianfei and Zhu, Jun},
journal={arXiv preprint arXiv:2512.16093},
year={2025}
}
"""
from pathlib import Path
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
ops_dir = Path(__file__).parent / "turbodiffusion" / "ops"
cutlass_dir = ops_dir / "cutlass"
nvcc_flags = [
"-O3",
"-std=c++17",
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
"-U__CUDA_NO_BFLOAT16_OPERATORS__",
"-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
"-U__CUDA_NO_BFLOAT162_OPERATORS__",
"-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
"--expt-relaxed-constexpr",
"--expt-extended-lambda",
"--use_fast_math",
"--ptxas-options=--verbose,--warn-on-local-memory-usage",
"-lineinfo",
"-DCUTLASS_DEBUG_TRACE_LEVEL=0",
"-DNDEBUG",
"-Xcompiler",
"-fPIC"
]
cc_flag = [
"-gencode", "arch=compute_120a,code=sm_120a",
"-gencode", "arch=compute_100,code=sm_100",
"-gencode", "arch=compute_90,code=sm_90",
"-gencode", "arch=compute_89,code=sm_89",
"-gencode", "arch=compute_80,code=sm_80"
]
ext_modules = [
CUDAExtension(
name="turbo_diffusion_ops",
sources=[
"turbodiffusion/ops/bindings.cpp",
"turbodiffusion/ops/quant/quant.cu",
"turbodiffusion/ops/norm/rmsnorm.cu",
"turbodiffusion/ops/norm/layernorm.cu",
"turbodiffusion/ops/gemm/gemm.cu"
],
extra_compile_args={
"cxx": ["-O3", "-std=c++17"],
"nvcc": nvcc_flags + ["-DEXECMODE=0"] + cc_flag + ["--threads", "4"],
},
include_dirs=[
cutlass_dir / "include",
cutlass_dir / "tools" / "util" / "include",
ops_dir
],
libraries=["cuda"],
)
]
setup(
packages=find_packages(
exclude=("build", "csrc", "include", "tests", "dist", "docs", "benchmarks")
),
ext_modules=ext_modules,
cmdclass={"build_ext": BuildExtension},
)