Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 241 additions & 0 deletions .github/workflows/npu-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
name: NPU Build and Test

# Triggers: any push or pull request that touches this workflow file, plus
# manual dispatch with an optional PyTorch nightly date.
on:
  push:
    paths:
      - '.github/workflows/npu-test.yml'
  pull_request:
    paths:
      - '.github/workflows/npu-test.yml'
  workflow_dispatch:
    inputs:
      torch_nightly_date:
        # The install step falls back to a pinned nightly build (not the
        # newest one) when this input is empty, so the description says so.
        description: 'PyTorch nightly 日期 (格式: YYYYMMDD,留空则使用工作流中固定的默认版本)'
        required: false
        default: ''

jobs:
  build-and-test:
    name: Build and Test torch_npu
    runs-on: linux-aarch64-a3-2
    # Every step runs as root inside this builder image.
    container:
      image: swr.cn-north-4.myhuaweicloud.com/frameworkptadapter/pytorch_2.11.0_a2_aarch64_builder:20260331
      options: --user root
    env:
      # Quoted so the version stays a string rather than a float.
      PYTHON_VERSION: "3.11"
      # Same value as container.image above; read by the summary step.
      DOCKER_IMAGE: swr.cn-north-4.myhuaweicloud.com/frameworkptadapter/pytorch_2.11.0_a2_aarch64_builder:20260331
      AUDITWHEEL_PLAT: "skip"

    steps:
# Check out the repository together with all nested submodules.
- name: Checkout
  uses: actions/checkout@v4
  with:
    submodules: recursive

- name: Setup cache directories
  run: |
    # Create the pip and ccache cache directories, then open up permissions
    # on the whole cache tree.
    mkdir -p /github/home/.cache/pip /github/home/.cache/ccache
    chmod -R 777 /github/home/.cache

- name: Install ccache
  run: |
    # Install ccache from the distribution repositories and print its
    # version so the build log records which release is in use.
    yum -y install ccache
    ccache --version

# Persist pip's download cache between runs.
# NOTE(review): the primary key only varies with the Python version, so after
# the first save every run is an exact hit — confirm the cache never needs
# refreshing.
- name: Cache pip
  uses: actions/cache@v4
  with:
    key: pip-arm-py${{ env.PYTHON_VERSION }}-build-test
    restore-keys: |
      pip-arm-py${{ env.PYTHON_VERSION }}-
    path: /github/home/.cache/pip

- name: Uninstall pre-installed packages
  run: |
    # PYTHON_VERSION comes from the job-level env. Any uninstall failure is
    # ignored so this step can never fail the job.
    PIP="pip${PYTHON_VERSION}"
    "${PIP}" uninstall -y torch torchvision pyyaml setuptools auditwheel || true
    echo "Pre-installed packages uninstalled"

# Install build prerequisites and a PyTorch nightly wheel, then publish the
# installed torch version as the step output `version`.
- name: Install PyTorch nightly
  id: install_torch
  env:
    # Handed over through the environment instead of being expanded into the
    # script text, so the value can never be interpreted as shell syntax.
    # Empty for push / pull_request events.
    TORCH_NIGHTLY_DATE: ${{ github.event.inputs.torch_nightly_date }}
  run: |
    PIP=pip${{ env.PYTHON_VERSION }}
    PYTHON=python${{ env.PYTHON_VERSION }}

    export PIP_CACHE_DIR=/github/home/.cache/pip
    $PIP install --upgrade pip

    # Base build dependencies
    $PIP install pyyaml setuptools auditwheel

    if [ -n "${TORCH_NIGHTLY_DATE}" ]; then
      # Accept exactly eight digits (YYYYMMDD); reject everything else
      # before it reaches the pip requirement string.
      case "${TORCH_NIGHTLY_DATE}" in
        [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) ;;
        *)
          echo "::error::torch_nightly_date must be YYYYMMDD, got '${TORCH_NIGHTLY_DATE}'"
          exit 1
          ;;
      esac
      $PIP install --pre "torch==2.12.0.dev${TORCH_NIGHTLY_DATE}" --index-url https://download.pytorch.org/whl/nightly/cpu
    else
      # No date supplied: install the pinned version
      # (intended to follow requirements.txt).
      $PIP install --pre "torch==2.12.0.dev20260217" --extra-index-url https://download.pytorch.org/whl/nightly/cpu
    fi

    TORCH_VER=$($PYTHON -c "import torch; print(torch.__version__)")
    echo "version=${TORCH_VER}" >> "$GITHUB_OUTPUT"
    echo "PyTorch nightly version: ${TORCH_VER}"

# Persist the compiler cache. The primary key is unique per commit, so a new
# cache is saved each run; restore-keys falls back to the most recent cache
# for the same Python and torch version.
- name: Cache ccache
  uses: actions/cache@v4
  with:
    key: ccache-arm-py${{ env.PYTHON_VERSION }}-torch${{ steps.install_torch.outputs.version }}-${{ github.sha }}
    restore-keys: |
      ccache-arm-py${{ env.PYTHON_VERSION }}-torch${{ steps.install_torch.outputs.version }}-
    path: /github/home/.cache/ccache

# Build the wheel via ci/build.sh, mirroring the log to /tmp/build.log.
# Step outputs: `status` (exit code of ci/build.sh), `wheel` (first wheel in
# dist/, only on success) and `ccache_stats` (only when ccache is present).
- name: Build torch_npu wheel
  id: build
  run: |
    # Configure ccache
    if command -v ccache &> /dev/null; then
      echo "ccache found, enabling ccache"
      # Export the cache location and settings BEFORE running any ccache
      # command, so that the size limit and the statistics reset apply to
      # the directory that is saved/restored by the cache step rather than
      # to ccache's default directory.
      export CCACHE_DIR=/github/home/.cache/ccache
      export CCACHE_COMPRESS=1
      export CCACHE_MAXSIZE=10G
      export CCACHE_BASEDIR="${PWD}"
      ccache -M 10G
      ccache -z || true
      export CC="ccache gcc"
      export CXX="ccache g++"
      USE_CCACHE=1
    else
      echo "ccache not found, building without cache"
      USE_CCACHE=0
    fi

    # Build parameters
    echo "nproc value: $(nproc)"
    echo "MAX_JOBS: 40"
    export MAX_JOBS=40
    export DISABLE_INSTALL_TORCHAIR=FALSE
    export BUILD_WITHOUT_SHA=1

    # Run the ci/build.sh script. The pipeline's own exit status is tee's,
    # so the build result is read from PIPESTATUS immediately afterwards.
    bash ci/build.sh --python=${{ env.PYTHON_VERSION }} 2>&1 | tee /tmp/build.log
    BUILD_STATUS=${PIPESTATUS[0]}

    # ccache statistics (the grep pattern covers both ccache 3.x and 4.x
    # output formats); flattened to one line for use as a step output.
    if [ "${USE_CCACHE}" = "1" ]; then
      CCACHE_STATS=$(ccache -s | grep -E "cache hit|cache miss|cache size|hit rate" | tr '\n' ' ')
      echo "ccache_stats=${CCACHE_STATS}" >> "$GITHUB_OUTPUT"
      ccache -s
    fi

    echo "status=${BUILD_STATUS}" >> "$GITHUB_OUTPUT"
    if [ "${BUILD_STATUS}" -eq 0 ]; then
      WHL=$(ls dist/*.whl 2>/dev/null | head -1)
      echo "wheel=${WHL}" >> "$GITHUB_OUTPUT"
      echo "Build succeeded: ${WHL}"
    fi
    # Propagate the build result so a failed build fails the step.
    exit "${BUILD_STATUS}"

- name: Install torch_npu wheel
  run: |
    # Install the wheel produced in dist/ by the build step.
    # PYTHON_VERSION comes from the job-level env.
    PIP="pip${PYTHON_VERSION}"
    "${PIP}" install dist/torch_npu*.whl
    echo "torch_npu wheel installed"

- name: Check Ascend paths
  run: |
    # Diagnostic only: list each Ascend directory, or report it as missing.
    # A missing directory never fails the step.
    echo "=== Checking Ascend paths ==="
    for ascend_dir in /usr/local/Ascend /usr/local/Ascend/cann /usr/local/Ascend/nnal; do
      ls -la "${ascend_dir}/" 2>&1 || echo "${ascend_dir} not found"
    done

- name: Verify NPU availability
  run: |
    # Load the CANN environment variables; missing scripts are tolerated.
    for env_script in /usr/local/Ascend/cann/set_env.sh /usr/local/Ascend/nnal/atb/set_env.sh; do
      source "${env_script}" 2>/dev/null || true
    done

    PYTHON=python${{ env.PYTHON_VERSION }}
    # Move one level above the project root so that torch_npu is imported
    # from the installed wheel rather than from the source tree.
    cd ..
    echo "=== Testing torch_npu import ==="
    # Quoted heredoc delimiter: the shell passes the Python code through
    # without any expansion.
    $PYTHON - <<'PY'
    import torch
    print(f'torch: {torch.__version__}')
    import torch_npu
    print(f'torch_npu: {torch_npu.__version__}')
    print(f'NPU available: {torch.npu.is_available()}')
    print(f'NPU count: {torch.npu.device_count()}')
    print(f'NPU name: {torch.npu.get_device_name(0) if torch.npu.is_available() else "N/A"}')
    PY

# Run test/npu/test_device.py under pytest, mirroring the log to
# /tmp/test.log. Publishes step output `status` (0 = passed, 1 = failed) and
# fails the step when pytest fails.
- name: Run test_device.py
  id: run_tests
  run: |
    # Load the CANN environment variables; missing scripts are tolerated.
    source /usr/local/Ascend/cann/set_env.sh 2>/dev/null || true
    source /usr/local/Ascend/nnal/atb/set_env.sh 2>/dev/null || true

    PYTHON=python${{ env.PYTHON_VERSION }}

    # Each step starts in the project root again; enter the test directory
    # to run the tests.
    cd test
    $PYTHON -m pytest npu/test_device.py -v 2>&1 | tee /tmp/test.log
    # `$?` after a pipeline is tee's exit status, which is practically
    # always 0. Read pytest's own exit code from PIPESTATUS instead, and do
    # it immediately, before any other command overwrites it.
    TEST_STATUS=${PIPESTATUS[0]}

    if [ "${TEST_STATUS}" -eq 0 ]; then
      echo "status=0" >> "$GITHUB_OUTPUT"
      echo "test_device.py: PASSED"
    else
      echo "status=1" >> "$GITHUB_OUTPUT"
      echo "test_device.py: FAILED"
    fi
    # Propagate the result so failing tests fail the job; later steps that
    # use `if: always()` still run.
    exit "${TEST_STATUS}"

# Publish the build log whether or not earlier steps succeeded; a missing
# log file only produces a warning.
- name: Upload build log
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: build-log-${{ github.run_number }}
    if-no-files-found: warn
    path: /tmp/build.log

# Publish the pytest log whether or not earlier steps succeeded; a missing
# log file only produces a warning.
- name: Upload test log
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: test-log-${{ github.run_number }}
    if-no-files-found: warn
    path: /tmp/test.log

# Publish the built wheel whenever the build step itself succeeded.
# `always()` is required: a condition without a status-check function is
# implicitly combined with `success()`, which would drop the wheel as soon as
# any step after the build (install, verification, tests) fails.
- name: Upload wheel artifact
  if: always() && steps.build.outputs.status == '0'
  uses: actions/upload-artifact@v4
  with:
    name: torch_npu-wheel-${{ github.run_number }}
    path: dist/*.whl
    if-no-files-found: warn

# Append a Markdown summary table of the build and test results to the job
# summary. Runs regardless of the outcome of earlier steps.
- name: Build and Test summary
  if: always()
  run: |
    BUILD_STATUS="${{ steps.build.outputs.status }}"
    TEST_STATUS="${{ steps.run_tests.outputs.status }}"

    # Any value other than "0" — including an empty one from a step that
    # never ran — is reported as a failure.
    case "${BUILD_STATUS}" in
      0) BUILD_ICON="✅ SUCCESS" ;;
      *) BUILD_ICON="❌ FAILED" ;;
    esac

    case "${TEST_STATUS}" in
      0) TEST_ICON="✅ PASSED" ;;
      *) TEST_ICON="❌ FAILED" ;;
    esac

    # Closing line: the wheel path on success, otherwise a pointer to the
    # build-log artifact.
    if [ "${BUILD_STATUS}" = "0" ]; then
      FOOTER="> Wheel: \`${{ steps.build.outputs.wheel }}\`"
    else
      FOOTER="> 查看 build-log artifact 获取详细错误信息"
    fi

    cat >> "$GITHUB_STEP_SUMMARY" << EOF
    ## NPU Build and Test

    | 项目 | 详情 |
    |------|------|
    | 执行时间 | $(date -u '+%Y-%m-%d %H:%M UTC') |
    | Docker 镜像 | \`${{ env.DOCKER_IMAGE }}\` |
    | PyTorch Nightly | \`${{ steps.install_torch.outputs.version }}\` |
    | 仓库 Commit | \`${{ github.sha }}\` |
    | ccache 统计 | ${{ steps.build.outputs.ccache_stats || 'N/A' }} |
    | 构建结果 | ${BUILD_ICON} |
    | 测试结果 | ${TEST_ICON} |

    ${FOOTER}
    EOF
Loading