From 22690567b0a2e7764608b274d8b449a0be6eb0ca Mon Sep 17 00:00:00 2001 From: tongke Date: Wed, 24 Jun 2026 16:10:05 +0800 Subject: [PATCH 1/4] ci: pin torch=2.9.1 --- .github/workflows/build-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 72e593c..10cfd43 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -61,7 +61,7 @@ jobs: - name: Install Python dependencies run: | python3.12 -m pip install --no-cache-dir --upgrade pip - python3.12 -m pip install --no-cache-dir torch --index-url ${{ matrix.cuda == 'cu129' && 'https://download.pytorch.org/whl/cu129' || 'https://download.pytorch.org/whl/cu130' }} + python3.12 -m pip install --no-cache-dir torch==2.9.1 --index-url ${{ matrix.cuda == 'cu129' && 'https://download.pytorch.org/whl/cu129' || 'https://download.pytorch.org/whl/cu130' }} python3.12 -m pip install --no-cache-dir setuptools wheel "setuptools_scm>=6.0" build ninja auditwheel patchelf - name: Compute version From 50cada648e365e3145e23d45d42159df1e9bdd3d Mon Sep 17 00:00:00 2001 From: tongke Date: Wed, 24 Jun 2026 16:56:42 +0800 Subject: [PATCH 2/4] ci: add LD_LIBRARY_PATH for pip-installed CUDA libraries The nvidia/cuda devel container images include the CUDA toolkit but not cuDNN. When torch is installed via pip, nvidia-cudnn-cu12 (and friends) land in site-packages/nvidia/*/lib/, which is not on the system library search path. This causes an ImportError for libcudnn.so.9 when the build backend tries to import torch. Add a step that discovers all PyTorch and NVIDIA pip package library directories and prepends them to LD_LIBRARY_PATH via GITHUB_ENV, with a validation check that libcudnn.so.9 is actually found. 
--- .github/workflows/build-release.yml | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 10cfd43..ecd18cf 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -64,6 +64,37 @@ jobs: python3.12 -m pip install --no-cache-dir torch==2.9.1 --index-url ${{ matrix.cuda == 'cu129' && 'https://download.pytorch.org/whl/cu129' || 'https://download.pytorch.org/whl/cu130' }} python3.12 -m pip install --no-cache-dir setuptools wheel "setuptools_scm>=6.0" build ninja auditwheel patchelf + - name: Expose PyTorch CUDA libraries + run: | + python3.12 - <<'PY' + import os + import site + from pathlib import Path + + library_dirs = [] + for site_packages in map(Path, site.getsitepackages()): + torch_lib = site_packages / "torch" / "lib" + nvidia_libs = sorted(site_packages.glob("nvidia/*/lib")) + for path in [torch_lib, *nvidia_libs]: + if path.is_dir(): + library_dirs.append(path) + + if not library_dirs: + raise SystemExit("ERROR: no PyTorch CUDA library directories found") + if not any((path / "libcudnn.so.9").exists() for path in library_dirs): + raise SystemExit("ERROR: libcudnn.so.9 not found in PyTorch CUDA library directories") + + current = os.environ.get("LD_LIBRARY_PATH") + ld_library_path = ":".join(map(str, library_dirs + ([current] if current else []))) + + with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as github_env: + github_env.write(f"LD_LIBRARY_PATH={ld_library_path}\n") + + print("LD_LIBRARY_PATH entries:") + for path in library_dirs: + print(f" {path}") + PY + - name: Compute version id: version run: | From 628b28f86a6532b679012bbb9ce054f8be982118 Mon Sep 17 00:00:00 2001 From: tongke Date: Wed, 24 Jun 2026 17:11:58 +0800 Subject: [PATCH 3/4] ci: simplify LD_LIBRARY_PATH step to 1-liner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified working in CI — 
drop debug logging, redundant validation, and heredoc in favor of a compact inline python -c one-liner. --- .github/workflows/build-release.yml | 36 +++++++---------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index ecd18cf..0bf83ee 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -65,35 +65,15 @@ jobs: python3.12 -m pip install --no-cache-dir setuptools wheel "setuptools_scm>=6.0" build ninja auditwheel patchelf - name: Expose PyTorch CUDA libraries + # The nvidia/cuda devel image ships the CUDA toolkit but not cuDNN. + # torch's pip wheels bundle cuDNN inside site-packages/nvidia/*/lib/, + # which is not on the linker search path — add it so torch._C can load. run: | - python3.12 - <<'PY' - import os - import site - from pathlib import Path - - library_dirs = [] - for site_packages in map(Path, site.getsitepackages()): - torch_lib = site_packages / "torch" / "lib" - nvidia_libs = sorted(site_packages.glob("nvidia/*/lib")) - for path in [torch_lib, *nvidia_libs]: - if path.is_dir(): - library_dirs.append(path) - - if not library_dirs: - raise SystemExit("ERROR: no PyTorch CUDA library directories found") - if not any((path / "libcudnn.so.9").exists() for path in library_dirs): - raise SystemExit("ERROR: libcudnn.so.9 not found in PyTorch CUDA library directories") - - current = os.environ.get("LD_LIBRARY_PATH") - ld_library_path = ":".join(map(str, library_dirs + ([current] if current else []))) - - with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as github_env: - github_env.write(f"LD_LIBRARY_PATH={ld_library_path}\n") - - print("LD_LIBRARY_PATH entries:") - for path in library_dirs: - print(f" {path}") - PY + echo "LD_LIBRARY_PATH=$(python3.12 -c ' + import site, os + print(":".join(p for sp in site.getsitepackages() + for p in [sp + "/torch/lib"] + __import__("glob").glob(sp + "/nvidia/*/lib") + if 
os.path.isdir(p)))${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}")" >> "$GITHUB_ENV" - name: Compute version id: version From b6af85b083429fdb825783577111f56e0bd9019a Mon Sep 17 00:00:00 2001 From: tongke Date: Wed, 24 Jun 2026 17:19:50 +0800 Subject: [PATCH 4/4] ci: fix shell quoting in LD_LIBRARY_PATH step The previous 1-liner broke because single quotes inside $(python3.12 -c '...') conflicted with the outer shell quoting. Switch to a heredoc (<<'EOF') for the Python snippet, which avoids nested quoting entirely and keeps the code readable. --- .github/workflows/build-release.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 0bf83ee..73ea492 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -69,11 +69,17 @@ jobs: # torch's pip wheels bundle cuDNN inside site-packages/nvidia/*/lib/, # which is not on the linker search path — add it so torch._C can load. run: | - echo "LD_LIBRARY_PATH=$(python3.12 -c ' - import site, os - print(":".join(p for sp in site.getsitepackages() - for p in [sp + "/torch/lib"] + __import__("glob").glob(sp + "/nvidia/*/lib") - if os.path.isdir(p)))${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}")" >> "$GITHUB_ENV" + LIB_DIRS="$(python3.12 <<'EOF' + import glob, os, site + dirs = [] + for sp in site.getsitepackages(): + for p in [os.path.join(sp, "torch", "lib")] + glob.glob(os.path.join(sp, "nvidia", "*", "lib")): + if os.path.isdir(p): + dirs.append(p) + print(":".join(dirs)) + EOF + )" + echo "LD_LIBRARY_PATH=${LIB_DIRS}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV" - name: Compute version id: version