Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,14 @@ jobs:
--exclude cust
'

# Exclude rustc_codegen_nvvm: `--all-features` enables its `llvm19` feature,
# whose build.rs requires an LLVM 19 toolchain not present in the CI image.
- name: Check documentation
run: |
docker exec "$CONTAINER_NAME" bash -lc 'set -euo pipefail
export RUSTDOCFLAGS=-Dwarnings
cargo doc --workspace --all-features --document-private-items --no-deps
cargo doc --workspace --all-features --document-private-items --no-deps \
--exclude rustc_codegen_nvvm
'

- name: Stop build container
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,14 @@ jobs:
--exclude blastoff --exclude cudnn --exclude cudnn-sys --exclude cust

# Exclude crates that require cuDNN, not available on Windows CI: cudnn, cudnn-sys.
# Exclude rustc_codegen_nvvm: `--all-features` enables its `llvm19` feature,
# whose build.rs requires an LLVM 19 toolchain not present in the CI image.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we should add this to the images for the build step?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Linux LLVM 19 in CI images
  • Windows LLVM 19 prebuilt
  • RockyLinux 9 specifically (the genuinely awkward one)
  • Dual LLVM 7 + LLVM 19 testing in CI

Could I land those in a separate follow-up PR? I'll create a tracking issue and then figure out how to get both Linux and Windows working with LLVM 19: https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/

- name: Check documentation
env:
RUSTDOCFLAGS: -Dwarnings
run: |
cargo doc --workspace --all-features --document-private-items --no-deps `
--exclude cudnn --exclude cudnn-sys
--exclude cudnn --exclude cudnn-sys --exclude rustc_codegen_nvvm

# Disabled due to dll issues, someone with Windows knowledge needed
# - name: Compiletest
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ book
/target
**/.vscode
.devcontainer
.codex
rustc-ice-*.txt
.nix-driver-libs
.claude
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 30 additions & 6 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,21 @@ pub struct CudaBuilder {
pub final_module_path: Option<PathBuf>,
}

/// Default arch for new `CudaBuilder`s.
///
/// When the backend is being built with LLVM 19 support (detected via the `LLVM_CONFIG_19`
/// env var — the same signal `rustc_codegen_nvvm`'s build script uses), default to the
/// lowest Blackwell compute capability (`Compute100`). Pre-Blackwell archs use the legacy
/// LLVM 7 NVVM dialect, so pairing them with an LLVM 19 backend is never the right choice.
/// Callers can still override via [`CudaBuilder::arch`].
fn default_arch() -> NvvmArch {
if env::var_os("LLVM_CONFIG_19").is_some() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a fan of the env variables.

Is there any way to tell so we can just do the right thing automatically in the default case? Maybe query rustc / the nvvm backend and expose which llvm it supports there (via rustflags?)?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does ebfe81b solve this, or not?

NvvmArch::Compute100
} else {
NvvmArch::default()
}
}

impl CudaBuilder {
pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self {
Self {
Expand All @@ -204,7 +219,7 @@ impl CudaBuilder {
ptx_file_copy_path: None,
generate_line_info: true,
nvvm_opts: true,
arch: NvvmArch::default(),
Comment thread
brandonros marked this conversation as resolved.
arch: default_arch(),
ftz: false,
fast_sqrt: false,
fast_div: false,
Expand Down Expand Up @@ -355,6 +370,7 @@ impl CudaBuilder {
/// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
pub fn build(self) -> Result<PathBuf, CudaBuilderError> {
println!("cargo:rerun-if-changed={}", self.path_to_crate.display());
println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19");
let path = invoke_rustc(&self)?;
if let Some(copy_path) = self.ptx_file_copy_path {
std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?;
Expand Down Expand Up @@ -550,13 +566,21 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {

let target_dir = workspace_dir.join("target").join("cuda-builder-codegen");

let status = Command::new("cargo")
.args(["build", "-p", "rustc_codegen_nvvm"])
let mut cmd = Command::new("cargo");
cmd.args(["build", "-p", "rustc_codegen_nvvm"])
.arg("--target-dir")
.arg(&target_dir)
.current_dir(&workspace_dir)
.status()
.ok()?;
.current_dir(&workspace_dir);

// Propagate the llvm19 cargo feature to the nested build when the surrounding
// shell is configured for LLVM 19 (signalled by LLVM_CONFIG_19). Without this
// rustc_codegen_nvvm's build.rs defaults to the LLVM 7 path and falls through
// to the prebuilt LLVM 7 download, which fails on Linux.
if env::var_os("LLVM_CONFIG_19").is_some() {
cmd.args(["--features", "llvm19"]);
}

let status = cmd.status().ok()?;

if !status.success() {
return None;
Expand Down
8 changes: 8 additions & 0 deletions crates/cust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,12 @@ fn main() {
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
println!("cargo::rustc-cfg=cuCtxCreate_v4");
}

// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
if driver_version >= 13020 {
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
}
}
15 changes: 15 additions & 0 deletions crates/cust/src/memory/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is from #368

__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down Expand Up @@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand All @@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down
1 change: 1 addition & 0 deletions crates/cust_raw/build/cuda_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ impl CudaSdk {
vec![
cuda_root.join("nvvm").join("bin"),
cuda_root.join("nvvm").join("lib64"),
cuda_root.join("nvvm").join("lib"),
]
};
let library_dirs = Self::normalize_dirpaths(search_dirs);
Expand Down
9 changes: 6 additions & 3 deletions crates/cust_raw/build/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,12 @@ fn main() {
println!("cargo::rustc-link-search=native={}", libdir.display());
}
println!("cargo::rustc-link-lib=dylib=nvvm");
// Handle libdevice support.
fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc"))
.expect("Cannot copy libdevice bitcode file.");
// `fs::copy` preserves source mode. When libdevice.10.bc comes from
// the Nix store (0444), re-running this build can't overwrite the
// previous copy in OUT_DIR. Drop it first.
let dest = outdir.join("libdevice.bc");
let _ = fs::remove_file(&dest);
fs::copy(sdk.libdevice_bitcode_path(), &dest).expect("Cannot copy libdevice bitcode file.");
}
}

Expand Down
32 changes: 30 additions & 2 deletions crates/nvvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
ffi::{CStr, CString},
fmt::Display,
mem::MaybeUninit,
ptr::null_mut,
str::FromStr,
};

Expand Down Expand Up @@ -325,6 +324,10 @@ pub enum NvvmArch {
Compute89,
Compute90,
Compute90a,
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. This is also the default arch
/// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set.
Compute100,
Compute100f,
Compute100a,
Expand Down Expand Up @@ -448,6 +451,14 @@ impl NvvmArch {
self.capability_value() % 10
}

/// Returns `true` when this target is compiled with NVVM's modern IR dialect, and `false`
/// when it still uses the legacy LLVM 7 dialect.
///
/// CUDA 13.2 documents the modern dialect as Blackwell-and-later only; Blackwell starts at
/// `compute_100`, so the check is simply "capability value of at least 100".
pub fn uses_modern_ir_dialect(&self) -> bool {
    // 100 is the capability value of `compute_100`, the first modern-dialect target.
    let capability = self.capability_value();
    capability >= 100
}

/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
/// `Compute90a`).
pub fn target_feature(&self) -> &'static str {
Expand Down Expand Up @@ -739,7 +750,24 @@ impl NvvmProgram {
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
pub fn verify(&self) -> Result<(), NvvmError> {
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
self.verify_with_options(&[])
}

/// Verify the program like [`verify`](Self::verify), but hand the verifier an explicit set of
/// `NvvmOption`s — normally the same ones that will later be given to
/// [`compile`](Self::compile).
///
/// Forwarding the user-selected `-arch=compute_XXX` is the important case for CUDA 12.9+ /
/// LLVM 19 bitcode: without it the verifier can fall back to the legacy LLVM 7 parser and
/// reject modern-dialect bitcode that would otherwise compile fine.
///
/// Each option is rendered through its `Display` impl and NUL-terminated before being passed
/// to `nvvmVerifyProgram`.
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
    // Render every option as a NUL-terminated string so it can be read as a C string.
    let rendered: Vec<String> = options.iter().map(|opt| format!("{opt}\0")).collect();
    // Pointer table handed to the C API; each entry borrows from `rendered`.
    let mut rendered_ptrs: Vec<_> = rendered.iter().map(|s| s.as_ptr().cast()).collect();
    let option_count = rendered.len() as i32;

    // SAFETY: every pointer in `rendered_ptrs` refers to a NUL-terminated buffer owned by
    // `rendered`, which outlives the call, and `option_count` is the length of that table.
    // NOTE(review): assumes `self.raw` is a live program handle — confirm against the
    // constructor/`Drop` impl, which are outside this view.
    unsafe {
        nvvm_sys::nvvmVerifyProgram(self.raw, option_count, rendered_ptrs.as_mut_ptr())
            .to_result()
    }
}
}

Expand Down
4 changes: 4 additions & 0 deletions crates/rustc_codegen_nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ readme = "../../README.md"
[lib]
crate-type = ["dylib"]

[features]
default = []
llvm19 = []

[dependencies]
nvvm = { version = "0.1", path = "../nvvm" }
rustc-demangle = "0.1.24"
Expand Down
Loading
Loading