diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 180559d28d848..32694135705e8 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -510,6 +510,9 @@ fn print_target_cpus(sess: &Session, tm: &llvm::TargetMachine, out: &mut String) }; let mut cpus = cpu_names .lines() + .filter(|cpu_name| { + !sess.target.unsupported_cpus.contains(&std::borrow::Cow::Borrowed(*cpu_name)) + }) .map(|cpu_name| Cpu { cpu_name, remark: make_remark(cpu_name) }) .collect::>(); diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 50c439593c306..14f4dca532737 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -696,6 +696,13 @@ pub fn codegen_crate( tcx.dcx().emit_fatal(errors::CpuRequired); } + if let Some(target_cpu) = &tcx.sess.opts.cg.target_cpu + && tcx.sess.target.unsupported_cpus.contains(&target_cpu.into()) + { + // The target cpu is explicitly listed as an unsupported cpu + tcx.dcx().emit_fatal(errors::CpuUnsupported { target_cpu: target_cpu.clone() }); + } + let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx); // Run the monomorphization collector and partition the collected items into diff --git a/compiler/rustc_codegen_ssa/src/errors.rs b/compiler/rustc_codegen_ssa/src/errors.rs index f1112510af0f0..006b7f881ce23 100644 --- a/compiler/rustc_codegen_ssa/src/errors.rs +++ b/compiler/rustc_codegen_ssa/src/errors.rs @@ -540,6 +540,12 @@ pub(crate) struct InsufficientVSCodeProduct; #[diag("target requires explicitly specifying a cpu with `-C target-cpu`")] pub(crate) struct CpuRequired; +#[derive(Diagnostic)] +#[diag("target cpu `{$target_cpu}` is known but unsupported")] +pub(crate) struct CpuUnsupported { + pub target_cpu: String, +} + #[derive(Diagnostic)] #[diag("processing debug info with `dsymutil` failed: {$status}")] #[note("{$output}")] diff --git 
a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index 24f731c01996d..4b1b0866f2eb9 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -387,8 +387,20 @@ pub fn target_spec_to_backend_features<'a>( sess: &'a Session, mut extend_backend_features: impl FnMut(&'a str, /* enable */ bool), ) { - // Compute implied features let mut rust_features = vec![]; + + // This check handles SM versions whose LLVM default PTX ISA version is not supported by Rust. + // sm_70, sm_72 and sm_75 default to PTX ISA versions with major version 6, while sm_80 defaults to 7.0. + if sess.target.arch == Arch::Nvptx64 + && matches!( + sess.opts.cg.target_cpu.as_deref(), + None | Some("sm_70") | Some("sm_72") | Some("sm_75") + ) + { + rust_features.push((true, "ptx70")); + } + + // Compute implied features parse_rust_feature_list( sess, &sess.target.features, diff --git a/compiler/rustc_target/src/spec/json.rs b/compiler/rustc_target/src/spec/json.rs index 5507af0866758..8448c2ab51b3d 100644 --- a/compiler/rustc_target/src/spec/json.rs +++ b/compiler/rustc_target/src/spec/json.rs @@ -116,6 +116,7 @@ impl Target { forward!(asm_args); forward!(cpu); forward!(need_explicit_cpu); + forward!(unsupported_cpus); forward!(features); forward!(dynamic_linking); forward_opt!(direct_access_external_data); @@ -320,6 +321,7 @@ impl ToJson for Target { target_option_val!(asm_args); target_option_val!(cpu); target_option_val!(need_explicit_cpu); + target_option_val!(unsupported_cpus); target_option_val!(features); target_option_val!(dynamic_linking); target_option_val!(direct_access_external_data); @@ -543,6 +545,7 @@ struct TargetSpecJson { asm_args: Option]>>, cpu: Option>, need_explicit_cpu: Option, + unsupported_cpus: Option]>>, features: Option>, dynamic_linking: Option, direct_access_external_data: Option, diff --git a/compiler/rustc_target/src/spec/mod.rs 
b/compiler/rustc_target/src/spec/mod.rs index 768e43146a0c1..74cf01e77754e 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2361,6 +2361,10 @@ pub struct TargetOptions { /// Whether a cpu needs to be explicitly set. /// Set to true if there is no default cpu. Defaults to false. pub need_explicit_cpu: bool, + /// A list of CPUs that are provided by LLVM but are considered unsupported by Rust. + /// These CPUs are omitted from `--print target-cpus` output and will cause an error + /// if used with `-Ctarget-cpu`. + pub unsupported_cpus: StaticCow<[StaticCow]>, /// Default (Rust) target features to enable for this target. These features /// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`. /// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the @@ -2818,6 +2822,7 @@ impl Default for TargetOptions { asm_args: cvs![], cpu: "generic".into(), need_explicit_cpu: false, + unsupported_cpus: cvs![], features: "".into(), direct_access_external_data: None, dynamic_linking: false, diff --git a/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs b/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs index 87c2693e9877f..d8a0bd50ee204 100644 --- a/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs +++ b/compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs @@ -1,6 +1,6 @@ use crate::spec::{ Arch, LinkSelfContainedDefault, LinkerFlavor, MergeFunctions, Os, PanicStrategy, Target, - TargetMetadata, TargetOptions, + TargetMetadata, TargetOptions, cvs, }; pub(crate) fn target() -> Target { @@ -22,7 +22,13 @@ pub(crate) fn target() -> Target { linker_flavor: LinkerFlavor::Llbc, // With `ptx-linker` approach, it can be later overridden via link flags. 
- cpu: "sm_30".into(), + cpu: "sm_70".into(), + + // No longer supported architectures + unsupported_cpus: cvs!( + "sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53", + "sm_60", "sm_61", "sm_62" + ), // FIXME: create tests for the atomics. max_atomic_width: Some(64), diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index 9040c4eb1e399..498806f8c1c51 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -536,19 +536,7 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-start - ("sm_20", Unstable(sym::nvptx_target_feature), &[]), - ("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]), - ("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]), - ("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]), - ("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]), - ("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]), - ("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]), - ("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]), - ("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]), - ("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]), - ("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]), - ("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]), - ("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]), + ("sm_70", Unstable(sym::nvptx_target_feature), &[]), ("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]), ("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]), ("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]), @@ -567,19 +555,7 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]), // tidy-alphabetical-end // tidy-alphabetical-start - ("ptx32", 
Unstable(sym::nvptx_target_feature), &[]), - ("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]), - ("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]), - ("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]), - ("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]), - ("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]), - ("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]), - ("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]), - ("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]), - ("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]), - ("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]), - ("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]), - ("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]), + ("ptx70", Unstable(sym::nvptx_target_feature), &[]), ("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]), ("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]), ("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]), diff --git a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md index 2d9fc85dad33d..02510d9f3dee2 100644 --- a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md +++ b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md @@ -25,13 +25,23 @@ There are two options for using the core library: ### Target and features -It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility. 
-One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0). +It is often beneficial to specify the target SM architecture, such as `-C target-cpu=sm_89`, because the default prioritizes broad compatibility rather than performance. Doing so also selects the PTX version as the *maximum* of (a) the oldest PTX version that supports the chosen target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)) and (b) the oldest PTX version supported by the Rust toolchain, which maximizes driver compatibility. +One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target SM architecture (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0). Later PTX versions may allow more efficient code generation. Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via (either via `-Ctarget-cpu` and optionally `-Ctarget-feature`). While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future. +## Minimum SM and PTX support by Rust version +Support for old hardware architectures and PTX ISA versions is periodically dropped. This table shows the minimum supported versions per Rust version. + +| Rust | SM minimum | PTX ISA minimum | +| ------------ | -------------- | --------------- | +| - 1.96 | 2.0 | 3.2 | +| 1.97 - TBD | 7.0 (Volta+) | 7.0 (CUDA 11+) | + +For a full overview of which GPUs support code built for a specific SM version, see the [CUDA GPU Compute Capability documentation](https://developer.nvidia.com/cuda/gpus). 
+ ## Building Rust kernels A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following. diff --git a/tests/assembly-llvm/nvptx-arch-default.rs b/tests/assembly-llvm/nvptx-arch-default.rs index 22b4a680e322c..e71304e453303 100644 --- a/tests/assembly-llvm/nvptx-arch-default.rs +++ b/tests/assembly-llvm/nvptx-arch-default.rs @@ -8,5 +8,6 @@ extern crate breakpoint_panic_handler; // Verify default target arch with ptx-linker. -// CHECK: .target sm_30 +// CHECK: .version 7.0 +// CHECK: .target sm_70 // CHECK: .address_size 64 diff --git a/tests/assembly-llvm/nvptx-arch-emit-asm.rs b/tests/assembly-llvm/nvptx-arch-emit-asm.rs index e47f8e78e3679..9266309c6202e 100644 --- a/tests/assembly-llvm/nvptx-arch-emit-asm.rs +++ b/tests/assembly-llvm/nvptx-arch-emit-asm.rs @@ -5,5 +5,6 @@ #![no_std] // Verify default arch without ptx-linker involved. -// CHECK: .target sm_30 +// CHECK: .version 7.0 +// CHECK: .target sm_70 // CHECK: .address_size 64 diff --git a/tests/assembly-llvm/nvptx-arch-target-cpu.rs b/tests/assembly-llvm/nvptx-arch-target-cpu.rs index e02ad0d558a96..b5062f1ba20d7 100644 --- a/tests/assembly-llvm/nvptx-arch-target-cpu.rs +++ b/tests/assembly-llvm/nvptx-arch-target-cpu.rs @@ -1,5 +1,5 @@ //@ assembly-output: ptx-linker -//@ compile-flags: --crate-type cdylib -C target-cpu=sm_50 +//@ compile-flags: --crate-type cdylib -C target-cpu=sm_87 //@ only-nvptx64 #![no_std] @@ -8,5 +8,5 @@ extern crate breakpoint_panic_handler; // Verify target arch override via `target-cpu`. 
-// CHECK: .target sm_50 +// CHECK: .target sm_87 // CHECK: .address_size 64 diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr index b53419c512b08..981c173242408 100644 --- a/tests/ui/check-cfg/target_feature.stderr +++ b/tests/ui/check-cfg/target_feature.stderr @@ -229,18 +229,6 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `power9-altivec` `power9-vector` `prfchw` -`ptx32` -`ptx40` -`ptx41` -`ptx42` -`ptx43` -`ptx50` -`ptx60` -`ptx61` -`ptx62` -`ptx63` -`ptx64` -`ptx65` `ptx70` `ptx71` `ptx72` @@ -290,18 +278,6 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `sm_101a` `sm_120` `sm_120a` -`sm_20` -`sm_21` -`sm_30` -`sm_32` -`sm_35` -`sm_37` -`sm_50` -`sm_52` -`sm_53` -`sm_60` -`sm_61` -`sm_62` `sm_70` `sm_72` `sm_75` diff --git a/tests/ui/target-cpu/unsupported-target-cpu.nvptx-sm60.stderr b/tests/ui/target-cpu/unsupported-target-cpu.nvptx-sm60.stderr new file mode 100644 index 0000000000000..76092b8391f33 --- /dev/null +++ b/tests/ui/target-cpu/unsupported-target-cpu.nvptx-sm60.stderr @@ -0,0 +1,4 @@ +error: target cpu `sm_60` is known but unsupported + +error: aborting due to 1 previous error + diff --git a/tests/ui/target-cpu/unsupported-target-cpu.rs b/tests/ui/target-cpu/unsupported-target-cpu.rs new file mode 100644 index 0000000000000..dafbfbc015ec1 --- /dev/null +++ b/tests/ui/target-cpu/unsupported-target-cpu.rs @@ -0,0 +1,14 @@ +//! Check that certain targets reject a `-C target-cpu` value that is listed in their unsupported CPUs. + +//@ revisions: nvptx-sm60 + +//@[nvptx-sm60] compile-flags: --target=nvptx64-nvidia-cuda --crate-type=rlib -Ctarget-cpu=sm_60 +//@[nvptx-sm60] needs-llvm-components: nvptx +//@[nvptx-sm60] build-fail +//@ ignore-backends: gcc + +#![feature(no_core)] +#![no_core] +#![crate_type = "rlib"] + +//[nvptx-sm60]~? ERROR target cpu `sm_60` is known but unsupported