Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,9 @@ fn print_target_cpus(sess: &Session, tm: &llvm::TargetMachine, out: &mut String)
};
let mut cpus = cpu_names
.lines()
.filter(|cpu_name| {
!sess.target.unsupported_cpus.contains(&std::borrow::Cow::Borrowed(*cpu_name))
})
.map(|cpu_name| Cpu { cpu_name, remark: make_remark(cpu_name) })
.collect::<VecDeque<_>>();

Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_codegen_ssa/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,13 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
tcx.dcx().emit_fatal(errors::CpuRequired);
}

if let Some(target_cpu) = &tcx.sess.opts.cg.target_cpu
&& tcx.sess.target.unsupported_cpus.contains(&target_cpu.into())
{
// The target cpu is explicitly listed as an unsupported cpu
tcx.dcx().emit_fatal(errors::CpuUnsupported { target_cpu: target_cpu.clone() });
}

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);

// Run the monomorphization collector and partition the collected items into
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_codegen_ssa/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,12 @@ pub(crate) struct InsufficientVSCodeProduct;
#[diag("target requires explicitly specifying a cpu with `-C target-cpu`")]
pub(crate) struct CpuRequired;

/// Diagnostic for a `-C target-cpu` value that is recognized but that the
/// current target lists as unsupported.
#[derive(Diagnostic)]
#[diag("target cpu `{$target_cpu}` is known but unsupported")]
pub(crate) struct CpuUnsupported {
/// Name of the rejected CPU; interpolated into the message as `{$target_cpu}`.
pub target_cpu: String,
}

#[derive(Diagnostic)]
#[diag("processing debug info with `dsymutil` failed: {$status}")]
#[note("{$output}")]
Expand Down
14 changes: 13 additions & 1 deletion compiler/rustc_codegen_ssa/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,20 @@ pub fn target_spec_to_backend_features<'a>(
sess: &'a Session,
mut extend_backend_features: impl FnMut(&'a str, /* enable */ bool),
) {
// Compute implied features
let mut rust_features = vec![];

// This check handles SM versions that default (in LLVM) to PTX ISA versions unsupported by Rust.
// sm_70, sm_72 and sm_75 default to PTX ISA versions with major version 6, while sm_80 defaults to 7.0.
if sess.target.arch == Arch::Nvptx64
&& matches!(
sess.opts.cg.target_cpu.as_deref(),
None | Some("sm_70") | Some("sm_72") | Some("sm_75")
)
{
rust_features.push((true, "ptx70"));
}

// Compute implied features
parse_rust_feature_list(
sess,
&sess.target.features,
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_target/src/spec/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ impl Target {
forward!(asm_args);
forward!(cpu);
forward!(need_explicit_cpu);
forward!(unsupported_cpus);
forward!(features);
forward!(dynamic_linking);
forward_opt!(direct_access_external_data);
Expand Down Expand Up @@ -320,6 +321,7 @@ impl ToJson for Target {
target_option_val!(asm_args);
target_option_val!(cpu);
target_option_val!(need_explicit_cpu);
target_option_val!(unsupported_cpus);
target_option_val!(features);
target_option_val!(dynamic_linking);
target_option_val!(direct_access_external_data);
Expand Down Expand Up @@ -543,6 +545,7 @@ struct TargetSpecJson {
asm_args: Option<StaticCow<[StaticCow<str>]>>,
cpu: Option<StaticCow<str>>,
need_explicit_cpu: Option<bool>,
unsupported_cpus: Option<StaticCow<[StaticCow<str>]>>,
features: Option<StaticCow<str>>,
dynamic_linking: Option<bool>,
direct_access_external_data: Option<bool>,
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_target/src/spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2361,6 +2361,10 @@ pub struct TargetOptions {
/// Whether a cpu needs to be explicitly set.
/// Set to true if there is no default cpu. Defaults to false.
pub need_explicit_cpu: bool,
/// A list of CPUs that are provided by LLVM but are considered unsupported by Rust.
/// These CPUs are omitted from `--print target-cpus` output and will cause an error
/// if used with `-Ctarget-cpu`.
pub unsupported_cpus: StaticCow<[StaticCow<str>]>,
/// Default (Rust) target features to enable for this target. These features
/// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`.
/// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the
Expand Down Expand Up @@ -2818,6 +2822,7 @@ impl Default for TargetOptions {
asm_args: cvs![],
cpu: "generic".into(),
need_explicit_cpu: false,
unsupported_cpus: cvs![],
features: "".into(),
direct_access_external_data: None,
dynamic_linking: false,
Expand Down
10 changes: 8 additions & 2 deletions compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::spec::{
Arch, LinkSelfContainedDefault, LinkerFlavor, MergeFunctions, Os, PanicStrategy, Target,
TargetMetadata, TargetOptions,
TargetMetadata, TargetOptions, cvs,
};

pub(crate) fn target() -> Target {
Expand All @@ -22,7 +22,13 @@ pub(crate) fn target() -> Target {
linker_flavor: LinkerFlavor::Llbc,

// With `ptx-linker` approach, it can be later overridden via link flags.
cpu: "sm_30".into(),
cpu: "sm_70".into(),

// No longer supported architectures
unsupported_cpus: cvs!(
"sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
"sm_60", "sm_61", "sm_62"
),

// FIXME: create tests for the atomics.
max_atomic_width: Some(64),
Expand Down
28 changes: 2 additions & 26 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,19 +536,7 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[

const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
Comment thread
ZuseZ4 marked this conversation as resolved.
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_70", Unstable(sym::nvptx_target_feature), &[]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
Expand All @@ -567,19 +555,7 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx70", Unstable(sym::nvptx_target_feature), &[]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
Expand Down
14 changes: 12 additions & 2 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,23 @@ There are two options for using the core library:

### Target and features

It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
It is often beneficial to specify the target SM architecture, such as `-C target-cpu=sm_89`, because the default prioritizes broad compatibility rather than performance. Doing so also selects the PTX version as the *maximum* of (a) the oldest PTX version that supports the chosen target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)) and (b) the oldest PTX version supported by the Rust toolchain, which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target SM architecture (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
Later PTX versions may allow more efficient code generation.

Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.

## Minimum SM and PTX support by Rust version
Support for old hardware architectures and PTX ISA versions is periodically dropped. This table shows the minimum supported versions per Rust version.

| Rust | SM minimum | PTX ISA minimum |
| ------------ | -------------- | --------------- |
| - 1.96 | 2.0 | 3.2 |
| 1.97 - TBD | 7.0 (Volta+) | 7.0 (CUDA 11+) |

For a full overview of which GPUs support code built for a specific SM version, see the [CUDA GPU Compute Capability documentation](https://developer.nvidia.com/cuda/gpus).

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
Expand Down
3 changes: 2 additions & 1 deletion tests/assembly-llvm/nvptx-arch-default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
extern crate breakpoint_panic_handler;

// Verify default target arch with ptx-linker.
// CHECK: .target sm_30
// CHECK: .version 7.0
// CHECK: .target sm_70
// CHECK: .address_size 64
3 changes: 2 additions & 1 deletion tests/assembly-llvm/nvptx-arch-emit-asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
#![no_std]

// Verify default arch without ptx-linker involved.
// CHECK: .target sm_30
// CHECK: .version 7.0
// CHECK: .target sm_70
// CHECK: .address_size 64
4 changes: 2 additions & 2 deletions tests/assembly-llvm/nvptx-arch-target-cpu.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_50
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_87
//@ only-nvptx64

#![no_std]
Expand All @@ -8,5 +8,5 @@
extern crate breakpoint_panic_handler;

// Verify target arch override via `target-cpu`.
// CHECK: .target sm_50
// CHECK: .target sm_87
// CHECK: .address_size 64
24 changes: 0 additions & 24 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -229,18 +229,6 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
Expand Down Expand Up @@ -290,18 +278,6 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
Expand Down
4 changes: 4 additions & 0 deletions tests/ui/target-cpu/unsupported-target-cpu.nvptx-sm60.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
error: target cpu `sm_60` is known but unsupported

error: aborting due to 1 previous error

14 changes: 14 additions & 0 deletions tests/ui/target-cpu/unsupported-target-cpu.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//! Check that certain targets *reject* CPUs listed as unsupported when passed via `-C target-cpu`.

//@ revisions: nvptx-sm60

//@[nvptx-sm60] compile-flags: --target=nvptx64-nvidia-cuda --crate-type=rlib -Ctarget-cpu=sm_60
//@[nvptx-sm60] needs-llvm-components: nvptx
//@[nvptx-sm60] build-fail
//@ ignore-backends: gcc

#![feature(no_core)]
#![no_core]
#![crate_type = "rlib"]

//[nvptx-sm60]~? ERROR target cpu `sm_60` is known but unsupported
Loading