From 00603d4d4ac19ca7949f923981a5ec0716fc9ca4 Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Fri, 17 Apr 2026 10:47:49 +0100 Subject: [PATCH 1/6] Update logic for handling target feature inline always mismatches --- compiler/rustc_codegen_llvm/src/attributes.rs | 84 ++++++++++++++----- compiler/rustc_codegen_llvm/src/builder.rs | 40 +++++---- compiler/rustc_middle/src/ty/context.rs | 54 +++++++++++- compiler/rustc_target/src/target_features.rs | 6 ++ 4 files changed, 143 insertions(+), 41 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 1f59d250e08a0..1afd23c6a5e82 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -40,25 +40,23 @@ pub(crate) fn remove_string_attr_from_llfn(llfn: &Value, name: &str) { llvm::RemoveStringAttrFromFn(llfn, name); } -/// Get LLVM attribute for the provided inline heuristic. -pub(crate) fn inline_attr<'ll, 'tcx>( +/// Get LLVM attribute for the provided inline heuristic that can be applied +/// to a function definition +fn inline_attr_for_fn_def<'ll, 'tcx>( cx: &SimpleCx<'ll>, tcx: TyCtxt<'tcx>, instance: ty::Instance<'tcx>, + has_function_features: bool, ) -> Option<&'ll Attribute> { - // `optnone` requires `noinline` - let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id()); - let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) { - (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never, - (InlineAttr::None, _) if instance.def.requires_inline(tcx) => InlineAttr::Hint, - (inline, _) => inline, - }; - if !tcx.sess.opts.unstable_opts.inline_llvm { // disable LLVM inlining return Some(AttributeKind::NoInline.create_attr(cx.llcx)); } - match inline { + + let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id()); + let inline = get_inline_attr_from_codegen_fn_attrs(tcx, codegen_fn_attrs, instance); + + let llvm_attr = match inline { InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)), InlineAttr::Always | InlineAttr::Force { .. } => { Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)) @@ -71,7 +69,58 @@ pub(crate) fn inline_attr<'ll, 'tcx>( } } InlineAttr::None => None, + }; + + let is_inline_always = is_inline_always_attr(inline); + // Keep non-`#[inline(always)]` attributes on the function definition as + // usual. `#[inline(always)]` can also stay on the function when there are + // no per-function target features. + // + // Once a function has target features, we avoid attaching `alwaysinline` + // to the definition itself. In that case the attribute is checked and, + // when legal, emitted on individual call sites instead. + if !is_inline_always || (is_inline_always && !has_function_features) { llvm_attr } else { None } +} + +/// Get the `InlineAttr` for the given instance. +fn get_inline_attr_from_codegen_fn_attrs<'tcx>( + tcx: TyCtxt<'tcx>, + codegen_fn_attrs: &CodegenFnAttrs, + instance: ty::Instance<'tcx>, +) -> InlineAttr { + // `optnone` requires `noinline` + match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) { + (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never, + (InlineAttr::None, _) if instance.def.requires_inline(tcx) => InlineAttr::Hint, + (inline, _) => inline, + } +} + +#[inline] +fn is_inline_always_attr(inline_attr: InlineAttr) -> bool { + matches!(inline_attr, InlineAttr::Always | InlineAttr::Force { .. }) +} + +/// Do we have an LLVM inline always attribute for the callsite? +pub(crate) fn has_inline_always_callsite_attribute<'tcx>( + tcx: TyCtxt<'tcx>, + attrs: &CodegenFnAttrs, + instance: ty::Instance<'tcx>, +) -> bool { + // disable LLVM inlining + if !tcx.sess.opts.unstable_opts.inline_llvm { + return false; } + + // If there are no target features on the function then we do not want to + // return anything. As the attribute will have been applied to the function + // definition. + if attrs.target_features.is_empty() { + return false; + } + + // We are only interested in the `#[inline(always)]` attribute + is_inline_always_attr(get_inline_attr_from_codegen_fn_attrs(tcx, attrs, instance)) } #[inline] @@ -568,14 +617,11 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( let function_features = codegen_fn_attrs.target_features.iter().map(|f| f.name.as_str()).collect::>(); - // Apply function attributes as per usual if there are no user defined - // target features otherwise this will get applied at the callsite. - if function_features.is_empty() { - if let Some(instance) = instance - && let Some(inline_attr) = inline_attr(cx, tcx, instance) - { - to_add.push(inline_attr); - } + if let Some(instance) = instance + && let Some(inline_attr) = + inline_attr_for_fn_def(cx, tcx, instance, !function_features.is_empty()) + { + to_add.push(inline_attr); } let function_features = function_features diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 7e5f71209fbab..19328ef5c8687 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -15,7 +15,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::*; use rustc_data_structures::small_c_str::SmallCStr; use rustc_hir::def_id::DefId; -use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, TargetFeature, TargetFeatureKind}; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers, TyAndLayout, @@ -30,12 +30,12 @@ use smallvec::SmallVec; use tracing::{debug, instrument}; use crate::abi::FnAbiLlvmExt; -use crate::attributes; +use crate::attributes::{self}; use crate::common::Funclet; use crate::context::{CodegenCx, FullCx, GenericCx, SCx}; use crate::llvm::{ - self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, FromGeneric, GEPNoWrapFlags, Metadata, TRUE, - ToLlvmBool, Type, Value, + self, AtomicOrdering, AtomicRmwBinOp, AttributeKind, BasicBlock, FromGeneric, GEPNoWrapFlags, + Metadata, TRUE, ToLlvmBool, Type, Value, }; use crate::type_of::LayoutLlvmExt; @@ -1419,28 +1419,26 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { ) }; - if let Some(callee_instance) = callee_instance { + // Check for whether we can add `#[inline(always)]` to a callsite + if let (Some(callee_instance), Some(caller_attrs)) = (callee_instance, caller_attrs) { // Attributes on the function definition being called let callee_attrs = self.cx.tcx.codegen_fn_attrs(callee_instance.def_id()); - if let Some(caller_attrs) = caller_attrs - // If there is an inline attribute and a target feature that matches - // we will add the attribute to the callsite otherwise we'll omit - // this and not add the attribute to prevent soundness issues. - && let Some(inlining_rule) = attributes::inline_attr(&self.cx, self.cx.tcx, callee_instance) - && self.cx.tcx.is_target_feature_call_safe( - &callee_attrs.target_features, - &caller_attrs.target_features.iter().cloned().chain( - self.cx.tcx.sess.target_features.iter().map(|feat| TargetFeature { - name: *feat, - kind: TargetFeatureKind::Implied, - }) - ).collect::>(), - ) - { + + // Only propagate `#[inline(always)]` to the callsite when there is + // an attribute and the caller and callee are compatible for + // inlining here. + if attributes::has_inline_always_callsite_attribute( + self.cx.tcx, + callee_attrs, + callee_instance, + ) && self.tcx.is_call_inline_able_at_callsite( + &callee_attrs.target_features, + &caller_attrs.target_features, + ) { attributes::apply_to_callsite( call, llvm::AttributePlace::Function, - &[inlining_rule], + &[AttributeKind::AlwaysInline.create_attr(self.cx.llcx)], ); } } diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index c25f5b402eb0d..9e0d1109d072b 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -18,7 +18,7 @@ use std::{fmt, iter, mem}; use rustc_abi::{ExternAbi, FieldIdx, Layout, LayoutData, TargetDataLayout, VariantIdx}; use rustc_ast as ast; use rustc_data_structures::defer; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::intern::Interned; use rustc_data_structures::jobserver::Proxy; use rustc_data_structures::profiling::SelfProfilerRef; @@ -1231,6 +1231,58 @@ impl<'tcx> TyCtxt<'tcx> { .all(|feature| body_features.iter().any(|f| f.name == feature.name)) } + /// Returns whether a callee can safely be always inlined into a caller at + /// this callsite. + /// + /// This requires more than "the caller has at least the callee's target + /// features". We also require the caller and callee to agree on any + /// target features that affect the vector ABI, otherwise inlining could + /// reinterpret arguments under a different calling convention. + pub fn is_call_inline_able_at_callsite( + self, + callee_features: &[TargetFeature], + caller_features: &[TargetFeature], + ) -> bool { + // Fold in globally enabled target features, since they are part of the + // effective feature set for both sides. + let callee_features = self.effective_inline_target_features(callee_features); + let caller_features = self.effective_inline_target_features(caller_features); + + // A plain subset check is not sufficient. For example, `avx` + // implicitly enables `sse`, so a callee that only requires `sse` + // appears to be a subset of an `avx` caller. However, `avx` also + // changes how vector arguments are passed, so inlining that callee into + // the caller would cross an ABI boundary. Require both the feature + // subset relation and matching ABI-relevant vector features. + self.vector_abi_matches(&callee_features, &caller_features) + && callee_features.is_subset(&caller_features) + } + + fn vector_abi_matches( + self, + callee_features: &FxIndexSet, + caller_features: &FxIndexSet, + ) -> bool { + self.abi_target_features(caller_features) == self.abi_target_features(callee_features) + } + + pub fn abi_target_features(self, feature_names: &FxIndexSet) -> FxIndexSet { + feature_names + .iter() + .cloned() + .filter(|it| self.sess.target.feature_could_influence_vector_length(it.as_str())) + .collect() + } + + pub fn effective_inline_target_features( + self, + features: &[TargetFeature], + ) -> FxIndexSet { + let mut all_features = self.sess.unstable_target_features.clone(); + all_features.extend(features.iter().map(|it| it.name)); + all_features + } + /// Returns the safe version of the signature of the given function, if calling it /// would be safe in the context of the given caller. pub fn adjust_target_feature_sig( diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index e2bf1c48b7b47..6c5c4cb351493 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -1070,6 +1070,12 @@ impl Target { } } + /// Could the feature influence the vector length? + pub fn feature_could_influence_vector_length(&self, feature: &str) -> bool { + self.features_for_correct_fixed_length_vector_abi().iter().any(|(_, name)| *name == feature) + || self.features_for_correct_scalable_vector_abi() == Some(feature) + } + pub fn tied_target_features(&self) -> &'static [&'static [&'static str]] { match &self.arch { Arch::AArch64 | Arch::Arm64EC => AARCH64_TIED_FEATURES, From 9a7bc31ab042040e6753b4dc82e2b9af87ca2b46 Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Fri, 17 Apr 2026 10:48:18 +0100 Subject: [PATCH 2/6] Update diagnostic messages --- .../check_inline_always_target_features.rs | 91 ++++++++++++++----- compiler/rustc_mir_transform/src/errors.rs | 41 ++++++--- 2 files changed, 97 insertions(+), 35 deletions(-) diff --git a/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs b/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs index abad28f0a8f83..c11aeded3713d 100644 --- a/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs +++ b/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs @@ -1,5 +1,5 @@ use rustc_hir::attrs::InlineAttr; -use rustc_middle::middle::codegen_fn_attrs::{TargetFeature, TargetFeatureKind}; +use rustc_middle::middle::codegen_fn_attrs::TargetFeatureKind; use rustc_middle::mir::{Body, TerminatorKind}; use rustc_middle::ty::{self, TyCtxt}; @@ -51,36 +51,83 @@ fn check_inline_always_target_features<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx // Scan the users defined target features and ensure they // match the caller. - if tcx.is_target_feature_call_safe( + if tcx.is_call_inline_able_at_callsite( &callee_codegen_fn_attrs.target_features, - &caller_codegen_fn_attrs - .target_features - .iter() - .cloned() - .chain(tcx.sess.target_features.iter().map(|feat| TargetFeature { - name: *feat, - kind: TargetFeatureKind::Implied, - })) - .collect::>(), + &caller_codegen_fn_attrs.target_features, ) { continue; } - let callee_only: Vec<_> = callee_codegen_fn_attrs + // Use the full target feature sets, including implied and + // command-line features, to classify the mismatch. Diagnostic + // messages should still only mention the non-implied features + // that the user actually enabled. + let caller_features = + tcx.effective_inline_target_features(&caller_codegen_fn_attrs.target_features); + let callee_features = + tcx.effective_inline_target_features(&callee_codegen_fn_attrs.target_features); + + let explicit_caller_features: Vec<_> = caller_codegen_fn_attrs + .target_features + .iter() + .cloned() + .filter(|it| it.kind != TargetFeatureKind::Implied) + .collect(); + let explicit_callee_features: Vec<_> = callee_codegen_fn_attrs .target_features .iter() - .filter(|it| !caller_codegen_fn_attrs.target_features.contains(it)) - .filter(|it| !matches!(it.kind, TargetFeatureKind::Implied)) - .map(|it| it.name.as_str()) + .cloned() + .filter(|it| it.kind != TargetFeatureKind::Implied) .collect(); - crate::errors::emit_inline_always_target_feature_diagnostic( - tcx, - terminator.source_info.span, - callee_def_id, - caller_def_id.into(), - &callee_only, - ); + let explicit_caller_features = + tcx.effective_inline_target_features(&explicit_caller_features); + let explicit_callee_features = + tcx.effective_inline_target_features(&explicit_callee_features); + + // If the callee's features are otherwise a subset of the + // caller's, then the mismatch is only due to the caller using a + // different vector ABI from the callee. + if callee_features.is_subset(&caller_features) { + // We only want to display the target features the user + // missed out. Not every feature that is possibly enabled. + let caller_abi_features = tcx.abi_target_features(&explicit_caller_features); + let callee_abi_features = tcx.abi_target_features(&explicit_callee_features); + let caller_only = caller_abi_features + .difference(&callee_abi_features) + .map(|it| it.as_str()) + .collect::>() + .join(", "); + + // Emit that the issue is caused by a vector ABI mismatch. + crate::errors::emit_inline_always_target_feature_diagnostic( + tcx, + terminator.source_info.span, + callee_def_id, + caller_def_id.into(), + &caller_only, + caller_def_id.into(), + callee_def_id, + ); + } else { + let callee_only = explicit_callee_features + .difference(&explicit_caller_features) + .map(|it| it.as_str()) + .collect::>() + .join(", "); + + // Emit that the issue stems from the callee having features + // enabled that the caller does not have enabled. + crate::errors::emit_inline_always_target_feature_diagnostic( + tcx, + terminator.source_info.span, + callee_def_id, + caller_def_id.into(), + &callee_only, + callee_def_id, + caller_def_id.into(), + ); + } } _ => (), } diff --git a/compiler/rustc_mir_transform/src/errors.rs b/compiler/rustc_mir_transform/src/errors.rs index 39c85489f939a..b94ab32750421 100644 --- a/compiler/rustc_mir_transform/src/errors.rs +++ b/compiler/rustc_mir_transform/src/errors.rs @@ -11,22 +11,35 @@ use rustc_session::lint::{self, Lint}; use rustc_span::def_id::DefId; use rustc_span::{Ident, Span, Symbol}; -/// Emit diagnostic for calls to `#[inline(always)]`-annotated functions with a -/// `#[target_feature]` attribute where the caller enables a different set of target features. -pub(crate) fn emit_inline_always_target_feature_diagnostic<'a, 'tcx>( +/// Emit the `inline_always_mismatching_target_features` lint for a call to an +/// `#[inline(always)]` function that cannot be inlined at the call site. +/// +/// This is used for both direct caller/callee target feature mismatches and +/// vector ABI mismatches. `feature_source_def_id` identifies the side that +/// contributes the missing features, while `feature_target_def_id` identifies +/// the side that should be updated by the suggestion. +pub(crate) fn emit_inline_always_target_feature_diagnostic<'tcx>( tcx: TyCtxt<'tcx>, call_span: Span, callee_def_id: DefId, caller_def_id: DefId, - callee_only: &[&'a str], + missing_features: &str, + feature_source_def_id: DefId, + feature_target_def_id: DefId, ) { tcx.emit_node_span_lint( lint::builtin::INLINE_ALWAYS_MISMATCHING_TARGET_FEATURES, tcx.local_def_id_to_hir_id(caller_def_id.as_local().unwrap()), call_span, rustc_errors::DiagDecorator(|lint| { + // These calls to `tcx.def_path_str(...)` need to live inside this + // closure otherwise can cause an ICE, see; + // https://github.com/rust-lang/rust/pull/150805 let callee = tcx.def_path_str(callee_def_id); - let caller = tcx.def_path_str(caller_def_id); + let feature_target = tcx.def_path_str(feature_target_def_id); + let feature_source = tcx.def_path_str(feature_source_def_id); + + let suggested_features = missing_features.replace(", ", ","); lint.primary_message(format!( "call to `#[inline(always)]`-annotated `{callee}` \ @@ -35,17 +48,19 @@ pub(crate) fn emit_inline_always_target_feature_diagnostic<'a, 'tcx>( lint.note("function will not be inlined"); lint.note(format!( - "the following target features are on `{callee}` but missing from `{caller}`: {}", - callee_only.join(", ") + "the following target features are on `{feature_source}` but missing from \ + `{feature_target}`: {missing_features}" )); - lint.span_note(callee_def_id.default_span(tcx), format!("`{callee}` is defined here")); + lint.span_note( + feature_source_def_id.default_span(tcx), + format!("`{feature_source}` is defined here"), + ); - let feats = callee_only.join(","); lint.span_suggestion( - tcx.def_span(caller_def_id).shrink_to_lo(), - format!("add `#[target_feature]` attribute to `{caller}`"), - format!("#[target_feature(enable = \"{feats}\")]\n"), - lint::Applicability::MaybeIncorrect, + tcx.def_span(feature_target_def_id).shrink_to_lo(), + format!("add `#[target_feature]` attribute to `{feature_target}`"), + format!("#[target_feature(enable = \"{suggested_features}\")]\n"), + Applicability::MaybeIncorrect, ); }), ); From c985c9807bee0ce07bc05da433d9c8616f5011e2 Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Fri, 17 Apr 2026 10:54:07 +0100 Subject: [PATCH 3/6] Update and create new tests --- .../inline-always-callsite-global-avx512f.rs | 31 +++++++++++++ .../inline-always-vector-abi-avx512f.rs | 31 +++++++++++++ .../inline-always-vector-abi-avx512f.stderr | 22 +++++++++ ...inline-always-vector-abi-callee-missing.rs | 44 ++++++++++++++++++ ...ne-always-vector-abi-callee-missing.stderr | 22 +++++++++ .../inline-always-vector-abi-global-avx.rs | 34 ++++++++++++++ .../inline-always-vector-abi.rs | 46 +++++++++++++++++++ .../inline-always-vector-abi.stderr | 41 +++++++++++++++++ .../inline-always.aarch64.stderr | 4 +- 9 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 tests/codegen-llvm/inline-always-callsite-global-avx512f.rs create mode 100644 tests/ui/target-feature/inline-always-vector-abi-avx512f.rs create mode 100644 tests/ui/target-feature/inline-always-vector-abi-avx512f.stderr create mode 100644 tests/ui/target-feature/inline-always-vector-abi-callee-missing.rs create mode 100644 tests/ui/target-feature/inline-always-vector-abi-callee-missing.stderr create mode 100644 tests/ui/target-feature/inline-always-vector-abi-global-avx.rs create mode 100644 tests/ui/target-feature/inline-always-vector-abi.rs create mode 100644 tests/ui/target-feature/inline-always-vector-abi.stderr diff --git a/tests/codegen-llvm/inline-always-callsite-global-avx512f.rs b/tests/codegen-llvm/inline-always-callsite-global-avx512f.rs new file mode 100644 index 0000000000000..f35aff67918ec --- /dev/null +++ b/tests/codegen-llvm/inline-always-callsite-global-avx512f.rs @@ -0,0 +1,31 @@ +//@ add-minicore +//@ compile-flags: --target x86_64-unknown-linux-gnu -C target-feature=+avx512f -Zinline-mir=no -C no-prepopulate-passes +//@ needs-llvm-components: x86 + +#![crate_type = "lib"] +#![feature(no_core, lang_items, target_feature_inline_always)] +#![no_core] + +extern crate minicore; +use minicore::*; + +#[inline(always)] +#[target_feature(enable = "sse")] +#[no_mangle] +pub unsafe fn single_target_feature() -> i32 { + 42 +} + +// `avx512f` is enough here because it implicitly enables `avx`, which in turn +// implies `sse`. That makes the caller compatible with the callee at this +// callsite, so the `alwaysinline` attribute should be emitted on the call. +#[no_mangle] +// CHECK-LABEL: define{{( noundef)?}} i32 @inherits_from_global() unnamed_addr +pub fn inherits_from_global() -> i32 { + unsafe { + // CHECK: %_0 = call{{( noundef)?}} i32 @single_target_feature() [[CALL_ATTRS:#[0-9]+]] + single_target_feature() + } +} + +// CHECK: attributes [[CALL_ATTRS]] = { alwaysinline nounwind } diff --git a/tests/ui/target-feature/inline-always-vector-abi-avx512f.rs b/tests/ui/target-feature/inline-always-vector-abi-avx512f.rs new file mode 100644 index 0000000000000..91142f23d9fbe --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi-avx512f.rs @@ -0,0 +1,31 @@ +//@ build-pass +//@ compile-flags: --crate-type=lib --target=x86_64-unknown-linux-gnu +//@ only-x86_64 +//@ needs-llvm-components: x86 +//@ ignore-backends: gcc + +#![feature(target_feature_inline_always)] +#![allow(dead_code, unused_unsafe)] + +use std::arch::x86_64::__m256; + +#[inline(never)] +#[target_feature(enable = "sse")] +fn sink(_x: &__m256) {} + +#[inline(always)] +#[target_feature(enable = "sse")] +fn callee_missing_avx512f(x: &__m256, y: bool) { + if y { + callee_missing_avx512f(x, y); + } else { + sink(x); + } +} + +// `avx512f` only changes the `__m256` ABI because it implicitly enables `avx`. +#[target_feature(enable = "avx512f")] +fn caller_has_avx512f_abi_mismatch(x: &__m256, y: bool) { + unsafe { callee_missing_avx512f(x, y) } + //~^ WARNING call to `#[inline(always)]`-annotated `callee_missing_avx512f` requires the same target features to be inlined [inline_always_mismatching_target_features] +} diff --git a/tests/ui/target-feature/inline-always-vector-abi-avx512f.stderr b/tests/ui/target-feature/inline-always-vector-abi-avx512f.stderr new file mode 100644 index 0000000000000..26a05ffc9363f --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi-avx512f.stderr @@ -0,0 +1,22 @@ +warning: call to `#[inline(always)]`-annotated `callee_missing_avx512f` requires the same target features to be inlined + --> $DIR/inline-always-vector-abi-avx512f.rs:29:14 + | +LL | unsafe { callee_missing_avx512f(x, y) } + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: function will not be inlined + = note: the following target features are on `caller_has_avx512f_abi_mismatch` but missing from `callee_missing_avx512f`: avx512f +note: `caller_has_avx512f_abi_mismatch` is defined here + --> $DIR/inline-always-vector-abi-avx512f.rs:28:1 + | +LL | fn caller_has_avx512f_abi_mismatch(x: &__m256, y: bool) { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: `#[warn(inline_always_mismatching_target_features)]` on by default +help: add `#[target_feature]` attribute to `callee_missing_avx512f` + | +LL + #[target_feature(enable = "avx512f")] +LL | fn callee_missing_avx512f(x: &__m256, y: bool) { + | + +warning: 1 warning emitted + diff --git a/tests/ui/target-feature/inline-always-vector-abi-callee-missing.rs b/tests/ui/target-feature/inline-always-vector-abi-callee-missing.rs new file mode 100644 index 0000000000000..b2ae43c3a5f78 --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi-callee-missing.rs @@ -0,0 +1,44 @@ +//@ run-pass +//@ compile-flags: -C opt-level=3 +//@ only-x86_64 +//@ only-linux +//@ ignore-backends: gcc + +#![feature(target_feature_inline_always)] + +use std::arch::x86_64::__m256; + +#[inline(never)] +#[target_feature(enable = "sse")] +fn f(x: &__m256) { + let x = unsafe { std::mem::transmute::<_, [u32; 8]>(*x) }; + assert_eq!(x, [1, 2, 3, 4, 5, 6, 7, 8]); +} + +#[inline(always)] +#[target_feature(enable = "sse")] +fn g(x: &__m256, y: bool) { + if y { + g(x, y); + } else { + f(x); + } +} + +#[target_feature(enable = "avx")] +fn h(x: &__m256, y: bool) { + g(x, y) + //~^ WARNING call to `#[inline(always)]`-annotated `g` requires the same target features to be inlined [inline_always_mismatching_target_features] +} + +fn main() { + if !is_x86_feature_detected!("avx") { + return; + } + + let x = std::hint::black_box(unsafe { + std::mem::transmute::<_, __m256>([1_u32, 2, 3, 4, 5, 6, 7, 8]) + }); + let y = std::hint::black_box(false); + unsafe { h(&x, y) } +} diff --git a/tests/ui/target-feature/inline-always-vector-abi-callee-missing.stderr b/tests/ui/target-feature/inline-always-vector-abi-callee-missing.stderr new file mode 100644 index 0000000000000..9db2725d1cdf4 --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi-callee-missing.stderr @@ -0,0 +1,22 @@ +warning: call to `#[inline(always)]`-annotated `g` requires the same target features to be inlined + --> $DIR/inline-always-vector-abi-callee-missing.rs:30:5 + | +LL | g(x, y) + | ^^^^^^^ + | + = note: function will not be inlined + = note: the following target features are on `h` but missing from `g`: avx +note: `h` is defined here + --> $DIR/inline-always-vector-abi-callee-missing.rs:29:1 + | +LL | fn h(x: &__m256, y: bool) { + | ^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: `#[warn(inline_always_mismatching_target_features)]` on by default +help: add `#[target_feature]` attribute to `g` + | +LL + #[target_feature(enable = "avx")] +LL | fn g(x: &__m256, y: bool) { + | + +warning: 1 warning emitted + diff --git a/tests/ui/target-feature/inline-always-vector-abi-global-avx.rs b/tests/ui/target-feature/inline-always-vector-abi-global-avx.rs new file mode 100644 index 0000000000000..c72560077dc79 --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi-global-avx.rs @@ -0,0 +1,34 @@ +//@ run-pass +//@ compile-flags: -C opt-level=3 -Ctarget-feature=+avx +//@ only-x86_64 +//@ only-linux +//@ ignore-backends: gcc + +#![feature(target_feature_inline_always)] + +use std::arch::x86_64::__m256; + +const EXPECTED: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + +#[inline(never)] +#[target_feature(enable = "sse")] +fn f(x: &__m256) { + let x = unsafe { std::mem::transmute::<_, [u32; 8]>(*x) }; + assert_eq!(x, EXPECTED); +} + +#[inline(always)] +#[target_feature(enable = "sse")] +fn g(x: &__m256) { + f(x); +} + +#[target_feature(enable = "avx")] +fn h(x: &__m256) { + g(x); +} + +fn main() { + let x = std::hint::black_box(unsafe { std::mem::transmute::<_, __m256>(EXPECTED) }); + unsafe { h(&x); } +} diff --git a/tests/ui/target-feature/inline-always-vector-abi.rs b/tests/ui/target-feature/inline-always-vector-abi.rs new file mode 100644 index 0000000000000..36cd29208c826 --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi.rs @@ -0,0 +1,46 @@ +//@ build-pass +//@ compile-flags: --crate-type=lib --target=x86_64-unknown-linux-gnu +//@ only-x86_64 +//@ needs-llvm-components: x86 +//@ ignore-backends: gcc + +#![feature(target_feature_inline_always)] +#![allow(dead_code, unused_unsafe)] + +use std::arch::x86_64::__m256; + +#[inline(never)] +#[target_feature(enable = "sse")] +fn sink(_x: &__m256) {} + +#[inline(always)] +#[target_feature(enable = "sse")] +fn callee_missing_avx(x: &__m256, y: bool) { + if y { + callee_missing_avx(x, y); + } else { + sink(x); + } +} + +#[target_feature(enable = "avx")] +fn caller_has_abi_mismatch(x: &__m256, y: bool) { + unsafe { callee_missing_avx(x, y) } + //~^ WARNING call to `#[inline(always)]`-annotated `callee_missing_avx` requires the same target features to be inlined [inline_always_mismatching_target_features] +} + +#[inline(always)] +#[target_feature(enable = "avx")] +fn callee_requires_avx(x: &__m256, y: bool) { + if y { + callee_requires_avx(x, y); + } else { + sink(x); + } +} + +#[target_feature(enable = "sse")] +fn caller_missing_avx(x: &__m256, y: bool) { + unsafe { callee_requires_avx(x, y) } + //~^ WARNING call to `#[inline(always)]`-annotated `callee_requires_avx` requires the same target features to be inlined [inline_always_mismatching_target_features] +} diff --git a/tests/ui/target-feature/inline-always-vector-abi.stderr b/tests/ui/target-feature/inline-always-vector-abi.stderr new file mode 100644 index 0000000000000..c8bdb8a5e43ff --- /dev/null +++ b/tests/ui/target-feature/inline-always-vector-abi.stderr @@ -0,0 +1,41 @@ +warning: call to `#[inline(always)]`-annotated `callee_missing_avx` requires the same target features to be inlined + --> $DIR/inline-always-vector-abi.rs:28:14 + | +LL | unsafe { callee_missing_avx(x, y) } + | ^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: function will not be inlined + = note: the following target features are on `caller_has_abi_mismatch` but missing from `callee_missing_avx`: avx +note: `caller_has_abi_mismatch` is defined here + --> $DIR/inline-always-vector-abi.rs:27:1 + | +LL | fn caller_has_abi_mismatch(x: &__m256, y: bool) { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: `#[warn(inline_always_mismatching_target_features)]` on by default +help: add `#[target_feature]` attribute to `callee_missing_avx` + | +LL + #[target_feature(enable = "avx")] +LL | fn callee_missing_avx(x: &__m256, y: bool) { + | + +warning: call to `#[inline(always)]`-annotated `callee_requires_avx` requires the same target features to be inlined + --> $DIR/inline-always-vector-abi.rs:44:14 + | +LL | unsafe { callee_requires_avx(x, y) } + | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: function will not be inlined + = note: the following target features are on `callee_requires_avx` but missing from `caller_missing_avx`: avx +note: `callee_requires_avx` is defined here + --> $DIR/inline-always-vector-abi.rs:34:1 + | +LL | fn callee_requires_avx(x: &__m256, y: bool) { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: add `#[target_feature]` attribute to `caller_missing_avx` + | +LL + #[target_feature(enable = "avx")] +LL | fn caller_missing_avx(x: &__m256, y: bool) { + | + +warning: 2 warnings emitted + diff --git a/tests/ui/target-feature/inline-always.aarch64.stderr b/tests/ui/target-feature/inline-always.aarch64.stderr index 8b58923f2170f..6c7efb5d81475 100644 --- a/tests/ui/target-feature/inline-always.aarch64.stderr +++ b/tests/ui/target-feature/inline-always.aarch64.stderr @@ -5,7 +5,7 @@ LL | target_feature_identity(); | ^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: function will not be inlined - = note: the following target features are on `target_feature_identity` but missing from `call_no_target_features`: neon, fp16 + = note: the following target features are on `target_feature_identity` but missing from `call_no_target_features`: fp16 note: `target_feature_identity` is defined here --> $DIR/inline-always.rs:17:1 | @@ -14,7 +14,7 @@ LL | pub unsafe fn target_feature_identity() {} = note: `#[warn(inline_always_mismatching_target_features)]` on by default help: add `#[target_feature]` attribute to `call_no_target_features` | -LL + #[target_feature(enable = "neon,fp16")] +LL + #[target_feature(enable = "fp16")] LL | unsafe fn call_no_target_features() { | From 3f1a0b75e209278aa9b52475b45368017f7906ac Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Tue, 21 Apr 2026 10:47:07 +0100 Subject: [PATCH 4/6] Add a `noinline` attribute at the callsite when we bail on adding `alwaysinline` --- compiler/rustc_codegen_llvm/src/builder.rs | 29 +++++++++++++------ ...ine-always-callsite-noinline-cmpxchg16b.rs | 28 ++++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 tests/codegen-llvm/inline-always-callsite-noinline-cmpxchg16b.rs diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 19328ef5c8687..73f04915b58d2 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1426,20 +1426,31 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { // Only propagate `#[inline(always)]` to the callsite when there is // an attribute and the caller and callee are compatible for - // inlining here. + // inlining here. Otherwise we explicitly emit a `noinline` to + // ensure that the function will not get inlined through an LLVM + // pass. if attributes::has_inline_always_callsite_attribute( self.cx.tcx, callee_attrs, callee_instance, - ) && self.tcx.is_call_inline_able_at_callsite( - &callee_attrs.target_features, - &caller_attrs.target_features, ) { - attributes::apply_to_callsite( - call, - llvm::AttributePlace::Function, - &[AttributeKind::AlwaysInline.create_attr(self.cx.llcx)], - ); + if self.tcx.is_call_inline_able_at_callsite( + &callee_attrs.target_features, + &caller_attrs.target_features, + ) { + attributes::apply_to_callsite( + call, + llvm::AttributePlace::Function, + &[AttributeKind::AlwaysInline.create_attr(self.cx.llcx)], + ); + } else { + // Ensure the function call will not be inlined. + attributes::apply_to_callsite( + call, + llvm::AttributePlace::Function, + &[AttributeKind::NoInline.create_attr(self.cx.llcx)], + ); + } } } diff --git a/tests/codegen-llvm/inline-always-callsite-noinline-cmpxchg16b.rs b/tests/codegen-llvm/inline-always-callsite-noinline-cmpxchg16b.rs new file mode 100644 index 0000000000000..7728fd1a0eea3 --- /dev/null +++ b/tests/codegen-llvm/inline-always-callsite-noinline-cmpxchg16b.rs @@ -0,0 +1,28 @@ +//@ compile-flags: --crate-type=lib --target x86_64-unknown-linux-gnu -O -Zinline-mir=no -C no-prepopulate-passes +//@ needs-llvm-components: x86 +//@ only-x86_64 +//@ ignore-backends: gcc + +#![feature(core_intrinsics, target_feature_inline_always)] +#![allow(incomplete_features)] + +use std::intrinsics::{AtomicOrdering, atomic_load}; + +#[inline(always)] +#[target_feature(enable = "cmpxchg16b")] +#[unsafe(no_mangle)] +pub fn load(x: *const u128) -> u128 { + unsafe { atomic_load::(x) } +} + +#[unsafe(no_mangle)] +// CHECK-LABEL: define{{.*}} @load_core( +pub fn load_core(x: *const u128) -> u128 { + // `cmpxchg16b` is not enabled on the caller, so the ineligible + // `#[inline(always)]` callee must be marked `noinline` at the callsite. + // + // CHECK: %_0 = {{(tail )?}}call{{.*}} @load(ptr{{.*}} %x) [[CALL_ATTRS:#[0-9]+]] + unsafe { load(x) } +} + +// CHECK: attributes [[CALL_ATTRS]] = { {{.*}}noinline{{.*}} } From e0c933501c513dbd6b9b6a85324b2a23facff081 Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Tue, 5 May 2026 14:06:56 +0100 Subject: [PATCH 5/6] Remove noinline if alwaysinline failed --- compiler/rustc_codegen_llvm/src/builder.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 73f04915b58d2..182bd0cd6d094 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1443,13 +1443,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { llvm::AttributePlace::Function, &[AttributeKind::AlwaysInline.create_attr(self.cx.llcx)], ); - } else { - // Ensure the function call will not be inlined. - attributes::apply_to_callsite( - call, - llvm::AttributePlace::Function, - &[AttributeKind::NoInline.create_attr(self.cx.llcx)], - ); } } } From 4176411a37f3f46f85c7c73d7d7402c5cf69e29f Mon Sep 17 00:00:00 2001 From: James Barford-Evans Date: Tue, 5 May 2026 14:07:35 +0100 Subject: [PATCH 6/6] Ensure `AlwaysInlinerPass()` runs before other inlining passes --- compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index c7e8f99465e18..d79a4142af50e 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Target/TargetMachine.h" #include "llvm/TargetParser/Host.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" @@ -542,6 +543,15 @@ void LLVMSelfProfileInitializeCallbacks( }); } +/* Ensure that functions with the attribute `#[inline(always)]` are inlined + * ahead of functions that could be inlined through the heuristic inliner. */ +void LLVMAddAlwaysInlinerPassToStartOfPipeline(PassBuilder &PB) { + PB.registerPipelineStartEPCallback( + [](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(AlwaysInlinerPass()); + }); +} + enum class LLVMRustOptStage { PreLinkNoLTO, PreLinkThinLTO, @@ -836,10 +846,12 @@ extern "C" LLVMRustResult LLVMRustOptimize( // buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do // support O0 and using them is required. if (OptLevel == OptimizationLevel::O0 && !IsLTO) { + LLVMAddAlwaysInlinerPassToStartOfPipeline(PB); // We manually schedule ThinLTOBufferPasses below, so don't pass the value // to enable it here. MPM = PB.buildO0DefaultPipeline(OptLevel); } else { + LLVMAddAlwaysInlinerPassToStartOfPipeline(PB); switch (OptStage) { case LLVMRustOptStage::PreLinkNoLTO: if (ThinLTOBufferRef) { @@ -879,6 +891,7 @@ extern "C" LLVMRustResult LLVMRustOptimize( } } } else { + LLVMAddAlwaysInlinerPassToStartOfPipeline(PB); // We're not building any of the default pipelines but we still want to // add the verifier, instrumentation, etc passes if they were requested for (const auto &C : PipelineStartEPCallbacks)