Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 65 additions & 19 deletions compiler/rustc_codegen_llvm/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,23 @@ pub(crate) fn remove_string_attr_from_llfn(llfn: &Value, name: &str) {
llvm::RemoveStringAttrFromFn(llfn, name);
}

/// Get LLVM attribute for the provided inline heuristic.
pub(crate) fn inline_attr<'ll, 'tcx>(
/// Get LLVM attribute for the provided inline heuristic that can be applied
/// to a function definition
fn inline_attr_for_fn_def<'ll, 'tcx>(
cx: &SimpleCx<'ll>,
tcx: TyCtxt<'tcx>,
instance: ty::Instance<'tcx>,
has_function_features: bool,
) -> Option<&'ll Attribute> {
// `optnone` requires `noinline`
let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id());
let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
(_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
(InlineAttr::None, _) if instance.def.requires_inline(tcx) => InlineAttr::Hint,
(inline, _) => inline,
};

if !tcx.sess.opts.unstable_opts.inline_llvm {
// disable LLVM inlining
return Some(AttributeKind::NoInline.create_attr(cx.llcx));
}
match inline {

let codegen_fn_attrs = tcx.codegen_fn_attrs(instance.def_id());
let inline = get_inline_attr_from_codegen_fn_attrs(tcx, codegen_fn_attrs, instance);

let llvm_attr = match inline {
InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)),
InlineAttr::Always | InlineAttr::Force { .. } => {
Some(AttributeKind::AlwaysInline.create_attr(cx.llcx))
Expand All @@ -70,7 +68,58 @@ pub(crate) fn inline_attr<'ll, 'tcx>(
}
}
InlineAttr::None => None,
};

let is_inline_always = is_inline_always_attr(inline);
// Keep non-`#[inline(always)]` attributes on the function definition as
// usual. `#[inline(always)]` can also stay on the function when there are
// no per-function target features.
//
// Once a function has target features, we avoid attaching `alwaysinline`
// to the definition itself. In that case the attribute is checked and,
// when legal, emitted on individual call sites instead.
if !is_inline_always || (is_inline_always && !has_function_features) { llvm_attr } else { None }
}

/// Get the `InlineAttr` for the given instance.
fn get_inline_attr_from_codegen_fn_attrs<'tcx>(
tcx: TyCtxt<'tcx>,
codegen_fn_attrs: &CodegenFnAttrs,
instance: ty::Instance<'tcx>,
) -> InlineAttr {
// `optnone` requires `noinline`
match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
(_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
(InlineAttr::None, _) if instance.def.requires_inline(tcx) => InlineAttr::Hint,
(inline, _) => inline,
}
}

#[inline]
fn is_inline_always_attr(inline_attr: InlineAttr) -> bool {
matches!(inline_attr, InlineAttr::Always | InlineAttr::Force { .. })
}

/// Do we have an LLVM inline always attribute for the callsite?
pub(crate) fn has_inline_always_callsite_attribute<'tcx>(
tcx: TyCtxt<'tcx>,
attrs: &CodegenFnAttrs,
instance: ty::Instance<'tcx>,
) -> bool {
// disable LLVM inlining
if !tcx.sess.opts.unstable_opts.inline_llvm {
return false;
}

// If there are no target features on the function then we do not want to
// return anything. As the attribute will have been applied to the function
// definition.
if attrs.target_features.is_empty() {
return false;
}

// We are only interested in the `#[inline(always)]` attribute
is_inline_always_attr(get_inline_attr_from_codegen_fn_attrs(tcx, attrs, instance))
}

#[inline]
Expand Down Expand Up @@ -534,14 +583,11 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
let function_features =
codegen_fn_attrs.target_features.iter().map(|f| f.name.as_str()).collect::<Vec<&str>>();

// Apply function attributes as per usual if there are no user defined
// target features otherwise this will get applied at the callsite.
if function_features.is_empty() {
if let Some(instance) = instance
&& let Some(inline_attr) = inline_attr(cx, tcx, instance)
{
to_add.push(inline_attr);
}
if let Some(instance) = instance
&& let Some(inline_attr) =
inline_attr_for_fn_def(cx, tcx, instance, !function_features.is_empty())
{
to_add.push(inline_attr);
}

let function_features = function_features
Expand Down
57 changes: 33 additions & 24 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, TargetFeature, TargetFeatureKind};
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
use rustc_middle::ty::layout::{
FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
TyAndLayout,
Expand All @@ -31,12 +31,12 @@ use smallvec::SmallVec;
use tracing::{debug, instrument};

use crate::abi::FnAbiLlvmExt;
use crate::attributes;
use crate::attributes::{self};
use crate::common::Funclet;
use crate::context::{CodegenCx, FullCx, GenericCx, SCx};
use crate::llvm::{
self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, FromGeneric, GEPNoWrapFlags, Metadata, TRUE,
ToLlvmBool, Type, Value,
self, AtomicOrdering, AtomicRmwBinOp, AttributeKind, BasicBlock, FromGeneric, GEPNoWrapFlags,
Metadata, TRUE, ToLlvmBool, Type, Value,
};
use crate::type_of::LayoutLlvmExt;

Expand Down Expand Up @@ -1427,29 +1427,38 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
)
};

if let Some(callee_instance) = callee_instance {
// Check for whether we can add `#[inline(always)]` to a callsite
if let (Some(callee_instance), Some(caller_attrs)) = (callee_instance, caller_attrs) {
// Attributes on the function definition being called
let callee_attrs = self.cx.tcx.codegen_fn_attrs(callee_instance.def_id());
if let Some(caller_attrs) = caller_attrs
// If there is an inline attribute and a target feature that matches
// we will add the attribute to the callsite otherwise we'll omit
// this and not add the attribute to prevent soundness issues.
&& let Some(inlining_rule) = attributes::inline_attr(&self.cx, self.cx.tcx, callee_instance)
&& self.cx.tcx.is_target_feature_call_safe(

// Only propagate `#[inline(always)]` to the callsite when there is
// an attribute and the caller and callee are compatible for
// inlining here. Otherwise we explicitly emit a `noinline` to
// ensure that the function will not get inlined through an LLVM
// pass.
Copy link
Copy Markdown
Member

@RalfJung RalfJung Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To echo the problem @tmiasko raised in #145574, which I now finally understand: LLVM can move a call site to another function via inlining. So whatever reasoning we do here based on the attributes of the current caller is largely pointless since we don't know the attributes of the actual caller that this call may eventually end up in.

View changes since the review

Copy link
Copy Markdown
Member

@RalfJung RalfJung Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually constructing a counterexample could be tricky because LLVM seems to inline alwaysinline functions first, so I have not managed to get it to move around an alwaysinline call site. But that seems like a fragile property to rely on.

The entire approach to target_feature_inline_always seems very unprincipled to me. We have no solid writeup of when alwaysinline is safe to put on a call site in LLVM IR, which means we don't even know the exact property rustc has to check before adding that attribute. This feature needs less "let's implement something and see if it works" (an approach that does not work for optimizations where the test suite is never even close to being able to find all bugs) and more "let's figure out a principled argument for why what we want to do could be correct".

Copy link
Copy Markdown
Member

@RalfJung RalfJung Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, based on the most recent example by @tmiasko , I am not convinced that there even exists a sound way to use alwaysinline in LLVM IR for calls to functions with extra target features. Even the most conservative option where we require full feature equality seems to go wrong:

#[inline(never)]
#[target_feature(enable = "sse")]
pub fn i(x: &__m256) {
  std::hint::black_box(x);
}

#[inline(always)]
#[target_feature(enable = "sse")]
pub fn f(x: &__m256) {
    i(x);
}

#[target_feature(enable = "sse")]
pub fn g(x: &__m256) {
    f(x) // alwaysinline call site
}

#[target_feature(enable = "sse", enable = "avx")]
pub fn h(x: &__m256) {
    g(x)
}

Imagine LLVM first inlines g into h (sound because none of the calls in g has a target-feature-dependent ABI). Then LLVM changes i to receive the argument by-value (sound because the only caller and callee have the same target features). We end up with:

#[inline(never)]
#[target_feature(enable = "sse")]
pub fn i(x: __m256) { // an actual by-value argument
  std::hint::black_box(x);
}

#[target_feature(enable = "sse")]
pub fn f(x: &__m256) {
    i(*x);
}

#[target_feature(enable = "sse", enable = "avx")]
pub fn h(x: &__m256) {
    f(x) // alwaysinline call site
}

Now we inline f:

#[inline(never)]
#[target_feature(enable = "sse")]
pub fn i(x: __m256) { // an actual by-value argument
  std::hint::black_box(x);
}

#[target_feature(enable = "sse", enable = "avx")]
pub fn h(x: &__m256) {
    i(*x)
}

Now the caller and callee disagree on the ABI. Oopsie...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand your point, but I may still be missing something.

I took your example, removed #[inline(always)], and compared the output here: https://godbolt.org/z/8xs8Gfjbj. In both cases, whether f has #[inline(always)] or not, h ends up directly calling i.

I also tried removing all pub declarations except on h: https://godbolt.org/z/onszcGPPc. That produces the same output both with and without #[inline(always)] on f.

So from these examples, it does not seem like #[inline(always)] is making the situation less safe. The underlying problem looks like an LLVM issue that exists regardless, rather than something introduced by the attribute itself.

We have no solid writeup of when alwaysinline is safe to put on a call site in LLVM IR

What kind of writeup would you want here? The current code comments are probably not enough, but I want to make sure I understand what is missing and where you would expect it to live.

The rule this PR is trying to encode is:

  • The caller must not affect the callee's ABI.
  • The callee's target features must be a subset of the caller's target features.

If both conditions hold, we apply alwaysinline at the call site. If either condition fails, we instead apply noinline, which prevents further inlining at that call site.

The goal of this PR is not to solve every inlining-related issue. It is to make problematic cases of the specific #[inline(always)] attribute usage easier to detect and safer to handle.

For example, in the cx16 issue (load_internal as the callee), condition 2 would fail if load_internal were marked #[inline(always)]. In that case, this PR would emit a warning and apply noinline at the call site, preventing further LLVM inlining there. That is safer than today's #[inline] behaviour, because we both surface the problem to the user and block additional inlining at that call site. At the same time, because alwaysinline is no longer attached to the function definition itself like inlinehint, valid uses of load_internal can still be inlined.

Likewise, in the example from the tracking issue, which is the basis for this test case. Condition 1 fails, because the caller affects the callee's ABI.

Copy link
Copy Markdown
Member

@RalfJung RalfJung Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took your example, removed #[inline(always)], and compared the output here: https://godbolt.org/z/8xs8Gfjbj. In both cases, whether f has #[inline(always)] or not, h ends up directly calling i.

I also tried removing all pub declarations except on h: https://godbolt.org/z/onszcGPPc. That produces the same output both with and without #[inline(always)] on f.

Yes, LLVM today happens to do inlining in a different order that avoids the problem, at least for this particular example. But I would not want to bet the soundness of the language on LLVM always sticking to this inlining order.

That's why I described the expected order of applied optimizations in my example. You have to apply the optimizations manually to confirm (or reject) my reasoning. But as long as it is permitted for LLVM to do these optimizations in this order, that means the code we generate is unsound.

Copy link
Copy Markdown
Member

@RalfJung RalfJung Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of writeup would you want here?

I would like to see an argument for why the rules you came up with are enough to guarantee that we avoid LLVM's soundness bugs around alwaysinline. That's the main goal here, after all: alwaysinline is fundamentally busted, but this feature is trying to use it soundly somehow. So far I am not convinced that is even possible. The example above shows, I think, that even with the very strict rule "caller and callee must have the exact same target features", alwaysinline is still unsound.

You cannot argue for the correctness of those rules by pointing at some examples. You can only make such an argument by saying: here's how inlining in LLVM works, and here is why under every possible inlining choice LLVM might make on any program, the result will be sound. Usually we leave that work to LLVM, but the entire premise of this approach is that we do the work ourselves because LLVM doesn't do it properly. That's fundamentally very hard (much harder than doing it in LLVM) as it requires reasoning "behind LLVM's back", so we need to be very careful and deliberate.

if attributes::has_inline_always_callsite_attribute(
self.cx.tcx,
callee_attrs,
callee_instance,
) {
if self.tcx.is_call_inline_able_at_callsite(
&callee_attrs.target_features,
&caller_attrs.target_features.iter().cloned().chain(
self.cx.tcx.sess.target_features.iter().map(|feat| TargetFeature {
name: *feat,
kind: TargetFeatureKind::Implied,
})
).collect::<Vec<_>>(),
)
{
attributes::apply_to_callsite(
call,
llvm::AttributePlace::Function,
&[inlining_rule],
);
&caller_attrs.target_features,
) {
attributes::apply_to_callsite(
call,
llvm::AttributePlace::Function,
&[AttributeKind::AlwaysInline.create_attr(self.cx.llcx)],
);
} else {
// Ensure the function call will not be inlined.
attributes::apply_to_callsite(
call,
llvm::AttributePlace::Function,
&[AttributeKind::NoInline.create_attr(self.cx.llcx)],
);
}
}
}

Expand Down
54 changes: 53 additions & 1 deletion compiler/rustc_middle/src/ty/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use rustc_abi::{ExternAbi, FieldIdx, Layout, LayoutData, TargetDataLayout, Varia
use rustc_ast as ast;
use rustc_data_structures::defer;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
use rustc_data_structures::intern::Interned;
use rustc_data_structures::jobserver::Proxy;
use rustc_data_structures::profiling::SelfProfilerRef;
Expand Down Expand Up @@ -1321,6 +1321,58 @@ impl<'tcx> TyCtxt<'tcx> {
.all(|feature| body_features.iter().any(|f| f.name == feature.name))
}

/// Returns whether a callee can safely be always inlined into a caller at
/// this callsite.
///
/// This requires more than "the caller has at least the callee's target
/// features". We also require the caller and callee to agree on any
/// target features that affect the vector ABI, otherwise inlining could
/// reinterpret arguments under a different calling convention.
pub fn is_call_inline_able_at_callsite(
self,
callee_features: &[TargetFeature],
caller_features: &[TargetFeature],
) -> bool {
// Fold in globally enabled target features, since they are part of the
// effective feature set for both sides.
let callee_features = self.effective_inline_target_features(callee_features);
let caller_features = self.effective_inline_target_features(caller_features);

// A plain subset check is not sufficient. For example, `avx`
// implicitly enables `sse`, so a callee that only requires `sse`
// appears to be a subset of an `avx` caller. However, `avx` also
// changes how vector arguments are passed, so inlining that callee into
// the caller would cross an ABI boundary. Require both the feature
// subset relation and matching ABI-relevant vector features.
self.vector_abi_matches(&callee_features, &caller_features)
&& callee_features.is_subset(&caller_features)
}

fn vector_abi_matches(
self,
callee_features: &FxIndexSet<Symbol>,
caller_features: &FxIndexSet<Symbol>,
) -> bool {
self.abi_target_features(caller_features) == self.abi_target_features(callee_features)
}

pub fn abi_target_features(self, feature_names: &FxIndexSet<Symbol>) -> FxIndexSet<Symbol> {
feature_names
.iter()
.cloned()
.filter(|it| self.sess.target.feature_could_influence_vector_length(it.as_str()))
.collect()
}

pub fn effective_inline_target_features(
self,
features: &[TargetFeature],
) -> FxIndexSet<Symbol> {
let mut all_features = self.sess.unstable_target_features.clone();
all_features.extend(features.iter().map(|it| it.name));
all_features
}

/// Returns the safe version of the signature of the given function, if calling it
/// would be safe in the context of the given caller.
pub fn adjust_target_feature_sig(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use rustc_hir::attrs::InlineAttr;
use rustc_middle::middle::codegen_fn_attrs::{TargetFeature, TargetFeatureKind};
use rustc_middle::middle::codegen_fn_attrs::TargetFeatureKind;
use rustc_middle::mir::{Body, TerminatorKind};
use rustc_middle::ty::{self, TyCtxt};

Expand Down Expand Up @@ -51,36 +51,83 @@ fn check_inline_always_target_features<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx

// Scan the users defined target features and ensure they
// match the caller.
if tcx.is_target_feature_call_safe(
if tcx.is_call_inline_able_at_callsite(
&callee_codegen_fn_attrs.target_features,
&caller_codegen_fn_attrs
.target_features
.iter()
.cloned()
.chain(tcx.sess.target_features.iter().map(|feat| TargetFeature {
name: *feat,
kind: TargetFeatureKind::Implied,
}))
.collect::<Vec<_>>(),
&caller_codegen_fn_attrs.target_features,
) {
continue;
}

let callee_only: Vec<_> = callee_codegen_fn_attrs
// Use the full target feature sets, including implied and
// command-line features, to classify the mismatch. Diagnostic
// messages should still only mention the non-implied features
// that the user actually enabled.
let caller_features =
tcx.effective_inline_target_features(&caller_codegen_fn_attrs.target_features);
let callee_features =
tcx.effective_inline_target_features(&callee_codegen_fn_attrs.target_features);

let explicit_caller_features: Vec<_> = caller_codegen_fn_attrs
.target_features
.iter()
.cloned()
.filter(|it| it.kind != TargetFeatureKind::Implied)
.collect();
let explicit_callee_features: Vec<_> = callee_codegen_fn_attrs
.target_features
.iter()
.filter(|it| !caller_codegen_fn_attrs.target_features.contains(it))
.filter(|it| !matches!(it.kind, TargetFeatureKind::Implied))
.map(|it| it.name.as_str())
.cloned()
.filter(|it| it.kind != TargetFeatureKind::Implied)
.collect();

crate::errors::emit_inline_always_target_feature_diagnostic(
tcx,
terminator.source_info.span,
callee_def_id,
caller_def_id.into(),
&callee_only,
);
let explicit_caller_features =
tcx.effective_inline_target_features(&explicit_caller_features);
let explicit_callee_features =
tcx.effective_inline_target_features(&explicit_callee_features);

// If the callee's features are otherwise a subset of the
// caller's, then the mismatch is only due to the caller using a
// different vector ABI from the callee.
if callee_features.is_subset(&caller_features) {
// We only want to display the target features the user
// missed out. Not every feature that is possibly enabled.
let caller_abi_features = tcx.abi_target_features(&explicit_caller_features);
let callee_abi_features = tcx.abi_target_features(&explicit_callee_features);
let caller_only = caller_abi_features
.difference(&callee_abi_features)
.map(|it| it.as_str())
.collect::<Vec<_>>()
.join(", ");

// Emit that the issue is caused by a vector ABI mismatch.
crate::errors::emit_inline_always_target_feature_diagnostic(
tcx,
terminator.source_info.span,
callee_def_id,
caller_def_id.into(),
&caller_only,
caller_def_id.into(),
callee_def_id,
);
} else {
let callee_only = explicit_callee_features
.difference(&explicit_caller_features)
.map(|it| it.as_str())
.collect::<Vec<_>>()
.join(", ");

// Emit that the issue stems from the callee having features
// enabled that the caller does not have enabled.
crate::errors::emit_inline_always_target_feature_diagnostic(
tcx,
terminator.source_info.span,
callee_def_id,
caller_def_id.into(),
&callee_only,
callee_def_id,
caller_def_id.into(),
);
}
}
_ => (),
}
Expand Down
Loading
Loading