Skip to content

Pass tune-cpu to LLVM #76830

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions compiler/rustc_codegen_llvm/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
);
}

pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
let tune_cpu = SmallCStr::new(tune);
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
const_cstr!("tune-cpu"),
tune_cpu.as_c_str(),
);
}
}

/// Sets the `NonLazyBind` LLVM attribute on a given function,
/// assuming the codegen options allow skipping the PLT.
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
Expand Down Expand Up @@ -300,6 +312,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
// Without this, ThinLTO won't inline Rust functions into Clang generated
// functions (because Clang annotates functions this way too).
apply_target_cpu_attr(cx, llfn);
// tune-cpu is only conveyed through the attribute for our purpose.
// The target doesn't care; the subtarget reads our attribute.
apply_tune_cpu_attr(cx, llfn);

let features = llvm_target_features(cx.tcx.sess)
.map(|s| s.to_string())
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_llvm/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
}

fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
attributes::apply_target_cpu_attr(self, llfn)
attributes::apply_target_cpu_attr(self, llfn);
attributes::apply_tune_cpu_attr(self, llfn);
}

fn create_used_variable(&self) {
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
llvm_util::target_cpu(sess)
}
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
llvm_util::tune_cpu(sess)
}
}

impl WriteBackendMethods for LlvmCodegenBackend {
Expand Down
22 changes: 17 additions & 5 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,11 +351,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
}
}

pub fn target_cpu(sess: &Session) -> &str {
let name = match sess.opts.cg.target_cpu {
Some(ref s) => &**s,
None => &*sess.target.target.options.cpu,
};
fn handle_native(name: &str) -> &str {
if name != "native" {
return name;
}
Expand All @@ -366,3 +362,19 @@ pub fn target_cpu(sess: &Session) -> &str {
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
}
}

pub fn target_cpu(sess: &Session) -> &str {
let name = match sess.opts.cg.target_cpu {
Some(ref s) => &**s,
None => &*sess.target.target.options.cpu,
};

handle_native(name)
}

pub fn tune_cpu(sess: &Session) -> Option<&str> {
match sess.opts.debugging_opts.tune_cpu {
Some(ref s) => Some(handle_native(&**s)),
None => None,
}
}
1 change: 1 addition & 0 deletions compiler/rustc_codegen_ssa/src/traits/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
opt_level: config::OptLevel,
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
}
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
tracked!(teach, true);
tracked!(thinlto, Some(true));
tracked!(tune_cpu, Some(String::from("abc")));
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
tracked!(treat_err_as_bug, Some(1));
tracked!(unleash_the_miri_inside_of_you, true);
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"show extended diagnostic help (default: no)"),
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
"set the current terminal width"),
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
"select processor to schedule for (`rustc --print target-cpus` for details)"),
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable ThinLTO when possible"),
// We default to 1 here since we want to behave like
Expand Down
20 changes: 18 additions & 2 deletions src/doc/rustc/src/codegen-options/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
This instructs `rustc` to generate code specifically for a particular processor.

You can run `rustc --print target-cpus` to see the valid options to pass
here. Additionally, `native` can be passed to use the processor of the host
machine. Each target has a default base CPU.
here. Each target has a default base CPU. Special values include:

* `native` can be passed to use the processor of the host machine.
* `generic` refers to an LLVM target with minimal features but modern tuning.

## target-feature

Expand Down Expand Up @@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
features.

## tune-cpu
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving this option to -Z made it unstable and not-actually-a-codegen-option. At the very least this should be annotated as unstable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Annotated, but not moved yet since I don't know where unstable opts should be documented.


This instructs `rustc` to schedule code specifically for a particular
processor. This does not affect the compatibility (instruction sets or ABI),
but should make your code slightly more efficient on the selected CPU.

The valid options are the same as those for [`target-cpu`](#target-cpu).
The default is `None`, which LLVM translates as the `target-cpu`.

This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.

Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
effective only for x86 targets.

[option-emit]: ../command-line-arguments.md#option-emit
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
[profile-guided optimization]: ../profile-guided-optimization.md
Expand Down
21 changes: 21 additions & 0 deletions src/test/codegen/tune-cpu-on-functions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// This test makes sure that functions get annotated with the proper
// "tune-cpu" attribute in LLVM.

// no-prefer-dynamic
// ignore-tidy-linelength
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic

#![crate_type = "staticlib"]

// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
#[no_mangle]
pub extern fn exported() {
not_exported();
}

// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
// CHECK-NEXT: ; Function Attrs:
// CHECK-NEXT: define {{.*}}() {{.*}} #0
fn not_exported() {}

// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"