Skip to content

Commit b4da534

Browse files
authored
Merge pull request #185 from andjo403/ebx_asm
Avoid using ebx as an asm! operand
2 parents a8eacaa + 404e220 commit b4da534

File tree

1 file changed

+51
-78
lines changed

1 file changed

+51
-78
lines changed

measureme/src/counters.rs

Lines changed: 51 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -525,47 +525,22 @@ mod hw {
525525
/// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
526526
#[inline(always)]
527527
fn rdpmc(reg_idx: u32) -> u64 {
528-
let (lo, hi): (u32, u32);
529-
unsafe {
530-
// NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
531-
if cfg!(unserialized_rdpmc) && false {
532-
// FIXME(eddyb) the Intel and AMD manuals warn about the need for
533-
// "serializing instructions" before/after `rdpmc`, if avoiding any
534-
// reordering is desired, but do not agree on the full set of usable
535-
// "serializing instructions" (e.g. `mfence` isn't listed in both).
536-
//
537-
// The only usable, and guaranteed to work, "serializing instruction"
538-
// appears to be `cpuid`, but it doesn't seem easy to use, especially
539-
// due to the overlap in registers with `rdpmc` itself, and it might
540-
// have too high of a cost, compared to serialization benefits (if any).
541-
asm!("rdpmc", in("ecx") reg_idx, out("eax") lo, out("edx") hi, options(nostack));
542-
} else {
543-
asm!(
544-
// Dummy `cpuid(0)` to serialize instruction execution.
545-
"xor %eax, %eax", // Intel syntax: "xor eax, eax"
546-
"cpuid",
547-
548-
"mov {rdpmc_ecx:e}, %ecx", // Intel syntax: "mov ecx, {rdpmc_ecx:e}"
549-
"rdpmc",
550-
rdpmc_ecx = in(reg) reg_idx,
551-
out("eax") lo,
552-
out("edx") hi,
553-
554-
// `cpuid` clobbers (not overwritten by `rdpmc`).
555-
out("ebx") _,
556-
out("ecx") _,
557-
558-
options(nostack),
559-
560-
// HACK(eddyb) LLVM 9 and older do not support modifiers
561-
// in Intel syntax inline asm; whenever Rust minimum LLVM
562-
// version becomes LLVM 10, remove and replace above
563-
// instructions with Intel syntax version (from comments).
564-
options(att_syntax),
565-
);
566-
}
528+
// NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
529+
if cfg!(unserialized_rdpmc) && false {
530+
// FIXME(eddyb) the Intel and AMD manuals warn about the need for
531+
// "serializing instructions" before/after `rdpmc`, if avoiding any
532+
// reordering is desired, but do not agree on the full set of usable
533+
// "serializing instructions" (e.g. `mfence` isn't listed in both).
534+
//
535+
// The only usable, and guaranteed to work, "serializing instruction"
536+
// appears to be `cpuid`, but it doesn't seem easy to use, especially
537+
// due to the overlap in registers with `rdpmc` itself, and it might
538+
// have too high of a cost, compared to serialization benefits (if any).
539+
unserialized_rdpmc(reg_idx)
540+
} else {
541+
serialize_instruction_execution();
542+
unserialized_rdpmc(reg_idx)
567543
}
568-
lo as u64 | (hi as u64) << 32
569544
}
570545

571546
/// Read two hardware performance counters at once (see `rdpmc`).
@@ -574,44 +549,49 @@ mod hw {
574549
/// only requires one "serializing instruction", rather than two.
575550
#[inline(always)]
576551
fn rdpmc_pair(a_reg_idx: u32, b_reg_idx: u32) -> (u64, u64) {
577-
let (a_lo, a_hi): (u32, u32);
578-
let (b_lo, b_hi): (u32, u32);
552+
serialize_instruction_execution();
553+
(unserialized_rdpmc(a_reg_idx), unserialized_rdpmc(b_reg_idx))
554+
}
555+
556+
/// Dummy `cpuid(0)` to serialize instruction execution.
557+
#[inline(always)]
558+
fn serialize_instruction_execution() {
579559
unsafe {
580560
asm!(
581-
// Dummy `cpuid(0)` to serialize instruction execution.
582-
"xor %eax, %eax", // Intel syntax: "xor eax, eax"
561+
"xor eax, eax",
562+
// LLVM sometimes reserves `ebx` for its internal use, so we need to use
563+
// a scratch register for it instead.
564+
"mov {tmp_rbx:r}, rbx",
583565
"cpuid",
584-
585-
"mov {a_rdpmc_ecx:e}, %ecx", // Intel syntax: "mov ecx, {a_rdpmc_ecx:e}"
586-
"rdpmc",
587-
"mov %eax, {a_rdpmc_eax:e}", // Intel syntax: "mov {a_rdpmc_eax:e}, eax"
588-
"mov %edx, {a_rdpmc_edx:e}", // Intel syntax: "mov {a_rdpmc_edx:e}, edx"
589-
"mov {b_rdpmc_ecx:e}, %ecx", // Intel syntax: "mov ecx, {b_rdpmc_ecx:e}"
590-
"rdpmc",
591-
a_rdpmc_ecx = in(reg) a_reg_idx,
592-
a_rdpmc_eax = out(reg) a_lo,
593-
a_rdpmc_edx = out(reg) a_hi,
594-
b_rdpmc_ecx = in(reg) b_reg_idx,
595-
out("eax") b_lo,
596-
out("edx") b_hi,
597-
598-
// `cpuid` clobbers (not overwritten by `rdpmc`).
599-
out("ebx") _,
600-
out("ecx") _,
566+
"mov rbx, {tmp_rbx:r}",
567+
tmp_rbx = lateout(reg) _,
568+
// `cpuid` clobbers.
569+
lateout("eax") _,
570+
lateout("edx") _,
571+
lateout("ecx") _,
601572

602573
options(nostack),
574+
);
575+
}
576+
}
603577

604-
// HACK(eddyb) LLVM 9 and older do not support modifiers
605-
// in Intel syntax inline asm; whenever Rust minimum LLVM
606-
// version becomes LLVM 10, remove and replace above
607-
// instructions with Intel syntax version (from comments).
608-
options(att_syntax),
578+
/// Read the hardware performance counter indicated by `reg_idx`.
579+
///
580+
/// If the counter is signed, sign extension should be performed based on
581+
/// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
582+
#[inline(always)]
583+
fn unserialized_rdpmc(reg_idx: u32) -> u64 {
584+
let (lo, hi): (u32, u32);
585+
unsafe {
586+
asm!(
587+
"rdpmc",
588+
in("ecx") reg_idx,
589+
lateout("eax") lo,
590+
lateout("edx") hi,
591+
options(nostack)
609592
);
610593
}
611-
(
612-
a_lo as u64 | (a_hi as u64) << 32,
613-
b_lo as u64 | (b_hi as u64) << 32,
614-
)
594+
lo as u64 | (hi as u64) << 32
615595
}
616596

617597
/// Categorization of `x86_64` CPUs, primarily based on how they
@@ -815,17 +795,10 @@ mod hw {
815795
let mut _tmp: u64 = 0;
816796
unsafe {
817797
asm!(
818-
// Intel syntax: "lock xadd [{atomic}], {tmp}"
819-
"lock xadd {tmp}, ({atomic})",
798+
"lock xadd qword ptr [{atomic}], {tmp}",
820799

821800
atomic = in(reg) &mut atomic,
822801
tmp = inout(reg) _tmp,
823-
824-
// HACK(eddyb) LLVM 9 and older do not support modifiers
825-
// in Intel syntax inline asm; whenever Rust minimum LLVM
826-
// version becomes LLVM 10, remove and replace above
827-
// instructions with Intel syntax version (from comments).
828-
options(att_syntax),
829802
);
830803
}
831804

0 commit comments

Comments
 (0)