@@ -525,47 +525,22 @@ mod hw {
525
525
/// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
526
526
#[ inline( always) ]
527
527
fn rdpmc ( reg_idx : u32 ) -> u64 {
528
- let ( lo, hi) : ( u32 , u32 ) ;
529
- unsafe {
530
- // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
531
- if cfg ! ( unserialized_rdpmc) && false {
532
- // FIXME(eddyb) the Intel and AMD manuals warn about the need for
533
- // "serializing instructions" before/after `rdpmc`, if avoiding any
534
- // reordering is desired, but do not agree on the full set of usable
535
- // "serializing instructions" (e.g. `mfence` isn't listed in both).
536
- //
537
- // The only usable, and guaranteed to work, "serializing instruction"
538
- // appears to be `cpuid`, but it doesn't seem easy to use, especially
539
- // due to the overlap in registers with `rdpmc` itself, and it might
540
- // have too high of a cost, compared to serialization benefits (if any).
541
- asm ! ( "rdpmc" , in( "ecx" ) reg_idx, out( "eax" ) lo, out( "edx" ) hi, options( nostack) ) ;
542
- } else {
543
- asm ! (
544
- // Dummy `cpuid(0)` to serialize instruction execution.
545
- "xor %eax, %eax" , // Intel syntax: "xor eax, eax"
546
- "cpuid" ,
547
-
548
- "mov {rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {rdpmc_ecx:e}"
549
- "rdpmc" ,
550
- rdpmc_ecx = in( reg) reg_idx,
551
- out( "eax" ) lo,
552
- out( "edx" ) hi,
553
-
554
- // `cpuid` clobbers (not overwritten by `rdpmc`).
555
- out( "ebx" ) _,
556
- out( "ecx" ) _,
557
-
558
- options( nostack) ,
559
-
560
- // HACK(eddyb) LLVM 9 and older do not support modifiers
561
- // in Intel syntax inline asm; whenever Rust minimum LLVM
562
- // version becomes LLVM 10, remove and replace above
563
- // instructions with Intel syntax version (from comments).
564
- options( att_syntax) ,
565
- ) ;
566
- }
528
+ // NOTE(eddyb) the FIXME comment below is outdated (the `else` branch now serializes with `cpuid`), and the `&& false` in the condition keeps this first branch from ever being taken.
529
+ if cfg ! ( unserialized_rdpmc) && false {
530
+ // FIXME(eddyb) the Intel and AMD manuals warn about the need for
531
+ // "serializing instructions" before/after `rdpmc`, if avoiding any
532
+ // reordering is desired, but do not agree on the full set of usable
533
+ // "serializing instructions" (e.g. `mfence` isn't listed in both).
534
+ //
535
+ // The only usable, and guaranteed to work, "serializing instruction"
536
+ // appears to be `cpuid`, but it doesn't seem easy to use, especially
537
+ // due to the overlap in registers with `rdpmc` itself, and it might
538
+ // have too high of a cost, compared to serialization benefits (if any).
539
+ unserialized_rdpmc ( reg_idx)
540
+ } else {
541
+ serialize_instruction_execution ( ) ;
542
+ unserialized_rdpmc ( reg_idx)
567
543
}
568
- lo as u64 | ( hi as u64 ) << 32
569
544
}
570
545
571
546
/// Read two hardware performance counters at once (see `rdpmc`).
@@ -574,44 +549,49 @@ mod hw {
574
549
/// only requires one "serializing instruction", rather than two.
575
550
#[ inline( always) ]
576
551
fn rdpmc_pair ( a_reg_idx : u32 , b_reg_idx : u32 ) -> ( u64 , u64 ) {
577
- let ( a_lo, a_hi) : ( u32 , u32 ) ;
578
- let ( b_lo, b_hi) : ( u32 , u32 ) ;
552
+ serialize_instruction_execution ( ) ; // Serialize once; both counter reads below share it.
553
+ ( unserialized_rdpmc ( a_reg_idx) , unserialized_rdpmc ( b_reg_idx) )
554
+ }
555
+
556
+ /// Executes a dummy `cpuid(0)` purely to serialize instruction execution; every register value `cpuid` produces is discarded.
557
+ #[ inline( always) ]
558
+ fn serialize_instruction_execution ( ) {
579
559
unsafe {
580
560
asm ! (
581
- // Dummy `cpuid(0)` to serialize instruction execution.
582
- "xor %eax, %eax" , // Intel syntax: "xor eax, eax"
561
+ "xor eax, eax" ,
562
+ // LLVM sometimes reserves `ebx`/`rbx` for its internal use, so it is not declared as
563
+ // a `cpuid` clobber; `rbx` is saved in a scratch register and restored after `cpuid`.
564
+ "mov {tmp_rbx:r}, rbx" ,
583
565
"cpuid" ,
584
-
585
- "mov {a_rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {a_rdpmc_ecx:e}"
586
- "rdpmc" ,
587
- "mov %eax, {a_rdpmc_eax:e}" , // Intel syntax: "mov {a_rdpmc_eax:e}, eax"
588
- "mov %edx, {a_rdpmc_edx:e}" , // Intel syntax: "mov {a_rdpmc_edx:e}, edx"
589
- "mov {b_rdpmc_ecx:e}, %ecx" , // Intel syntax: "mov ecx, {b_rdpmc_ecx:e}"
590
- "rdpmc" ,
591
- a_rdpmc_ecx = in( reg) a_reg_idx,
592
- a_rdpmc_eax = out( reg) a_lo,
593
- a_rdpmc_edx = out( reg) a_hi,
594
- b_rdpmc_ecx = in( reg) b_reg_idx,
595
- out( "eax" ) b_lo,
596
- out( "edx" ) b_hi,
597
-
598
- // `cpuid` clobbers (not overwritten by `rdpmc`).
599
- out( "ebx" ) _,
600
- out( "ecx" ) _,
566
+ "mov rbx, {tmp_rbx:r}" ,
567
+ tmp_rbx = lateout( reg) _,
568
+ // Remaining `cpuid` clobbers (their values are discarded).
569
+ lateout( "eax" ) _,
570
+ lateout( "edx" ) _,
571
+ lateout( "ecx" ) _,
601
572
602
573
options( nostack) ,
574
+ ) ;
575
+ }
576
+ }
603
577
604
- // HACK(eddyb) LLVM 9 and older do not support modifiers
605
- // in Intel syntax inline asm; whenever Rust minimum LLVM
606
- // version becomes LLVM 10, remove and replace above
607
- // instructions with Intel syntax version (from comments).
608
- options( att_syntax) ,
578
+ /// Read the hardware performance counter indicated by `reg_idx`, without serializing instruction execution first.
579
+ ///
580
+ /// If the counter is signed, sign extension should be performed based on
581
+ /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
582
+ #[ inline( always) ]
583
+ fn unserialized_rdpmc ( reg_idx : u32 ) -> u64 {
584
+ let ( lo, hi) : ( u32 , u32 ) ;
585
+ unsafe {
586
+ asm ! (
587
+ "rdpmc" ,
588
+ in( "ecx" ) reg_idx,
589
+ lateout( "eax" ) lo,
590
+ lateout( "edx" ) hi,
591
+ options( nostack)
609
592
) ;
610
593
}
611
- (
612
- a_lo as u64 | ( a_hi as u64 ) << 32 ,
613
- b_lo as u64 | ( b_hi as u64 ) << 32 ,
614
- )
594
+ lo as u64 | ( hi as u64 ) << 32 // `eax` holds the low 32 bits, `edx` the high 32 bits.
615
595
}
616
596
617
597
/// Categorization of `x86_64` CPUs, primarily based on how they
@@ -815,17 +795,10 @@ mod hw {
815
795
let mut _tmp: u64 = 0 ;
816
796
unsafe {
817
797
asm ! (
818
- // Intel syntax: "lock xadd [{atomic}], {tmp}"
819
- "lock xadd {tmp}, ({atomic})" ,
798
+ "lock xadd qword ptr [{atomic}], {tmp}" ,
820
799
821
800
atomic = in( reg) & mut atomic,
822
801
tmp = inout( reg) _tmp,
823
-
824
- // HACK(eddyb) LLVM 9 and older do not support modifiers
825
- // in Intel syntax inline asm; whenever Rust minimum LLVM
826
- // version becomes LLVM 10, remove and replace above
827
- // instructions with Intel syntax version (from comments).
828
- options( att_syntax) ,
829
802
) ;
830
803
}
831
804
0 commit comments