Skip to content

Commit 7d95912

Browse files
committed
Add umin, smin, umax, smax to isIdempotentRMW
1 parent 169de76 commit 7d95912

File tree

2 files changed

+145
-103
lines changed

2 files changed

+145
-103
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1580,10 +1580,16 @@ bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
15801580
case AtomicRMWInst::Sub:
15811581
case AtomicRMWInst::Or:
15821582
case AtomicRMWInst::Xor:
1583+
case AtomicRMWInst::UMax: // umax(x, 0) == x
15831584
return C->isZero();
15841585
case AtomicRMWInst::And:
15851586
return C->isMinusOne();
1586-
// FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1587+
case AtomicRMWInst::Max: // max(x, INT_MIN) == x
1588+
return C->isMinValue(/*isSigned=*/true);
1589+
case AtomicRMWInst::Min: // min(x, INT_MAX) == x
1590+
return C->isMaxValue(/*isSigned=*/true);
1591+
case AtomicRMWInst::UMin: // umin(x, UINT_MAX) == x
1592+
return C->isMaxValue(/*isSigned=*/false);
15871593
default:
15881594
return false;
15891595
}

llvm/test/CodeGen/X86/atomic-idempotent.ll

Lines changed: 138 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -629,26 +629,32 @@ define void @atomic_umin_uint_max(ptr %addr) {
629629
;
630630
; X64-LABEL: atomic_umin_uint_max:
631631
; X64: # %bb.0:
632+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
632633
; X64-NEXT: movl (%rdi), %eax
633-
; X64-NEXT: .p2align 4
634-
; X64-NEXT: .LBB15_1: # %atomicrmw.start
635-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
636-
; X64-NEXT: lock cmpxchgl %eax, (%rdi)
637-
; X64-NEXT: jne .LBB15_1
638-
; X64-NEXT: # %bb.2: # %atomicrmw.end
639634
; X64-NEXT: retq
640635
;
641-
; X86-LABEL: atomic_umin_uint_max:
642-
; X86: # %bb.0:
643-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
644-
; X86-NEXT: movl (%ecx), %eax
645-
; X86-NEXT: .p2align 4
646-
; X86-NEXT: .LBB15_1: # %atomicrmw.start
647-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
648-
; X86-NEXT: lock cmpxchgl %eax, (%ecx)
649-
; X86-NEXT: jne .LBB15_1
650-
; X86-NEXT: # %bb.2: # %atomicrmw.end
651-
; X86-NEXT: retl
636+
; X86-SSE2-LABEL: atomic_umin_uint_max:
637+
; X86-SSE2: # %bb.0:
638+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
639+
; X86-SSE2-NEXT: mfence
640+
; X86-SSE2-NEXT: movl (%eax), %eax
641+
; X86-SSE2-NEXT: retl
642+
;
643+
; X86-SLM-LABEL: atomic_umin_uint_max:
644+
; X86-SLM: # %bb.0:
645+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
646+
; X86-SLM-NEXT: lock orl $0, (%esp)
647+
; X86-SLM-NEXT: movl (%eax), %eax
648+
; X86-SLM-NEXT: retl
649+
;
650+
; X86-ATOM-LABEL: atomic_umin_uint_max:
651+
; X86-ATOM: # %bb.0:
652+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
653+
; X86-ATOM-NEXT: lock orl $0, (%esp)
654+
; X86-ATOM-NEXT: movl (%eax), %eax
655+
; X86-ATOM-NEXT: nop
656+
; X86-ATOM-NEXT: nop
657+
; X86-ATOM-NEXT: retl
652658
atomicrmw umin ptr %addr, i32 -1 seq_cst
653659
ret void
654660
}
@@ -660,26 +666,32 @@ define void @atomic_umax_zero(ptr %addr) {
660666
;
661667
; X64-LABEL: atomic_umax_zero:
662668
; X64: # %bb.0:
669+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
663670
; X64-NEXT: movl (%rdi), %eax
664-
; X64-NEXT: .p2align 4
665-
; X64-NEXT: .LBB16_1: # %atomicrmw.start
666-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
667-
; X64-NEXT: lock cmpxchgl %eax, (%rdi)
668-
; X64-NEXT: jne .LBB16_1
669-
; X64-NEXT: # %bb.2: # %atomicrmw.end
670671
; X64-NEXT: retq
671672
;
672-
; X86-LABEL: atomic_umax_zero:
673-
; X86: # %bb.0:
674-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
675-
; X86-NEXT: movl (%ecx), %eax
676-
; X86-NEXT: .p2align 4
677-
; X86-NEXT: .LBB16_1: # %atomicrmw.start
678-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
679-
; X86-NEXT: lock cmpxchgl %eax, (%ecx)
680-
; X86-NEXT: jne .LBB16_1
681-
; X86-NEXT: # %bb.2: # %atomicrmw.end
682-
; X86-NEXT: retl
673+
; X86-SSE2-LABEL: atomic_umax_zero:
674+
; X86-SSE2: # %bb.0:
675+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
676+
; X86-SSE2-NEXT: mfence
677+
; X86-SSE2-NEXT: movl (%eax), %eax
678+
; X86-SSE2-NEXT: retl
679+
;
680+
; X86-SLM-LABEL: atomic_umax_zero:
681+
; X86-SLM: # %bb.0:
682+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
683+
; X86-SLM-NEXT: lock orl $0, (%esp)
684+
; X86-SLM-NEXT: movl (%eax), %eax
685+
; X86-SLM-NEXT: retl
686+
;
687+
; X86-ATOM-LABEL: atomic_umax_zero:
688+
; X86-ATOM: # %bb.0:
689+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
690+
; X86-ATOM-NEXT: lock orl $0, (%esp)
691+
; X86-ATOM-NEXT: movl (%eax), %eax
692+
; X86-ATOM-NEXT: nop
693+
; X86-ATOM-NEXT: nop
694+
; X86-ATOM-NEXT: retl
683695
atomicrmw umax ptr %addr, i32 0 seq_cst
684696
ret void
685697
}
@@ -691,26 +703,32 @@ define void @atomic_min_smax_char(ptr %addr) {
691703
;
692704
; X64-LABEL: atomic_min_smax_char:
693705
; X64: # %bb.0:
706+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
694707
; X64-NEXT: movzbl (%rdi), %eax
695-
; X64-NEXT: .p2align 4
696-
; X64-NEXT: .LBB17_1: # %atomicrmw.start
697-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
698-
; X64-NEXT: lock cmpxchgb %al, (%rdi)
699-
; X64-NEXT: jne .LBB17_1
700-
; X64-NEXT: # %bb.2: # %atomicrmw.end
701708
; X64-NEXT: retq
702709
;
703-
; X86-LABEL: atomic_min_smax_char:
704-
; X86: # %bb.0:
705-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
706-
; X86-NEXT: movzbl (%ecx), %eax
707-
; X86-NEXT: .p2align 4
708-
; X86-NEXT: .LBB17_1: # %atomicrmw.start
709-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
710-
; X86-NEXT: lock cmpxchgb %al, (%ecx)
711-
; X86-NEXT: jne .LBB17_1
712-
; X86-NEXT: # %bb.2: # %atomicrmw.end
713-
; X86-NEXT: retl
710+
; X86-SSE2-LABEL: atomic_min_smax_char:
711+
; X86-SSE2: # %bb.0:
712+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
713+
; X86-SSE2-NEXT: mfence
714+
; X86-SSE2-NEXT: movzbl (%eax), %eax
715+
; X86-SSE2-NEXT: retl
716+
;
717+
; X86-SLM-LABEL: atomic_min_smax_char:
718+
; X86-SLM: # %bb.0:
719+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
720+
; X86-SLM-NEXT: lock orl $0, (%esp)
721+
; X86-SLM-NEXT: movzbl (%eax), %eax
722+
; X86-SLM-NEXT: retl
723+
;
724+
; X86-ATOM-LABEL: atomic_min_smax_char:
725+
; X86-ATOM: # %bb.0:
726+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
727+
; X86-ATOM-NEXT: lock orl $0, (%esp)
728+
; X86-ATOM-NEXT: movzbl (%eax), %eax
729+
; X86-ATOM-NEXT: nop
730+
; X86-ATOM-NEXT: nop
731+
; X86-ATOM-NEXT: retl
714732
atomicrmw min ptr %addr, i8 127 seq_cst
715733
ret void
716734
}
@@ -722,26 +740,32 @@ define void @atomic_max_smin_char(ptr %addr) {
722740
;
723741
; X64-LABEL: atomic_max_smin_char:
724742
; X64: # %bb.0:
743+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
725744
; X64-NEXT: movzbl (%rdi), %eax
726-
; X64-NEXT: .p2align 4
727-
; X64-NEXT: .LBB18_1: # %atomicrmw.start
728-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
729-
; X64-NEXT: lock cmpxchgb %al, (%rdi)
730-
; X64-NEXT: jne .LBB18_1
731-
; X64-NEXT: # %bb.2: # %atomicrmw.end
732745
; X64-NEXT: retq
733746
;
734-
; X86-LABEL: atomic_max_smin_char:
735-
; X86: # %bb.0:
736-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
737-
; X86-NEXT: movzbl (%ecx), %eax
738-
; X86-NEXT: .p2align 4
739-
; X86-NEXT: .LBB18_1: # %atomicrmw.start
740-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
741-
; X86-NEXT: lock cmpxchgb %al, (%ecx)
742-
; X86-NEXT: jne .LBB18_1
743-
; X86-NEXT: # %bb.2: # %atomicrmw.end
744-
; X86-NEXT: retl
747+
; X86-SSE2-LABEL: atomic_max_smin_char:
748+
; X86-SSE2: # %bb.0:
749+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
750+
; X86-SSE2-NEXT: mfence
751+
; X86-SSE2-NEXT: movzbl (%eax), %eax
752+
; X86-SSE2-NEXT: retl
753+
;
754+
; X86-SLM-LABEL: atomic_max_smin_char:
755+
; X86-SLM: # %bb.0:
756+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
757+
; X86-SLM-NEXT: lock orl $0, (%esp)
758+
; X86-SLM-NEXT: movzbl (%eax), %eax
759+
; X86-SLM-NEXT: retl
760+
;
761+
; X86-ATOM-LABEL: atomic_max_smin_char:
762+
; X86-ATOM: # %bb.0:
763+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
764+
; X86-ATOM-NEXT: lock orl $0, (%esp)
765+
; X86-ATOM-NEXT: movzbl (%eax), %eax
766+
; X86-ATOM-NEXT: nop
767+
; X86-ATOM-NEXT: nop
768+
; X86-ATOM-NEXT: retl
745769
atomicrmw max ptr %addr, i8 -128 seq_cst
746770
ret void
747771
}
@@ -753,26 +777,32 @@ define void @atomic_min_umax_char(ptr %addr) {
753777
;
754778
; X64-LABEL: atomic_min_umax_char:
755779
; X64: # %bb.0:
780+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
756781
; X64-NEXT: movzbl (%rdi), %eax
757-
; X64-NEXT: .p2align 4
758-
; X64-NEXT: .LBB19_1: # %atomicrmw.start
759-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
760-
; X64-NEXT: lock cmpxchgb %al, (%rdi)
761-
; X64-NEXT: jne .LBB19_1
762-
; X64-NEXT: # %bb.2: # %atomicrmw.end
763782
; X64-NEXT: retq
764783
;
765-
; X86-LABEL: atomic_min_umax_char:
766-
; X86: # %bb.0:
767-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
768-
; X86-NEXT: movzbl (%ecx), %eax
769-
; X86-NEXT: .p2align 4
770-
; X86-NEXT: .LBB19_1: # %atomicrmw.start
771-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
772-
; X86-NEXT: lock cmpxchgb %al, (%ecx)
773-
; X86-NEXT: jne .LBB19_1
774-
; X86-NEXT: # %bb.2: # %atomicrmw.end
775-
; X86-NEXT: retl
784+
; X86-SSE2-LABEL: atomic_min_umax_char:
785+
; X86-SSE2: # %bb.0:
786+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
787+
; X86-SSE2-NEXT: mfence
788+
; X86-SSE2-NEXT: movzbl (%eax), %eax
789+
; X86-SSE2-NEXT: retl
790+
;
791+
; X86-SLM-LABEL: atomic_min_umax_char:
792+
; X86-SLM: # %bb.0:
793+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
794+
; X86-SLM-NEXT: lock orl $0, (%esp)
795+
; X86-SLM-NEXT: movzbl (%eax), %eax
796+
; X86-SLM-NEXT: retl
797+
;
798+
; X86-ATOM-LABEL: atomic_min_umax_char:
799+
; X86-ATOM: # %bb.0:
800+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
801+
; X86-ATOM-NEXT: lock orl $0, (%esp)
802+
; X86-ATOM-NEXT: movzbl (%eax), %eax
803+
; X86-ATOM-NEXT: nop
804+
; X86-ATOM-NEXT: nop
805+
; X86-ATOM-NEXT: retl
776806
atomicrmw umin ptr %addr, i8 255 seq_cst
777807
ret void
778808
}
@@ -784,26 +814,32 @@ define void @atomic_max_umin_char(ptr %addr) {
784814
;
785815
; X64-LABEL: atomic_max_umin_char:
786816
; X64: # %bb.0:
817+
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
787818
; X64-NEXT: movzbl (%rdi), %eax
788-
; X64-NEXT: .p2align 4
789-
; X64-NEXT: .LBB20_1: # %atomicrmw.start
790-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
791-
; X64-NEXT: lock cmpxchgb %al, (%rdi)
792-
; X64-NEXT: jne .LBB20_1
793-
; X64-NEXT: # %bb.2: # %atomicrmw.end
794819
; X64-NEXT: retq
795820
;
796-
; X86-LABEL: atomic_max_umin_char:
797-
; X86: # %bb.0:
798-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
799-
; X86-NEXT: movzbl (%ecx), %eax
800-
; X86-NEXT: .p2align 4
801-
; X86-NEXT: .LBB20_1: # %atomicrmw.start
802-
; X86-NEXT: # =>This Inner Loop Header: Depth=1
803-
; X86-NEXT: lock cmpxchgb %al, (%ecx)
804-
; X86-NEXT: jne .LBB20_1
805-
; X86-NEXT: # %bb.2: # %atomicrmw.end
806-
; X86-NEXT: retl
821+
; X86-SSE2-LABEL: atomic_max_umin_char:
822+
; X86-SSE2: # %bb.0:
823+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
824+
; X86-SSE2-NEXT: mfence
825+
; X86-SSE2-NEXT: movzbl (%eax), %eax
826+
; X86-SSE2-NEXT: retl
827+
;
828+
; X86-SLM-LABEL: atomic_max_umin_char:
829+
; X86-SLM: # %bb.0:
830+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
831+
; X86-SLM-NEXT: lock orl $0, (%esp)
832+
; X86-SLM-NEXT: movzbl (%eax), %eax
833+
; X86-SLM-NEXT: retl
834+
;
835+
; X86-ATOM-LABEL: atomic_max_umin_char:
836+
; X86-ATOM: # %bb.0:
837+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
838+
; X86-ATOM-NEXT: lock orl $0, (%esp)
839+
; X86-ATOM-NEXT: movzbl (%eax), %eax
840+
; X86-ATOM-NEXT: nop
841+
; X86-ATOM-NEXT: nop
842+
; X86-ATOM-NEXT: retl
807843
atomicrmw umax ptr %addr, i8 0 seq_cst
808844
ret void
809845
}

0 commit comments

Comments
 (0)