Skip to content

[AtomicExpandPass] Match isIdempotentRMW with InstcombineRMW #142277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented May 31, 2025

Add umin, smin, umax, smax to isIdempotentRMW

@llvmbot
Copy link
Member

llvmbot commented May 31, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/142277.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+7-1)
  • (modified) llvm/test/CodeGen/X86/atomic-idempotent.ll (+222)
  • (modified) llvm/test/Transforms/InstCombine/atomicrmw.ll (+20)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..2b86bed152d7c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1580,10 +1580,16 @@ bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
   case AtomicRMWInst::Sub:
   case AtomicRMWInst::Or:
   case AtomicRMWInst::Xor:
+  case AtomicRMWInst::UMax: // umax(x, 0) == x
     return C->isZero();
   case AtomicRMWInst::And:
     return C->isMinusOne();
-  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+  case AtomicRMWInst::Max: // max(x, INT_MIN) == x
+    return C->isMinValue(/*isSigned=*/true);
+  case AtomicRMWInst::Min: // min(x, INT_MAX) == x
+    return C->isMaxValue(/*isSigned=*/true);
+  case AtomicRMWInst::UMin: // umin(x, UINT_MAX) == x
+    return C->isMaxValue(/*isSigned=*/false);
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 020f9eb793102..db3dedb6f7ac0 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -622,4 +622,226 @@ define void @or8_nouse_seq_cst(ptr %p) #0 {
   ret void
 }
 
+define void @atomic_umin_uint_max(ptr %addr) {
+; CHECK-LABEL: @atomic_umin_uint_max(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+; X64-LABEL: atomic_umin_uint_max:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_umin_uint_max:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_umin_uint_max:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_umin_uint_max:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umin ptr %addr, i32 -1 seq_cst
+  ret void
+}
+
+define void @atomic_umax_zero(ptr %addr) {
+; CHECK-LABEL: @atomic_umax_zero(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+; X64-LABEL: atomic_umax_zero:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_umax_zero:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_umax_zero:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_umax_zero:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umax ptr %addr, i32 0 seq_cst
+  ret void
+}
+
+define void @atomic_min_smax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_smax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_min_smax_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_min_smax_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_min_smax_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_min_smax_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw min ptr %addr, i8 127 seq_cst
+  ret void
+}
+
+define void @atomic_max_smin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_smin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_max_smin_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_max_smin_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_max_smin_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_max_smin_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw max ptr %addr, i8 -128 seq_cst
+  ret void
+}
+
+define void @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_min_umax_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_min_umax_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_min_umax_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_min_umax_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umin ptr %addr, i8 255 seq_cst
+  ret void
+}
+
+define void @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_max_umin_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_max_umin_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_max_umin_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_max_umin_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umax ptr %addr, i8 0 seq_cst
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
index ca5ffd110ad61..b6c0e1e810f96 100644
--- a/llvm/test/Transforms/InstCombine/atomicrmw.ll
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -85,6 +85,26 @@ define i8 @atomic_max_smin_char(ptr %addr) {
   ret i8 %res
 }
 
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %res = atomicrmw umin ptr %addr, i8 255 monotonic
+  ret i8 %res
+}
+
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %res = atomicrmw umax ptr %addr, i8 0 monotonic
+  ret i8 %res
+}
+
 ; Idempotent atomicrmw are still canonicalized.
 define float @atomic_fsub_zero(ptr %addr) {
 ; CHECK-LABEL: @atomic_fsub_zero(

@llvmbot
Copy link
Member

llvmbot commented May 31, 2025

@llvm/pr-subscribers-llvm-transforms

Author: AZero13 (AZero13)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/142277.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+7-1)
  • (modified) llvm/test/CodeGen/X86/atomic-idempotent.ll (+222)
  • (modified) llvm/test/Transforms/InstCombine/atomicrmw.ll (+20)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..2b86bed152d7c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1580,10 +1580,16 @@ bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
   case AtomicRMWInst::Sub:
   case AtomicRMWInst::Or:
   case AtomicRMWInst::Xor:
+  case AtomicRMWInst::UMax: // umax(x, 0) == x
     return C->isZero();
   case AtomicRMWInst::And:
     return C->isMinusOne();
-  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+  case AtomicRMWInst::Max: // max(x, INT_MIN) == x
+    return C->isMinValue(/*isSigned=*/true);
+  case AtomicRMWInst::Min: // min(x, INT_MAX) == x
+    return C->isMaxValue(/*isSigned=*/true);
+  case AtomicRMWInst::UMin: // umin(x, UINT_MAX) == x
+    return C->isMaxValue(/*isSigned=*/false);
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 020f9eb793102..db3dedb6f7ac0 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -622,4 +622,226 @@ define void @or8_nouse_seq_cst(ptr %p) #0 {
   ret void
 }
 
+define void @atomic_umin_uint_max(ptr %addr) {
+; CHECK-LABEL: @atomic_umin_uint_max(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+; X64-LABEL: atomic_umin_uint_max:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_umin_uint_max:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_umin_uint_max:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_umin_uint_max:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umin ptr %addr, i32 -1 seq_cst
+  ret void
+}
+
+define void @atomic_umax_zero(ptr %addr) {
+; CHECK-LABEL: @atomic_umax_zero(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+; X64-LABEL: atomic_umax_zero:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_umax_zero:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_umax_zero:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_umax_zero:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umax ptr %addr, i32 0 seq_cst
+  ret void
+}
+
+define void @atomic_min_smax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_smax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_min_smax_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_min_smax_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_min_smax_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_min_smax_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw min ptr %addr, i8 127 seq_cst
+  ret void
+}
+
+define void @atomic_max_smin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_smin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_max_smin_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_max_smin_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_max_smin_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_max_smin_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw max ptr %addr, i8 -128 seq_cst
+  ret void
+}
+
+define void @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_min_umax_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_min_umax_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_min_umax_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_min_umax_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umin ptr %addr, i8 255 seq_cst
+  ret void
+}
+
+define void @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+; X64-LABEL: atomic_max_umin_char:
+; X64:       # %bb.0:
+; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    retq
+;
+; X86-SSE2-LABEL: atomic_max_umin_char:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    mfence
+; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-SLM-LABEL: atomic_max_umin_char:
+; X86-SLM:       # %bb.0:
+; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT:    lock orl $0, (%esp)
+; X86-SLM-NEXT:    movzbl (%eax), %eax
+; X86-SLM-NEXT:    retl
+;
+; X86-ATOM-LABEL: atomic_max_umin_char:
+; X86-ATOM:       # %bb.0:
+; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT:    lock orl $0, (%esp)
+; X86-ATOM-NEXT:    movzbl (%eax), %eax
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    nop
+; X86-ATOM-NEXT:    retl
+  atomicrmw umax ptr %addr, i8 0 seq_cst
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
index ca5ffd110ad61..b6c0e1e810f96 100644
--- a/llvm/test/Transforms/InstCombine/atomicrmw.ll
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -85,6 +85,26 @@ define i8 @atomic_max_smin_char(ptr %addr) {
   ret i8 %res
 }
 
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %res = atomicrmw umin ptr %addr, i8 255 monotonic
+  ret i8 %res
+}
+
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %res = atomicrmw umax ptr %addr, i8 0 monotonic
+  ret i8 %res
+}
+
 ; Idempotent atomicrmw are still canonicalized.
 define float @atomic_fsub_zero(ptr %addr) {
 ; CHECK-LABEL: @atomic_fsub_zero(

@AZero13 AZero13 changed the title [AtomicExpandPass] Add umin, smin, umax, smax to isIdempotentRMW [AtomicExpandPass] Match isIdempotentRMW with InstcombineRMW May 31, 2025
@AZero13
Copy link
Contributor Author

AZero13 commented Jun 3, 2025

@topperc Thoughts on this?

@AZero13 AZero13 requested a review from topperc June 3, 2025 23:19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants