diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9864adc4dcc95..072705bd31b1a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
   using namespace llvm::PatternMatch;
 
   FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
-  if (!VTy)
-    return false;
 
-  if (I->getOpcode() == Instruction::Mul &&
+  if (VTy && I->getOpcode() == Instruction::Mul &&
       VTy->getElementType()->isIntegerTy(64)) {
     for (auto &Op : I->operands()) {
       // Make sure we are not already sinking this operand
@@ -7159,9 +7157,6 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
     return !Ops.empty();
   }
 
-  // A uniform shift amount in a vector shift or funnel shift may be much
-  // cheaper than a generic variable vector shift, so make that pattern visible
-  // to SDAG by sinking the shuffle instruction next to the shift.
   int ShiftAmountOpNum = -1;
   if (I->isShift())
     ShiftAmountOpNum = 1;
@@ -7170,16 +7165,31 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
         II->getIntrinsicID() == Intrinsic::fshr)
       ShiftAmountOpNum = 2;
   }
-
   if (ShiftAmountOpNum == -1)
     return false;
 
+  auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum);
-  auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
+  // A uniform shift amount in a vector shift or funnel shift may be much
+  // cheaper than a generic variable vector shift, so make that pattern visible
+  // to SDAG by sinking the shuffle instruction next to the shift.
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
   if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
       isVectorShiftByScalarCheap(I->getType())) {
-    Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
+    Ops.push_back(ShiftAmount);
     return true;
   }
 
+  // Casts taking a constant expression (generally derived from a global
+  // variable address) as an operand are profitable to sink because they appear
+  // as subexpressions in the instruction sequence generated by the
+  // LowerTypeTests pass which is expected to pattern match to the rotate
+  // instruction's immediate operand.
+  if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
+    if (isa<ConstantExpr>(CI->getOperand(0))) {
+      Ops.push_back(ShiftAmount);
+      return true;
+    }
+  }
+
   return false;
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
new file mode 100644
index 0000000000000..301f22483cdd1
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
@@ -0,0 +1,178 @@
+; Make sure that if optimizations hoist or CSE zext(const) it gets undone by CodeGenPrepare.
+
+; This IR is normally generated by LowerTypeTests during ThinLTO importing
+; so it will go through the ThinLTO pass pipeline.
+; RUN: opt -passes='thinlto<O0>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O1>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O2>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O3>' -S < %s | opt -codegenprepare -S | FileCheck %s
+
+; Also check the regular pipelines for completeness.
+; RUN: opt -O0 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O1 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O2 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O3 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -Os -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -Oz -S < %s | opt -codegenprepare -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we still have two pairs of zexts (non dominating case).
+
+; CHECK: define void @f1
+define void @f1(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+  br i1 %0, label %4, label %17
+
+4:
+  %5 = load ptr, ptr %1, align 8
+  %6 = ptrtoint ptr %5 to i64
+  %7 = sub i64 %6, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %8 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %9 = lshr i64 %7, %8
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %10 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %11 = shl i64 %7, %10
+  %12 = or i64 %9, %11
+  %13 = icmp ule i64 %12, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %13, label %15, label %14
+
+14:
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+15:
+  %16 = load ptr, ptr %5, align 8
+  call void %16(ptr noundef nonnull align 8 dereferenceable(8) %1)
+  br label %30
+
+17:
+  %18 = load ptr, ptr %2, align 8
+  %19 = ptrtoint ptr %18 to i64
+  %20 = sub i64 %19, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %21 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %22 = lshr i64 %20, %21
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %23 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %24 = shl i64 %20, %23
+  %25 = or i64 %22, %24
+  %26 = icmp ule i64 %25, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %26, label %28, label %27
+
+27:
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+28:
+  %29 = load ptr, ptr %18, align 8
+  call void %29(ptr noundef nonnull align 8 dereferenceable(8) %2)
+  br label %30
+
+30:
+  ret void
+}
+
+; Check that we still have two pairs of zexts (dominating case).
+
+; CHECK: define void @f2
+define void @f2(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+  %4 = load ptr, ptr %1, align 8
+  %5 = ptrtoint ptr %4 to i64
+  %6 = sub i64 %5, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %7 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %8 = lshr i64 %6, %7
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %9 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %10 = shl i64 %6, %9
+  %11 = or i64 %8, %10
+  %12 = icmp ule i64 %11, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %12, label %14, label %13
+
+13: ; preds = %3
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+14: ; preds = %3
+  %15 = load ptr, ptr %4, align 8
+  call void %15(ptr noundef nonnull align 8 dereferenceable(8) %1)
+  br i1 %0, label %16, label %29
+
+16: ; preds = %14
+  %17 = load ptr, ptr %2, align 8
+  %18 = ptrtoint ptr %17 to i64
+  %19 = sub i64 %18, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %20 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %21 = lshr i64 %19, %20
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %22 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %23 = shl i64 %19, %22
+  %24 = or i64 %21, %23
+  %25 = icmp ule i64 %24, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %25, label %27, label %26
+
+26: ; preds = %16
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+27: ; preds = %16
+  %28 = load ptr, ptr %17, align 8
+  call void %28(ptr noundef nonnull align 8 dereferenceable(8) %2)
+  br label %29
+
+29: ; preds = %27, %14
+  ret void
+}
+
+; Check that the zexts aren't moved to the preheader (or anywhere else)
+; and stay in the same basic block.
+
+; CHECK: define void @f3
+define void @f3(ptr noundef readonly captures(address) %0, ptr noundef readnone captures(address) %1) {
+  %3 = icmp eq ptr %0, %1
+  br i1 %3, label %21, label %4
+
+4:
+  ; CHECK: = phi
+  %5 = phi ptr [ %19, %17 ], [ %0, %2 ]
+  %6 = load ptr, ptr %5, align 8
+  %7 = load ptr, ptr %6, align 8
+  %8 = ptrtoint ptr %7 to i64
+  %9 = sub i64 %8, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %10 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %11 = lshr i64 %9, %10
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %12 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %13 = shl i64 %9, %12
+  %14 = or i64 %11, %13
+  %15 = icmp ule i64 %14, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %15, label %17, label %16
+
+16:
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+17:
+  %18 = load ptr, ptr %7, align 8
+  call void %18(ptr noundef nonnull align 8 dereferenceable(8) %6)
+  %19 = getelementptr inbounds nuw i8, ptr %5, i64 8
+  %20 = icmp eq ptr %19, %1
+  br i1 %20, label %21, label %4
+
+21:
+  ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
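Illustration (not part of the patch): the shape being protected is the rotate-based range check that LowerTypeTests emits, as in the test above. The sketch below is a reduced, hedged example of that pattern with the zext instructions kept in the same basic block as the shifts they feed; the names @align_sym and @range_check are made up for this sketch. Because each zext's operand is a constant expression derived from a global's address, the new check in isProfitableToSinkOperands() reports those shift-amount uses as sinkable, so CodeGenPrepare places the zexts back next to their shifts if an earlier pass hoisted or CSE'd them, and the backend is then expected to fold the link-time constant into the rotate's immediate operand.

@align_sym = external hidden global [0 x i8], !absolute_symbol !0

define i64 @range_check(i64 %addr) {
  ; Both shift amounts are zexts of constant expressions; keeping them in this
  ; block, rather than hoisted into a dominating block, is what the patch
  ; preserves so the rotate can use an immediate shift amount.
  %amt = zext i8 ptrtoint (ptr @align_sym to i8) to i64
  %inv = zext i8 sub (i8 64, i8 ptrtoint (ptr @align_sym to i8)) to i64
  %lo = lshr i64 %addr, %amt
  %hi = shl i64 %addr, %inv
  %rot = or i64 %lo, %hi ; lshr+shl+or forms a rotate right by %amt
  ret i64 %rot
}

!0 = !{i64 0, i64 256}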