diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 7980ddb26491b..e7f8964cf5758 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -105,7 +105,7 @@ class AArch64InstructionSelector : public InstructionSelector { bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); /// Eliminate same-sized cross-bank copies into stores before selectImpl(). - bool contractCrossBankCopyIntoStore(MachineInstr &I, + bool contractCrossBankCopyIntoStore(GStore &I, MachineRegisterInfo &MRI); bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); @@ -1939,8 +1939,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { return true; } case TargetOpcode::G_STORE: { - bool Changed = contractCrossBankCopyIntoStore(I, MRI); - MachineOperand &SrcOp = I.getOperand(0); + auto &StoreMI = cast(I); + bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI); + MachineOperand &SrcOp = StoreMI.getOperand(0); if (MRI.getType(SrcOp.getReg()).isPointer()) { // Allow matching with imported patterns for stores of pointers. Unlike // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy @@ -1951,6 +1952,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); Changed = true; } +#if 0 + // Now look for truncating stores to the FPR bank. We don't support these, + // but since truncating store formation happens before RBS, we can only + // split them up again here. We don't want to assign truncstores to GPR only + // since that would have a perf impact due to extra moves. + LLT SrcTy = MRI.getType(SrcReg); + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { + if (SrcTy.isScalar() && + SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) { + // Generate an explicit truncate and make this into a non-truncating + // store. + auto Trunc = + MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg); + MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); + if (!select(*Trunc)) { + return false; + } + SrcOp.setReg(Trunc.getReg(0)); + return true; + } + } +#endif return Changed; } case TargetOpcode::G_PTR_ADD: @@ -2086,8 +2109,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, } bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( - MachineInstr &I, MachineRegisterInfo &MRI) { - assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); + GStore &StoreMI, MachineRegisterInfo &MRI) { // If we're storing a scalar, it doesn't matter what register bank that // scalar is on. All that matters is the size. // @@ -2102,11 +2124,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( // G_STORE %x:gpr(s32) // // And then continue the selection process normally. - Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); + Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI); if (!DefDstReg.isValid()) return false; LLT DefDstTy = MRI.getType(DefDstReg); - Register StoreSrcReg = I.getOperand(0).getReg(); + Register StoreSrcReg = StoreMI.getValueReg(); LLT StoreSrcTy = MRI.getType(StoreSrcReg); // If we get something strange like a physical register, then we shouldn't @@ -2118,12 +2140,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) return false; + // Is this store a truncating one? + if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits()) + return false; + if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == RBI.getRegBank(DefDstReg, MRI, TRI)) return false; // We have a cross-bank copy, which is entering a store. Let's fold it. - I.getOperand(0).setReg(DefDstReg); + StoreMI.getOperand(0).setReg(DefDstReg); return true; } @@ -2702,9 +2728,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_ZEXTLOAD: case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { - GLoadStore &LdSt = cast(I); bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; - LLT PtrTy = MRI.getType(LdSt.getPointerReg()); + LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); if (PtrTy != LLT::pointer(0, 64)) { LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy @@ -2712,19 +2737,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return false; } - uint64_t MemSizeInBytes = LdSt.getMemSize(); - unsigned MemSizeInBits = LdSt.getMemSizeInBits(); - AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); + auto &MemOp = **I.memoperands_begin(); + uint64_t MemSizeInBytes = MemOp.getSize(); + unsigned MemSizeInBits = MemSizeInBytes * 8; + AtomicOrdering Order = MemOp.getSuccessOrdering(); // Need special instructions for atomics that affect ordering. if (Order != AtomicOrdering::NotAtomic && Order != AtomicOrdering::Unordered && Order != AtomicOrdering::Monotonic) { - assert(!isa(LdSt)); + assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD); if (MemSizeInBytes > 64) return false; - if (isa(LdSt)) { + if (I.getOpcode() == TargetOpcode::G_LOAD) { static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); @@ -2738,7 +2764,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } #ifndef NDEBUG - const Register PtrReg = LdSt.getPointerReg(); + const Register PtrReg = I.getOperand(1).getReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. assert(PtrRB.getID() == AArch64::GPRRegBankID && @@ -2747,31 +2773,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { "Load/Store pointer operand isn't a pointer"); #endif - const Register ValReg = LdSt.getReg(0); - const LLT ValTy = MRI.getType(ValReg); + const Register ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); - // The code below doesn't support truncating stores, so we need to split it - // again. - if (isa(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { - unsigned SubReg; - LLT MemTy = LdSt.getMMO().getMemoryType(); - auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); - if (!getSubRegForClass(RC, TRI, SubReg)) - return false; - - // Generate a subreg copy. - auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) - .addReg(ValReg, 0, SubReg) - .getReg(0); - RBI.constrainGenericRegister(Copy, *RC, MRI); - LdSt.getOperand(0).setReg(Copy); - } - // Helper lambda for partially selecting I. Either returns the original // instruction with an updated opcode, or a new instruction. auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { - bool IsStore = isa(I); + bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; const unsigned NewOpc = selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); if (NewOpc == I.getOpcode()) @@ -2788,8 +2796,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // Folded something. Create a new instruction and return it. auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); - Register CurValReg = I.getOperand(0).getReg(); - IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); + IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); NewInst.cloneMemRefs(I); for (auto &Fn : *AddrModeFns) Fn(NewInst); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir index 61a70dd782651..cb92b6a58ba5e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir @@ -1,15 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - define void @contract_s64_gpr(i64* %addr) { ret void } - define void @contract_s32_gpr(i32* %addr) { ret void } - define void @contract_s64_fpr(i64* %addr) { ret void } - define void @contract_s32_fpr(i32* %addr) { ret void } - define void @contract_s16_fpr(i16* %addr) { ret void } - define void @contract_g_unmerge_values_first(i128* %addr) { ret void } - define void @contract_g_unmerge_values_second(i128* %addr) { ret void } -... --- name: contract_s64_gpr legalized: true @@ -20,11 +11,11 @@ body: | ; CHECK-LABEL: name: contract_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) + ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:fpr(s64) = COPY %1 - G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr) + G_STORE %2:fpr(s64), %0 :: (store (s64)) ... --- name: contract_s32_gpr @@ -36,11 +27,11 @@ body: | ; CHECK-LABEL: name: contract_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) + ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32)) %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 %2:fpr(s32) = COPY %1 - G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr) + G_STORE %2:fpr(s32), %0 :: (store (s32)) ... --- name: contract_s64_fpr @@ -52,11 +43,11 @@ body: | ; CHECK-LABEL: name: contract_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 %1:fpr(s64) = COPY $d1 %2:gpr(s64) = COPY %1 - G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr) + G_STORE %2:gpr(s64), %0 :: (store (s64)) ... --- name: contract_s32_fpr @@ -68,11 +59,11 @@ body: | ; CHECK-LABEL: name: contract_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) + ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32)) %0:gpr(p0) = COPY $x0 %1:fpr(s32) = COPY $s1 %2:gpr(s32) = COPY %1 - G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr) + G_STORE %2:gpr(s32), %0 :: (store (s32)) ... --- name: contract_s16_fpr @@ -84,11 +75,11 @@ body: | ; CHECK-LABEL: name: contract_s16_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 - ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr) + ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16)) %0:gpr(p0) = COPY $x0 %1:fpr(s16) = COPY $h1 %2:gpr(s16) = COPY %1 - G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr) + G_STORE %2:gpr(s16), %0 :: (store (s16)) ... --- name: contract_g_unmerge_values_first @@ -99,15 +90,16 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: contract_g_unmerge_values_first ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>)) + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub + ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr) + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>)) %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) %4:gpr(s64) = COPY %2 %5:gpr(s64) = COPY %3 - G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr) + G_STORE %4:gpr(s64), %0 :: (store (s64)) ... --- name: contract_g_unmerge_values_second @@ -118,12 +110,31 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: contract_g_unmerge_values_second ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>)) + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub + ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1 + ; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr) + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>)) %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) %4:gpr(s64) = COPY %2 %5:gpr(s64) = COPY %3 - G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr) + G_STORE %5:gpr(s64), %0 :: (store (s64)) +... +--- +name: contract_s16_truncstore +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $s1 + ; CHECK-LABEL: name: contract_s16_truncstore + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16)) + %0:gpr(p0) = COPY $x0 + %1:fpr(s32) = COPY $s1 + %2:gpr(s32) = COPY %1 + G_STORE %2:gpr(s32), %0 :: (store (s16)) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir deleted file mode 100644 index 3e06016f4af53..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir +++ /dev/null @@ -1,116 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - define void @truncating_f32(double %x) { - %alloca = alloca i32, align 4 - %bitcast = bitcast double %x to i64 - %trunc = trunc i64 %bitcast to i32 - store i32 %trunc, i32* %alloca, align 4 - ret void - } - - define void @truncating_f16(double %x) { - %alloca = alloca i16, align 2 - %bitcast = bitcast double %x to i64 - %trunc = trunc i64 %bitcast to i16 - store i16 %trunc, i16* %alloca, align 2 - ret void - } - - define void @truncating_f8(double %x) { - %alloca = alloca i8, align 1 - %bitcast = bitcast double %x to i64 - %trunc = trunc i64 %bitcast to i8 - store i8 %trunc, i8* %alloca, align 1 - ret void - } - -... ---- -name: truncating_f32 -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -liveins: - - { reg: '$d0' } -frameInfo: - maxAlignment: 4 -stack: - - { id: 0, name: alloca, size: 4, alignment: 4 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0 - - ; CHECK-LABEL: name: truncating_f32 - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub - ; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca) - ; CHECK: RET_ReallyLR - %0:fpr(s64) = COPY $d0 - %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca - G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca) - RET_ReallyLR - -... ---- -name: truncating_f16 -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -liveins: - - { reg: '$d0' } -frameInfo: - maxAlignment: 2 -stack: - - { id: 0, name: alloca, size: 2, alignment: 2 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0 - - ; CHECK-LABEL: name: truncating_f16 - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub - ; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca) - ; CHECK: RET_ReallyLR - %0:fpr(s64) = COPY $d0 - %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca - G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca) - RET_ReallyLR - -... ---- -name: truncating_f8 -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -liveins: - - { reg: '$d0' } -frameInfo: - maxAlignment: 1 -stack: - - { id: 0, name: alloca, size: 1, alignment: 1 } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0 - - ; CHECK-LABEL: name: truncating_f8 - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub - ; CHECK: [[COPY2:%[0-9]+]]:fpr8 = COPY [[COPY1]] - ; CHECK: STRBui [[COPY2]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca) - ; CHECK: RET_ReallyLR - %0:fpr(s64) = COPY $d0 - %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca - G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca) - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir index 9d044d18b8c24..e0983dbfdd14c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir @@ -278,13 +278,13 @@ body: | ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8)) + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]] - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0 ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]] %2:gpr(p0) = COPY $x0 - %0:fpr(s8) = G_LOAD %2(p0) :: (load (s8)) - %1:gpr(s32) = G_ZEXT %0(s8) + %0:fpr(s1) = G_LOAD %2(p0) :: (load (s1)) + %1:gpr(s32) = G_ZEXT %0(s1) $noreg = PATCHABLE_RET %1(s32) ...