Skip to content

Commit 33deaa1

Browse files
committed
[memcpyopt] Common code into performCallSlotOptzn [NFC]
We have the same code repeated in both callers, so sink it into the callee. The motivation here isn't just code style: we can also defer the relatively expensive aliasing checks until the cheap structural preconditions have been validated (e.g., don't bother with alias queries if src is not an alloca). This helps compile time significantly.
1 parent 7d6e8f2 commit 33deaa1

File tree

1 file changed

+32
-31
lines changed

1 file changed

+32
-31
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -770,15 +770,6 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
770770
C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
771771
}
772772

773-
if (C) {
774-
// Check that nothing touches the dest of the "copy" between
775-
// the call and the store.
776-
MemoryLocation StoreLoc = MemoryLocation::get(SI);
777-
if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
778-
MSSA->getMemoryAccess(SI)))
779-
C = nullptr;
780-
}
781-
782773
if (C) {
783774
bool changed = performCallSlotOptzn(
784775
LI, SI, SI->getPointerOperand()->stripPointerCasts(),
@@ -905,6 +896,23 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
905896
if (cpySize < srcSize)
906897
return false;
907898

899+
if (C->getParent() != cpyStore->getParent()) {
900+
LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
901+
return false;
902+
}
903+
904+
MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
905+
MemoryLocation::get(cpyStore) :
906+
MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
907+
908+
// Check that nothing touches the dest of the copy between
909+
// the call and the store/memcpy.
910+
if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
911+
MSSA->getMemoryAccess(cpyStore))) {
912+
LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
913+
return false;
914+
}
915+
908916
// Check that accessing the first srcSize bytes of dest will not cause a
909917
// trap. Otherwise the transform is invalid since it might cause a trap
910918
// to occur earlier than it otherwise would.
@@ -914,6 +922,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
914922
return false;
915923
}
916924

925+
917926
// Make sure that nothing can observe cpyDest being written early. There are
918927
// a number of cases to consider:
919928
// 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
@@ -1443,28 +1452,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
14431452
if (Instruction *MI = MD->getMemoryInst()) {
14441453
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
14451454
if (auto *C = dyn_cast<CallInst>(MI)) {
1446-
// The memcpy must post-dom the call. Limit to the same block for
1447-
// now. Additionally, we need to ensure that there are no accesses
1448-
// to dest between the call and the memcpy. Accesses to src will be
1449-
// checked by performCallSlotOptzn().
1450-
// TODO: Support non-local call-slot optimization?
1451-
if (C->getParent() == M->getParent() &&
1452-
!accessedBetween(*AA, DestLoc, MD, MA)) {
1453-
// FIXME: Can we pass in either of dest/src alignment here instead
1454-
// of conservatively taking the minimum?
1455-
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1456-
M->getSourceAlign().valueOrOne());
1457-
if (performCallSlotOptzn(
1458-
M, M, M->getDest(), M->getSource(),
1459-
TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1460-
C)) {
1461-
LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1462-
<< " call: " << *C << "\n"
1463-
<< " memcpy: " << *M << "\n");
1464-
eraseInstruction(M);
1465-
++NumMemCpyInstr;
1466-
return true;
1467-
}
1455+
// FIXME: Can we pass in either of dest/src alignment here instead
1456+
// of conservatively taking the minimum?
1457+
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1458+
M->getSourceAlign().valueOrOne());
1459+
if (performCallSlotOptzn(
1460+
M, M, M->getDest(), M->getSource(),
1461+
TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1462+
C)) {
1463+
LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1464+
<< " call: " << *C << "\n"
1465+
<< " memcpy: " << *M << "\n");
1466+
eraseInstruction(M);
1467+
++NumMemCpyInstr;
1468+
return true;
14681469
}
14691470
}
14701471
}

0 commit comments

Comments
 (0)