@@ -259,7 +259,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
259
259
checkInReduction (op, result);
260
260
checkMergeable (op, result);
261
261
checkPriority (op, result);
262
- checkPrivate (op, result);
263
262
checkUntied (op, result);
264
263
})
265
264
.Case ([&](omp::TaskgroupOp op) {
@@ -701,9 +700,9 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
701
700
702
701
// / Populates `privatizations` with privatization declarations used for the
703
702
// / given op.
704
- // / TODO: generalise beyond ParallelOp
703
+ template < class OP >
705
704
static void collectPrivatizationDecls (
706
- omp::ParallelOp op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
705
+ OP op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
707
706
std::optional<ArrayAttr> attr = op.getPrivateSyms ();
708
707
if (!attr)
709
708
return ;
@@ -1252,6 +1251,79 @@ static LogicalResult allocAndInitializeReductionVars(
1252
1251
return success ();
1253
1252
}
1254
1253
1254
+ /// Allocates the delayed-privatization variables of `opInst` by inlining the
1255
+ /// `alloc` region of each `omp.private` op in `privateDecls`. Returns the basic block that followed the alloca block on entry (i.e. the block after all of these allocations), or an error if an `alloc` region cannot be inlined.
1256
+ /// The llvm::Value * yielded for each private variable is mapped to the matching entry of `privateBlockArgs` and appended to `llvmPrivateVars`.
1257
+ template <class OP >
1258
+ static llvm::Expected<llvm::BasicBlock *>
1259
+ allocatePrivateVars (OP opInst, llvm::IRBuilderBase &builder,
1260
+ LLVM::ModuleTranslation &moduleTranslation,
1261
+ MutableArrayRef<BlockArgument> privateBlockArgs,
1262
+ MutableArrayRef<omp::PrivateClauseOp> privateDecls,
1263
+ llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
1264
+ const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
1265
+ // Start inserting just before the terminator of the alloca block.
1266
+ llvm::BranchInst *allocaTerminator =
1267
+ llvm::cast<llvm::BranchInst>(allocaIP.getBlock ()->getTerminator ());
1268
+ builder.SetInsertPoint (allocaTerminator);
1269
+ assert (allocaTerminator->getNumSuccessors () == 1 &&
1270
+ " This is an unconditional branch created by OpenMPIRBuilder" );
1271
+ llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor (0 );
1272
+
1273
+ // FIXME: Some of the allocation regions do more than just allocating.
1274
+ // They read from their block argument (amongst other non-alloca things).
1275
+ // When OpenMPIRBuilder outlines the parallel region into a different
1276
+ // function it places the loads for live in-values (such as these block
1277
+ // arguments) at the end of the entry block (because the entry block is
1278
+ // assumed to contain only allocas). Therefore, if we put these complicated
1279
+ // alloc blocks in the entry block, these will not dominate the availability
1280
+ // of the live-in values they are using. Fix this by adding a latealloc
1281
+ // block after the entry block to put these in (this also helps to avoid
1282
+ // mixing non-alloca code with allocas).
1283
+ // Alloc regions which do not use the block argument can still be placed in
1284
+ // the entry block (therefore keeping the allocas together).
1285
+ llvm::BasicBlock *privAllocBlock = nullptr ;
1286
+ if (!privateBlockArgs.empty ())
1287
+ privAllocBlock = splitBB (builder, true , " omp.private.latealloc" );
1288
+ for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1289
+ Region &allocRegion = privateDecls[i].getAllocRegion ();
1290
+
1291
+ // Map the alloc region's mold argument to the LLVM value of the original (non-private) variable.
1292
+ llvm::Value *nonPrivateVar =
1293
+ moduleTranslation.lookupValue (opInst.getPrivateVars ()[i]);
1294
+ assert (nonPrivateVar);
1295
+ moduleTranslation.mapValue (privateDecls[i].getAllocMoldArg (),
1296
+ nonPrivateVar);
1297
+
1298
+ // Inline the alloc region in place: in the alloca block when the mold argument is unused, otherwise in the latealloc block.
1299
+ SmallVector<llvm::Value *, 1 > phis;
1300
+ if (privateDecls[i].getAllocMoldArg ().getUses ().empty ()) {
1301
+ // TODO this should use
1302
+ // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1303
+ // the code for fetching the thread id. Not doing this for now to avoid
1304
+ // test churn.
1305
+ builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
1306
+ } else {
1307
+ builder.SetInsertPoint (privAllocBlock->getTerminator ());
1308
+ }
1309
+ if (failed (inlineConvertOmpRegions (allocRegion, " omp.private.alloc" ,
1310
+ builder, moduleTranslation, &phis)))
1311
+ return llvm::createStringError (
1312
+ " failed to inline `alloc` region of `omp.private`" );
1313
+
1314
+ assert (phis.size () == 1 && " expected one allocation to be yielded" );
1315
+
1316
+ moduleTranslation.mapValue (privateBlockArgs[i], phis[0 ]);
1317
+ llvmPrivateVars.push_back (phis[0 ]);
1318
+
1319
+ // Clear the alloc region block argument mapping in case it needs to be
1320
+ // re-created with a different source for another use of the same
1321
+ // `omp.private` op.
1322
+ moduleTranslation.forgetMapping (allocRegion);
1323
+ }
1324
+ return afterAllocas;
1325
+ }
1326
+
1255
1327
static LogicalResult
1256
1328
convertOmpSections (Operation &opInst, llvm::IRBuilderBase &builder,
1257
1329
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1486,16 +1558,98 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1486
1558
if (failed (checkImplementationStatus (*taskOp)))
1487
1559
return failure ();
1488
1560
1489
- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1561
+ // Collect delayed privatisation declarations
1562
+ MutableArrayRef<BlockArgument> privateBlockArgs =
1563
+ cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs ();
1564
+ SmallVector<llvm::Value *> llvmPrivateVars;
1565
+ SmallVector<omp::PrivateClauseOp> privateDecls;
1566
+ llvmPrivateVars.reserve (privateBlockArgs.size ());
1567
+ privateDecls.reserve (privateBlockArgs.size ());
1568
+ collectPrivatizationDecls (taskOp, privateDecls);
1569
+
1570
+ auto bodyCB = [&](InsertPointTy allocaIP,
1571
+ InsertPointTy codegenIP) -> llvm::Error {
1490
1572
// Save the alloca insertion point on ModuleTranslation stack for use in
1491
1573
// nested regions.
1492
1574
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame (
1493
1575
moduleTranslation, allocaIP);
1494
1576
1577
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars (
1578
+ taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
1579
+ llvmPrivateVars, allocaIP);
1580
+ if (handleError (afterAllocas, *taskOp).failed ())
1581
+ return llvm::make_error<PreviouslyReportedError>();
1582
+
1583
+ // Apply copy region for firstprivate
1584
+ bool needsFirstPrivate =
1585
+ llvm::any_of (privateDecls, [](omp::PrivateClauseOp &privOp) {
1586
+ return privOp.getDataSharingType () ==
1587
+ omp::DataSharingClauseType::FirstPrivate;
1588
+ });
1589
+ if (needsFirstPrivate) {
1590
+ // Find the end of the allocation blocks
1591
+ assert (afterAllocas.get ()->getSinglePredecessor ());
1592
+ builder.SetInsertPoint (
1593
+ afterAllocas.get ()->getSinglePredecessor ()->getTerminator ());
1594
+ llvm::BasicBlock *copyBlock =
1595
+ splitBB (builder, /* CreateBranch=*/ true , " omp.private.copy" );
1596
+ builder.SetInsertPoint (copyBlock->getFirstNonPHIOrDbgOrAlloca ());
1597
+ }
1598
+ for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1599
+ if (privateDecls[i].getDataSharingType () !=
1600
+ omp::DataSharingClauseType::FirstPrivate)
1601
+ continue ;
1602
+
1603
+ // copyRegion implements `lhs = rhs`
1604
+ Region ©Region = privateDecls[i].getCopyRegion ();
1605
+
1606
+ // map copyRegion rhs arg
1607
+ llvm::Value *nonPrivateVar =
1608
+ moduleTranslation.lookupValue (taskOp.getPrivateVars ()[i]);
1609
+ assert (nonPrivateVar);
1610
+ moduleTranslation.mapValue (privateDecls[i].getCopyMoldArg (),
1611
+ nonPrivateVar);
1612
+
1613
+ // map copyRegion lhs arg
1614
+ moduleTranslation.mapValue (privateDecls[i].getCopyPrivateArg (),
1615
+ llvmPrivateVars[i]);
1616
+
1617
+ // in-place convert copy region
1618
+ builder.SetInsertPoint (builder.GetInsertBlock ()->getTerminator ());
1619
+ if (failed (inlineConvertOmpRegions (copyRegion, " omp.private.copy" ,
1620
+ builder, moduleTranslation)))
1621
+ return llvm::createStringError (
1622
+ " failed to inline `copy` region of an `omp.private` op in taskOp" );
1623
+
1624
+ // ignore unused value yielded from copy region
1625
+
1626
+ // clear copy region block argument mapping in case it needs to be
1627
+ // re-created with a different source for reuse of the same `omp.private` op
1628
+ moduleTranslation.forgetMapping (copyRegion);
1629
+ }
1630
+
1631
+ // translate the body of the task:
1495
1632
builder.restoreIP (codegenIP);
1496
- return convertOmpOpRegions (taskOp.getRegion (), " omp.task.region" , builder,
1497
- moduleTranslation)
1498
- .takeError ();
1633
+ auto continuationBlockOrError = convertOmpOpRegions (
1634
+ taskOp.getRegion (), " omp.task.region" , builder, moduleTranslation);
1635
+ if (failed (handleError (continuationBlockOrError, *taskOp)))
1636
+ return llvm::make_error<PreviouslyReportedError>();
1637
+
1638
+ // private variable deallocation
1639
+ SmallVector<Region *> privateCleanupRegions;
1640
+ llvm::transform (privateDecls, std::back_inserter (privateCleanupRegions),
1641
+ [](omp::PrivateClauseOp privatizer) {
1642
+ return &privatizer.getDeallocRegion ();
1643
+ });
1644
+
1645
+ builder.SetInsertPoint (continuationBlockOrError.get ()->getTerminator ());
1646
+ if (failed (inlineOmpRegionCleanup (
1647
+ privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1648
+ " omp.private.dealloc" , /* shouldLoadCleanupRegionArg=*/ false )))
1649
+ return llvm::createStringError (" failed to inline `dealloc` region of an "
1650
+ " `omp.private` op in an omp.task" );
1651
+
1652
+ return llvm::Error::success ();
1499
1653
};
1500
1654
1501
1655
SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
@@ -1740,65 +1894,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1740
1894
1741
1895
auto bodyGenCB = [&](InsertPointTy allocaIP,
1742
1896
InsertPointTy codeGenIP) -> llvm::Error {
1743
- // Allocate private vars
1744
- llvm::BranchInst *allocaTerminator =
1745
- llvm::cast<llvm::BranchInst>(allocaIP.getBlock ()->getTerminator ());
1746
- builder.SetInsertPoint (allocaTerminator);
1747
- assert (allocaTerminator->getNumSuccessors () == 1 &&
1748
- " This is an unconditional branch created by OpenMPIRBuilder" );
1749
- llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor (0 );
1750
-
1751
- // FIXME: Some of the allocation regions do more than just allocating.
1752
- // They read from their block argument (amongst other non-alloca things).
1753
- // When OpenMPIRBuilder outlines the parallel region into a different
1754
- // function it places the loads for live in-values (such as these block
1755
- // arguments) at the end of the entry block (because the entry block is
1756
- // assumed to contain only allocas). Therefore, if we put these complicated
1757
- // alloc blocks in the entry block, these will not dominate the availability
1758
- // of the live-in values they are using. Fix this by adding a latealloc
1759
- // block after the entry block to put these in (this also helps to avoid
1760
- // mixing non-alloca code with allocas).
1761
- // Alloc regions which do not use the block argument can still be placed in
1762
- // the entry block (therefore keeping the allocas together).
1763
- llvm::BasicBlock *privAllocBlock = nullptr ;
1764
- if (!privateBlockArgs.empty ())
1765
- privAllocBlock = splitBB (builder, true , " omp.private.latealloc" );
1766
- for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1767
- Region &allocRegion = privateDecls[i].getAllocRegion ();
1768
-
1769
- // map allocation region block argument
1770
- llvm::Value *nonPrivateVar =
1771
- moduleTranslation.lookupValue (opInst.getPrivateVars ()[i]);
1772
- assert (nonPrivateVar);
1773
- moduleTranslation.mapValue (privateDecls[i].getAllocMoldArg (),
1774
- nonPrivateVar);
1775
-
1776
- // in-place convert the private allocation region
1777
- SmallVector<llvm::Value *, 1 > phis;
1778
- if (privateDecls[i].getAllocMoldArg ().getUses ().empty ()) {
1779
- // TODO this should use
1780
- // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1781
- // the code for fetching the thread id. Not doing this for now to avoid
1782
- // test churn.
1783
- builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
1784
- } else {
1785
- builder.SetInsertPoint (privAllocBlock->getTerminator ());
1786
- }
1787
- if (failed (inlineConvertOmpRegions (allocRegion, " omp.private.alloc" ,
1788
- builder, moduleTranslation, &phis)))
1789
- return llvm::createStringError (
1790
- " failed to inline `alloc` region of `omp.private`" );
1791
-
1792
- assert (phis.size () == 1 && " expected one allocation to be yielded" );
1793
-
1794
- moduleTranslation.mapValue (privateBlockArgs[i], phis[0 ]);
1795
- llvmPrivateVars.push_back (phis[0 ]);
1796
-
1797
- // clear alloc region block argument mapping in case it needs to be
1798
- // re-created with a different source for another use of the same
1799
- // reduction decl
1800
- moduleTranslation.forgetMapping (allocRegion);
1801
- }
1897
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars (
1898
+ opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
1899
+ llvmPrivateVars, allocaIP);
1900
+ if (handleError (afterAllocas, *opInst).failed ())
1901
+ return llvm::make_error<PreviouslyReportedError>();
1802
1902
1803
1903
// Allocate reduction vars
1804
1904
DenseMap<Value, llvm::Value *> reductionVariableMap;
@@ -1824,9 +1924,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1824
1924
});
1825
1925
if (needsFirstprivate) {
1826
1926
// Find the end of the allocation blocks
1827
- assert (afterAllocas->getSinglePredecessor ());
1927
+ assert (afterAllocas. get () ->getSinglePredecessor ());
1828
1928
builder.SetInsertPoint (
1829
- afterAllocas->getSinglePredecessor ()->getTerminator ());
1929
+ afterAllocas. get () ->getSinglePredecessor ()->getTerminator ());
1830
1930
llvm::BasicBlock *copyBlock =
1831
1931
splitBB (builder, /* CreateBranch=*/ true , " omp.private.copy" );
1832
1932
builder.SetInsertPoint (copyBlock->getFirstNonPHIOrDbgOrAlloca ());
0 commit comments