@@ -3373,6 +3373,70 @@ def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSMa<[100, 101]>]> {
3373
3373
}];
3374
3374
}
3375
3375
3376
+ def NVVM_Tcgen05MmaSmemDescOp : NVVM_Op<"tcgen05.mma_smem_desc", []> {
3377
+ let summary = "Constructs a Shared Memory descriptor for MMA Operands A or B";
3378
+ let description = [{
3379
+ The `nvvm.tcgen05_mma_smem_desc` constructs a Shared Memory descriptor
3380
+ for tcgen05.mma. This descriptor is a 64-bit value which describes the
3381
+ properties of multiplicand matrix in shared memory including its location
3382
+ in the shared memory of the current CTA.
3383
+
3384
+ +-----------+------+------------------------------------------------------+
3385
+ | Bit-field | Size | Description |
3386
+ +-----------+------+------------------------------------------------------+
3387
+ | 0-13 | 14 | Matrix start address |
3388
+ | 14-15 | 2 | Reserved |
3389
+ | 16-29 | 14 | Leading dim relative-offset (or) absolute-address |
3390
+ | 30-31 | 2 | Reserved |
3391
+ | 32-45 | 14 | Stride dimension byte offset |
3392
+ | 46-48 | 3 | Fixed constant value of 0b001 |
3393
+ | 49-51 | 3 | Matrix base offset |
3394
+ | 52 | 1 | Leading dimension stride mode: |
3395
+ | | | 0: byte offset relative |
3396
+ | | | 1: byte address absolute |
3397
+ | 53-60 | 8 | Fixed constant value of 0xb00000000 |
3398
+ | 61-63 | 3 | Swizzling mode: |
3399
+ | | | 0: No swizzling |
3400
+ | | | 1: 128-Byte with 32B atomic swizzling |
3401
+ | | | 2: 128-Byte swizzling |
3402
+ | | | 4: 64-Byte swizzling |
3403
+ | | | 6: 32-Byte swizzling |
3404
+ | | | (Values 3, 5 and 7 are invalid) |
3405
+ +-----------+------+------------------------------------------------------+
3406
+
3407
+ Example:
3408
+ ```mlir
3409
+ %desc = nvvm.tcgen05.mma_smem_desc (%startAddr, %leadingDimOffset, %strideDimOffset,
3410
+ %baseOffset, %leadingDimMode, %swizzleMode) : (i32, i32, i32, i8, i1, i8) -> i64
3411
+ ```
3412
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-shared-memory-descriptor)
3413
+ }];
3414
+
3415
+ let arguments = (ins
3416
+ I32:$startAddr, // Matrix A or B start address (bits 13-0)
3417
+ I32:$leadingDimOffset, // Matrix A or B leading dim byte offset (bits 29-16)
3418
+ I32:$strideDimOffset, // Matrix A or B stride dim byte offset (bits 45-32)
3419
+ I8:$baseOffset, // Matrix A or B base offset (bits 51-49)
3420
+ I1:$leadingDimMode, // Matrix A or B leading dim mode (bit 52)
3421
+ I8:$swizzleMode // Swizzle mode (bits 63-61)
3422
+ );
3423
+
3424
+ let results = (outs I64:$res);
3425
+
3426
+ let assemblyFormat = [{
3427
+ `(` operands `)` attr-dict `:` `(` type(operands) `)` `->` type($res)
3428
+ }];
3429
+
3430
+ let extraClassDeclaration = [{
3431
+ static void createSmemDescriptor(Operation &op, LLVM::ModuleTranslation &mt,
3432
+ llvm::IRBuilderBase& builder);
3433
+ }];
3434
+
3435
+ string llvmBuilder = [{
3436
+ NVVM::Tcgen05MmaSmemDescOp::createSmemDescriptor(*op, moduleTranslation, builder);
3437
+ }];
3438
+ }
3439
+
3376
3440
//===----------------------------------------------------------------------===//
3377
3441
// NVVM tcgen05 LdSt Shape Attr
3378
3442
//===----------------------------------------------------------------------===//
0 commit comments