Skip to content

Commit 4fd2b49

Browse files
committed
[AArch64][SVE2] Add the SVE2.1 fdot instructions
This patch adds the assembly/disassembly for the following instructions:
  FDOT: Half-precision floating-point dot product
  FDOT: Half-precision floating-point indexed dot product

The reference can be found here:
  https://developer.arm.com/documentation/ddi0602/2022-09

Differential Revision: https://reviews.llvm.org/D136427
1 parent fc41512 commit 4fd2b49

File tree

4 files changed

+138
-9
lines changed

4 files changed

+138
-9
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3575,4 +3575,6 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
35753575

35763576
let Predicates = [HasSVE2p1_or_HasSME2] in {
35773577
defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp">;
3578+
def FDOT_ZZZ_S : sve_float_dot<0b0, "fdot">;
3579+
def FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot">;
35783580
} // End HasSVE2p1_or_HasSME2

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8202,29 +8202,51 @@ class sve_bfloat_dot_base<bits<2> opc, string asm, string ops, dag iops>
82028202
let ElementSize = ElementSizeH;
82038203
}
82048204

8205-
class sve_bfloat_dot<string asm>
8206-
: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm",
8207-
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> {
8205+
class sve_float_dot<bit bf, string asm>
8206+
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
8207+
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
8208+
bits<5> Zda;
8209+
bits<5> Zn;
82088210
bits<5> Zm;
8211+
let Inst{31-23} = 0b011001000;
8212+
let Inst{22} = bf;
8213+
let Inst{21} = 0b1;
82098214
let Inst{20-16} = Zm;
8215+
let Inst{15-10} = 0b100000;
8216+
let Inst{9-5} = Zn;
8217+
let Inst{4-0} = Zda;
8218+
8219+
let Constraints = "$Zda = $_Zda";
8220+
let DestructiveInstType = DestructiveOther;
82108221
}
82118222

82128223
multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
8213-
def NAME : sve_bfloat_dot<asm>;
8224+
def NAME : sve_float_dot<0b1, asm>;
82148225
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
82158226
}
82168227

8217-
class sve_bfloat_dot_indexed<string asm>
8218-
: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
8219-
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
8220-
bits<2> iop;
8228+
class sve_float_dot_indexed<bit bf, string asm>
8229+
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop),
8230+
asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
8231+
bits<5> Zda;
8232+
bits<5> Zn;
82218233
bits<3> Zm;
8234+
bits<2> iop;
8235+
let Inst{31-23} = 0b011001000;
8236+
let Inst{22} = bf;
8237+
let Inst{21} = 0b1;
82228238
let Inst{20-19} = iop;
82238239
let Inst{18-16} = Zm;
8240+
let Inst{15-10} = 0b010000;
8241+
let Inst{9-5} = Zn;
8242+
let Inst{4-0} = Zda;
8243+
8244+
let Constraints = "$Zda = $_Zda";
8245+
let DestructiveInstType = DestructiveOther;
82248246
}
82258247

82268248
multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
8227-
def NAME : sve_bfloat_dot_indexed<asm>;
8249+
def NAME : sve_float_dot_indexed<0b1, asm>;
82288250
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
82298251
}
82308252

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s
2+
3+
// --------------------------------------------------------------------------//
4+
// Invalid vector lane index
5+
6+
fdot z0.s, z0.h, z0.h[8]
7+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
8+
// CHECK-NEXT: fdot z0.s, z0.h, z0.h[8]
9+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
10+
11+
fdot z0.s, z0.h, z0.h[-1]
12+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
13+
// CHECK-NEXT: fdot z0.s, z0.h, z0.h[-1]
14+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
15+
16+
// --------------------------------------------------------------------------//
17+
// Invalid vector suffix
18+
19+
fdot z0.h, z0.s, z0.s
20+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
21+
// CHECK-NEXT: fdot z0.h, z0.s, z0.s
22+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
23+
24+
fdot z0.d, z0.h, z0.h[0]
25+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
26+
// CHECK-NEXT: fdot z0.d, z0.h, z0.h[0]
27+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/SVE2p1/fdot.s

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \
4+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
5+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
6+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \
8+
// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST
9+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \
10+
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
11+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \
12+
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
13+
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \
14+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
15+
16+
movprfx z23, z31
17+
fdot z23.s, z13.h, z8.h // 01100100-00101000-10000001-10110111
18+
// CHECK-INST: movprfx z23, z31
19+
// CHECK-INST: fdot z23.s, z13.h, z8.h
20+
// CHECK-ENCODING: [0xb7,0x81,0x28,0x64]
21+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
22+
// CHECK-UNKNOWN: 642881b7 <unknown>
23+
24+
fdot z0.s, z0.h, z0.h // 01100100-00100000-10000000-00000000
25+
// CHECK-INST: fdot z0.s, z0.h, z0.h
26+
// CHECK-ENCODING: [0x00,0x80,0x20,0x64]
27+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
28+
// CHECK-UNKNOWN: 64208000 <unknown>
29+
30+
fdot z21.s, z10.h, z21.h // 01100100-00110101-10000001-01010101
31+
// CHECK-INST: fdot z21.s, z10.h, z21.h
32+
// CHECK-ENCODING: [0x55,0x81,0x35,0x64]
33+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
34+
// CHECK-UNKNOWN: 64358155 <unknown>
35+
36+
fdot z23.s, z13.h, z8.h // 01100100-00101000-10000001-10110111
37+
// CHECK-INST: fdot z23.s, z13.h, z8.h
38+
// CHECK-ENCODING: [0xb7,0x81,0x28,0x64]
39+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
40+
// CHECK-UNKNOWN: 642881b7 <unknown>
41+
42+
fdot z31.s, z31.h, z31.h // 01100100-00111111-10000011-11111111
43+
// CHECK-INST: fdot z31.s, z31.h, z31.h
44+
// CHECK-ENCODING: [0xff,0x83,0x3f,0x64]
45+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
46+
// CHECK-UNKNOWN: 643f83ff <unknown>
47+
48+
movprfx z23, z31
49+
fdot z23.s, z13.h, z0.h[1] // 01100100-00101000-01000001-10110111
50+
// CHECK-INST: movprfx z23, z31
51+
// CHECK-INST: fdot z23.s, z13.h, z0.h[1]
52+
// CHECK-ENCODING: [0xb7,0x41,0x28,0x64]
53+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
54+
// CHECK-UNKNOWN: 642841b7 <unknown>
55+
56+
fdot z0.s, z0.h, z0.h[0] // 01100100-00100000-01000000-00000000
57+
// CHECK-INST: fdot z0.s, z0.h, z0.h[0]
58+
// CHECK-ENCODING: [0x00,0x40,0x20,0x64]
59+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
60+
// CHECK-UNKNOWN: 64204000 <unknown>
61+
62+
fdot z21.s, z10.h, z5.h[2] // 01100100-00110101-01000001-01010101
63+
// CHECK-INST: fdot z21.s, z10.h, z5.h[2]
64+
// CHECK-ENCODING: [0x55,0x41,0x35,0x64]
65+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
66+
// CHECK-UNKNOWN: 64354155 <unknown>
67+
68+
fdot z23.s, z13.h, z0.h[1] // 01100100-00101000-01000001-10110111
69+
// CHECK-INST: fdot z23.s, z13.h, z0.h[1]
70+
// CHECK-ENCODING: [0xb7,0x41,0x28,0x64]
71+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
72+
// CHECK-UNKNOWN: 642841b7 <unknown>
73+
74+
fdot z31.s, z31.h, z7.h[3] // 01100100-00111111-01000011-11111111
75+
// CHECK-INST: fdot z31.s, z31.h, z7.h[3]
76+
// CHECK-ENCODING: [0xff,0x43,0x3f,0x64]
77+
// CHECK-ERROR: instruction requires: sme2 or sve2p1
78+
// CHECK-UNKNOWN: 643f43ff <unknown>

0 commit comments

Comments
 (0)