//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions. // //===----------------------------------------------------------------------===// def imm_to_tile8 : ComplexPattern", []>; def imm_to_tile16 : ComplexPattern", []>; def imm_to_tile32 : ComplexPattern", []>; def imm_to_tile64 : ComplexPattern", []>; def imm_to_tile128 : ComplexPattern", []>; def imm_to_zt : ComplexPattern", []>; def tileslice8 : ComplexPattern", []>; def tileslice16 : ComplexPattern", []>; def tileslice32 : ComplexPattern", []>; def tileslice64 : ComplexPattern", []>; def tileslice128 : ComplexPattern", []>; // nop def tileslicerange3s2 : ComplexPattern", []>; def tileslicerange2s2 : ComplexPattern", []>; def tileslicerange1s2 : ComplexPattern", []>; def tileslicerange0s2 : ComplexPattern", []>; def tileslicerange2s4 : ComplexPattern", []>; def tileslicerange1s4 : ComplexPattern", []>; def tileslicerange0s4 : ComplexPattern", []>; def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore, [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; //===----------------------------------------------------------------------===// // SME Pseudo Classes //===----------------------------------------------------------------------===// def getSMEPseudoMap : InstrMapping { let FilterClass = "SMEPseudo2Instr"; let RowFields = ["PseudoName"]; let ColFields = 
["IsInstr"]; let KeyCol = ["0"]; let ValueCols = [["1"]]; } class SMEPseudo2Instr { string PseudoName = name; bit IsInstr = instr; } class sme_outer_product_pseudo : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm, zpr_ty:$zn, zpr_ty:$zm), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_za_array_2op_multi_single_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_za_array_2op_multi_multi_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_za_array_2op_multi_index_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_move_to_za_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_move_to_tile_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sem2p1_zero_matrix_pseudo : SMEPseudo2Instr, Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_movez_to_tile_pseudo : SMEPseudo2Instr, Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } class sme2_movaz_array_to_tile_pseudo : SMEPseudo2Instr, Pseudo<(outs multi_vector_ty:$Zd), (ins 
MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> { let SMEMatrixType = za_flag; let usesCustomInserter = 1; } //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// class SME2_ZA_TwoOp_Multi_Single_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm), (!cast(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>; class SME2_ZA_TwoOp_VG2_Multi_Single_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm)>; class SME2_ZA_TwoOp_VG4_Multi_Single_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), zpr_ty:$Zm)>; class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>; class SME2_ZA_TwoOp_Multi_Index_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)), (!cast(name # _PSEUDO) $base, $offset, vt:$Zn, 
zpr_ty:$Zm, (i32 imm_ty:$i))>; class SME2_ZA_TwoOp_VG2_Multi_Index_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>; class SME2_ZA_TwoOp_VG4_Multi_Index_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), zpr_ty:$Zm, imm_ty:$i)>; class SME2_Sat_Shift_VG2_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; class SME2_Sat_Shift_VG4_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))), (!cast(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3), imm_ty:$i)>; class SME2_Cvt_VG4_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)), (!cast(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>; class SME2_ZA_VG1x2_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; class SME2_ZA_VG1x4_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; class SME2_Tile_VG2_Multi_Pat : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), (!cast(name # _PSEUDO) $tile, 
$base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; class SME2_Tile_VG4_Multi_Pat : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), (!cast(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; class SME2_Zero_Matrix_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), (!cast(name) $base, $offset)>; class SME2_Tile_Movaz_Pat : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), (!cast(name # _PSEUDO) $tile, $base, $offset)>; //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// class SME_ZA_Tile_TwoPred_TwoVec_Pat : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm), (!cast(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>; //===----------------------------------------------------------------------===// // SME smstart/smstop //===----------------------------------------------------------------------===// // SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or // both fields: // // MSR SVCRSM, #<imm1> // MSR SVCRZA, #<imm1> // MSR SVCRSMZA, #<imm1> // // It's tricky to use the existing pstate operand defined in // AArch64SystemOperands.td since it only encodes 5 bits including op1;op2, // whereas these fields are also encoded in CRm[3:1]. 
// MSRpstatesvcrImm1 is the "msr $pstatefield, $imm" instruction: the 3-bit
// pstatefield operand is encoded in Inst{11-9} and the single-bit immediate in
// Inst{8}, while op1 (Inst{18-16}) and op2 (Inst{7-5}) are both fixed to 0b011.
// The smstart/smstop aliases that follow pass pstatefield 0b011 (no suffix),
// 0b001 ("sm") or 0b010 ("za"), with imm = 1 for smstart and imm = 0 for
// smstop.
def MSRpstatesvcrImm1 : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr", "\t$pstatefield, $imm">, Sched<[WriteSys]> { bits<3> pstatefield; bit imm; let Inst{18-16} = 0b011; // op1 let Inst{11-9} = pstatefield; let Inst{8} = imm; let Inst{7-5} = 0b011; // op2 let hasPostISelHook = 1; } def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>; def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>; def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>; def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>; def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; //===----------------------------------------------------------------------===// // SME Outer Products //===----------------------------------------------------------------------===// class sme_fp_outer_product_inst sz, bits<2> op, MatrixTileOperand za_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs za_ty:$ZAda), (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; bits<3> Pm; bits<3> Pn; bits<5> Zn; let Inst{31-25} = 0b1000000; let Inst{24} = op{1}; let Inst{23} = 0b1; let Inst{22-21} = sz; let Inst{20-16} = Zm; let Inst{15-13} = Pm; let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4} = S; let Inst{3} = op{0}; let Constraints = "$ZAda = $_ZAda"; } multiclass sme_outer_product_fp32 sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> { def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme_outer_product_fp64 { def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass 
sme2p1_fmop_tile_f8f16 op> { def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } multiclass sme2p1_fmop_tile_fp16 { def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } class sme_int_outer_product_inst opc, bit sz, bit sme2, MatrixTileOperand za_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs za_ty:$ZAda), (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; bits<3> Pm; bits<3> Pn; bits<5> Zn; let Inst{31-25} = 0b1010000; let Inst{24} = opc{2}; // u0 let Inst{23} = 0b1; let Inst{22} = sz; let Inst{21} = opc{1}; // u1 let Inst{20-16} = Zm; let Inst{15-13} = Pm; let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4} = opc{0}; //S; let Inst{3} = sme2; let Constraints = "$ZAda = $_ZAda"; } multiclass sme_int_outer_product_i32 opc, string mnemonic, SDPatternOperator op> { def NAME : sme_int_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme_int_outer_product_i64 opc, string mnemonic, SDPatternOperator op> { def NAME : sme_int_outer_product_inst, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } class sme_outer_product_widening_inst opc, ZPRRegOp zpr_ty, string mnemonic> : I<(outs TileOp32:$ZAda), (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; bits<3> Pm; bits<3> Pn; bits<5> Zn; bits<2> ZAda; let Inst{31-25} = 0b1000000; let Inst{24} = !if(opc{2}, 0, 1); let Inst{23-22} = 0b10; let Inst{21} = 
opc{1}; let Inst{20-16} = Zm; let Inst{15-13} = Pm; let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4} = opc{0}; let Inst{3} = opc{2}; let Inst{2} = 0b0; let Inst{1-0} = ZAda; let Constraints = "$ZAda = $_ZAda"; } multiclass sme_bf16_outer_product opc, string mnemonic, SDPatternOperator op> { def NAME : sme_outer_product_widening_inst, SMEPseudo2Instr; def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme_f16_outer_product opc, string mnemonic, SDPatternOperator op> { def NAME : sme_outer_product_widening_inst, SMEPseudo2Instr; def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } //===----------------------------------------------------------------------===// // SME Add Vector to Tile //===----------------------------------------------------------------------===// class sme_add_vector_to_tile_inst : I<(outs tile_ty:$ZAda), (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn", "", []>, Sched<[]> { bits<3> Pm; bits<3> Pn; bits<5> Zn; let Inst{31-23} = 0b110000001; let Inst{22} = op; let Inst{21-17} = 0b01000; let Inst{16} = V; let Inst{15-13} = Pm; let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4-3} = 0b00; let Constraints = "$ZAda = $_ZAda"; } class sme_add_vector_to_tile_pseudo : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let SMEMatrixType = za_flag; let usesCustomInserter = 1; } multiclass sme_add_vector_to_tile_u32 { def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr { bits<2> ZAda; let Inst{2} = 0b0; let Inst{1-0} = ZAda; } def _PSEUDO_S : sme_add_vector_to_tile_pseudo, SMEPseudo2Instr; def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), (nxv4i32 ZPR32:$zn)), (!cast(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, 
$zn)>; } multiclass sme_add_vector_to_tile_u64 { def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } def _PSEUDO_D : sme_add_vector_to_tile_pseudo, SMEPseudo2Instr; let Predicates = [HasSMEI16I64] in { def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), (nxv2i64 ZPR64:$zn)), (!cast(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>; } } //===----------------------------------------------------------------------===// // SME Contiguous Loads //===----------------------------------------------------------------------===// class sme_mem_ld_ss_base msz, dag outs, dag ins, string mnemonic, string argstr> : I, Sched<[]> { bits<5> Rm; bits<2> Rv; bits<3> Pg; bits<5> Rn; let Inst{31-25} = 0b1110000; let Inst{24} = Q; let Inst{23-22} = msz; let Inst{21} = 0b0; let Inst{20-16} = Rm; let Inst{15} = V; let Inst{14-13} = Rv; let Inst{12-10} = Pg; let Inst{9-5} = Rn; let Inst{4} = 0b0; let mayLoad = 1; } class sme_mem_ld_ss_inst msz, string mnemonic, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, RegisterOperand gpr_ty> : sme_mem_ld_ss_base< Q, is_col, msz, (outs tile_ty:$ZAt), (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">; multiclass sme_mem_ss_aliases_base { def : InstAlias; // Default XZR offset aliases def : InstAlias; def : InstAlias; } multiclass sme_mem_ss_aliases { defm : sme_mem_ss_aliases_base(inst # _B), !if(is_col, TileVectorOpV8, TileVectorOpH8), sme_elm_idx0_15, GPR64shifted8, pg_suffix>; defm : sme_mem_ss_aliases_base(inst # _H), !if(is_col, TileVectorOpV16, TileVectorOpH16), sme_elm_idx0_7, GPR64shifted16, pg_suffix>; defm : sme_mem_ss_aliases_base(inst # _S), !if(is_col, TileVectorOpV32, TileVectorOpH32), sme_elm_idx0_3, GPR64shifted32, pg_suffix>; defm : sme_mem_ss_aliases_base(inst # _D), !if(is_col, TileVectorOpV64, TileVectorOpH64), 
sme_elm_idx0_1, GPR64shifted64, pg_suffix>; defm : sme_mem_ss_aliases_base(inst # _Q), !if(is_col, TileVectorOpV128, TileVectorOpH128), sme_elm_idx0_0, GPR64shifted128, pg_suffix>; } multiclass sme_mem_ld_ss_aliases { defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">; } multiclass sme_mem_ld_ss_patterns { // base, tileslice def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>; // reg + reg, tileslice let AddedComplexity = 1 in { def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset), tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>; } } class sme_load_pseudo : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let usesCustomInserter = 1; let mayLoad = 1; } multiclass sme_mem_ld_v_ss { def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b", !if(is_col, TileVectorOpV8, TileVectorOpH8), is_col, sme_elm_idx0_15, GPR64shifted8> { bits<4> imm; let Inst{3-0} = imm; } def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h", !if(is_col, TileVectorOpV16, TileVectorOpH16), is_col, sme_elm_idx0_7, GPR64shifted16> { bits<1> ZAt; bits<3> imm; let Inst{3} = ZAt; let Inst{2-0} = imm; } def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w", !if(is_col, TileVectorOpV32, TileVectorOpH32), is_col, sme_elm_idx0_3, GPR64shifted32> { bits<2> ZAt; bits<2> imm; let Inst{3-2} = ZAt; let Inst{1-0} = imm; } def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d", !if(is_col, TileVectorOpV64, TileVectorOpH64), is_col, sme_elm_idx0_1, GPR64shifted64> { bits<3> ZAt; bits<1> imm; let Inst{3-1} = ZAt; let Inst{0} = imm; } def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q", !if(is_col, TileVectorOpV128, TileVectorOpH128), 
is_col, sme_elm_idx0_0, GPR64shifted128> { bits<4> ZAt; let Inst{3-0} = ZAt; } defm : sme_mem_ld_ss_aliases; // Pseudo instructions for lowering intrinsics, using immediates instead of // tile registers. def _PSEUDO_B : sme_load_pseudo; def _PSEUDO_H : sme_load_pseudo; def _PSEUDO_S : sme_load_pseudo; def _PSEUDO_D : sme_load_pseudo; def _PSEUDO_Q : sme_load_pseudo; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_B), !if(is_col, int_aarch64_sme_ld1b_vert, int_aarch64_sme_ld1b_horiz), sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0, tileslice8>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_H), !if(is_col, int_aarch64_sme_ld1h_vert, int_aarch64_sme_ld1h_horiz), timm32_0_1, timm32_0_7, am_sve_regreg_lsl1, tileslice16>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_S), !if(is_col, int_aarch64_sme_ld1w_vert, int_aarch64_sme_ld1w_horiz), timm32_0_3, timm32_0_3, am_sve_regreg_lsl2, tileslice32>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_D), !if(is_col, int_aarch64_sme_ld1d_vert, int_aarch64_sme_ld1d_horiz), timm32_0_7, timm32_0_1, am_sve_regreg_lsl3, tileslice64>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_Q), !if(is_col, int_aarch64_sme_ld1q_vert, int_aarch64_sme_ld1q_horiz), timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4, tileslice128>; } multiclass sme_mem_ld_ss { defm _H : sme_mem_ld_v_ss; defm _V : sme_mem_ld_v_ss; } //===----------------------------------------------------------------------===// // SME Contiguous Stores //===----------------------------------------------------------------------===// class sme_mem_st_ss_base msz, dag ins, string mnemonic, string argstr> : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> { bits<5> Rm; bits<2> Rv; bits<3> Pg; bits<5> Rn; let Inst{31-25} = 0b1110000; let Inst{24} = Q; let Inst{23-22} = msz; let Inst{21} = 0b1; let Inst{20-16} = Rm; let Inst{15} = V; let Inst{14-13} = Rv; let Inst{12-10} = Pg; let Inst{9-5} = Rn; let Inst{4} = 0b0; let mayStore = 1; let hasSideEffects = 1; } class sme_mem_st_ss_inst msz, string 
mnemonic, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, RegisterOperand gpr_ty> : sme_mem_st_ss_base< Q, is_col, msz, (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">; multiclass sme_mem_st_ss_aliases { defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>; } multiclass sme_mem_st_ss_patterns { // base, tileslice def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile), (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), (Inst $tile, $idx, $imm, $pg, $base, XZR)>; // reg + reg, tileslice let AddedComplexity = 1 in { def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset), (imm2tile untyped:$tile), (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), (Inst $tile, $idx, $imm, $pg, $base, $offset)>; } } multiclass sme_mem_st_v_ss { def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b", !if(is_col, TileVectorOpV8, TileVectorOpH8), is_col, sme_elm_idx0_15, GPR64shifted8> { bits<4> imm; let Inst{3-0} = imm; } def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h", !if(is_col, TileVectorOpV16, TileVectorOpH16), is_col, sme_elm_idx0_7, GPR64shifted16> { bits<1> ZAt; bits<3> imm; let Inst{3} = ZAt; let Inst{2-0} = imm; } def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w", !if(is_col, TileVectorOpV32, TileVectorOpH32), is_col, sme_elm_idx0_3, GPR64shifted32> { bits<2> ZAt; bits<2> imm; let Inst{3-2} = ZAt; let Inst{1-0} = imm; } def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d", !if(is_col, TileVectorOpV64, TileVectorOpH64), is_col, sme_elm_idx0_1, GPR64shifted64> { bits<3> ZAt; bits<1> imm; let Inst{3-1} = ZAt; let Inst{0} = imm; } def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q", !if(is_col, TileVectorOpV128, TileVectorOpH128), is_col, sme_elm_idx0_0, GPR64shifted128> { bits<4> ZAt; let Inst{3-0} = ZAt; } defm : sme_mem_st_ss_aliases; defm : sme_mem_st_ss_patterns(NAME # _B), !if(is_col, 
int_aarch64_sme_st1b_vert, int_aarch64_sme_st1b_horiz), timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0, tileslice8>; defm : sme_mem_st_ss_patterns(NAME # _H), !if(is_col, int_aarch64_sme_st1h_vert, int_aarch64_sme_st1h_horiz), timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1, tileslice16>; defm : sme_mem_st_ss_patterns(NAME # _S), !if(is_col, int_aarch64_sme_st1w_vert, int_aarch64_sme_st1w_horiz), timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2, tileslice32>; defm : sme_mem_st_ss_patterns(NAME # _D), !if(is_col, int_aarch64_sme_st1d_vert, int_aarch64_sme_st1d_horiz), timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3, tileslice64>; defm : sme_mem_st_ss_patterns(NAME # _Q), !if(is_col, int_aarch64_sme_st1q_vert, int_aarch64_sme_st1q_horiz), sme_elm_idx0_0, imm_to_tile128, am_sve_regreg_lsl4, tileslice128>; } multiclass sme_mem_st_ss { defm _H : sme_mem_st_v_ss; defm _V : sme_mem_st_v_ss; } //===----------------------------------------------------------------------===// // SME Save and Restore Array //===----------------------------------------------------------------------===// class sme_spill_fill_base : I, Sched<[]> { bits<2> Rv; bits<5> Rn; bits<4> imm4; let Inst{31-22} = 0b1110000100; let Inst{21} = isStore; let Inst{20-15} = 0b000000; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-5} = Rn; let Inst{4} = 0b0; let Inst{3-0} = imm4; } let mayStore = 1 in class sme_spill_inst : sme_spill_fill_base<0b1, (outs), (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, imm32_0_15:$offset), opcodestr>; let mayLoad = 1 in class sme_fill_inst : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt), (ins MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, imm32_0_15:$offset), opcodestr>; multiclass sme_spill { def NAME : sme_spill_inst; def : InstAlias(NAME) MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 
sme_elm_idx0_15:$imm)), (!cast(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>; } multiclass sme_fill { def NAME : sme_fill_inst; def : InstAlias(NAME) MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; def NAME # _PSEUDO : Pseudo<(outs), (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4, GPR64sp:$base), []>, Sched<[]> { // Translated to actual instruction in AArch64ISelLowering.cpp let usesCustomInserter = 1; let mayLoad = 1; } def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm), (!cast(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>; } //===----------------------------------------------------------------------===// // Move instructions //===----------------------------------------------------------------------===// class sme_vector_to_tile_base sz, dag outs, dag ins, string mnemonic, string argstr> : I, Sched<[]> { bits<2> Rv; bits<3> Pg; bits<5> Zn; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-17} = 0b00000; let Inst{16} = Q; let Inst{15} = V; let Inst{14-13} = Rv; let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4} = 0b0; } class sme_vector_to_tile_inst sz, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, string mnemonic> : sme_vector_to_tile_base{ let Constraints = "$ZAd = $_ZAd"; } multiclass sme_vector_to_tile_aliases { def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn", (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; } multiclass sme_vector_to_tile_patterns { def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm)), (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; } class sme_mova_insert_pseudo : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, Sched<[]> { // 
Translated to the actual instructions in AArch64ISelLowering.cpp let SMEMatrixType = za_flag; let usesCustomInserter = 1; } multiclass sme_vector_v_to_tile { def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, TileVectorOpH8), is_col, sme_elm_idx0_15, ZPR8, mnemonic>, SMEPseudo2Instr { bits<4> imm; let Inst{3-0} = imm; } def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16, TileVectorOpH16), is_col, sme_elm_idx0_7, ZPR16, mnemonic>, SMEPseudo2Instr { bits<1> ZAd; bits<3> imm; let Inst{3} = ZAd; let Inst{2-0} = imm; } def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32, TileVectorOpH32), is_col, sme_elm_idx0_3, ZPR32, mnemonic>, SMEPseudo2Instr { bits<2> ZAd; bits<2> imm; let Inst{3-2} = ZAd; let Inst{1-0} = imm; } def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64, TileVectorOpH64), is_col, sme_elm_idx0_1, ZPR64, mnemonic>, SMEPseudo2Instr { bits<3> ZAd; bits<1> imm; let Inst{3-1} = ZAd; let Inst{0} = imm; } def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128, TileVectorOpH128), is_col, sme_elm_idx0_0, ZPR128, mnemonic>, SMEPseudo2Instr { bits<4> ZAd; bits<1> imm; let Inst{3-0} = ZAd; } // Pseudo instructions for lowering intrinsics, using immediates instead of // tile registers. 
def _PSEUDO_B : sme_mova_insert_pseudo, SMEPseudo2Instr; def _PSEUDO_H : sme_mova_insert_pseudo, SMEPseudo2Instr; def _PSEUDO_S : sme_mova_insert_pseudo, SMEPseudo2Instr; def _PSEUDO_D : sme_mova_insert_pseudo, SMEPseudo2Instr; def _PSEUDO_Q : sme_mova_insert_pseudo, SMEPseudo2Instr; defm : sme_vector_to_tile_aliases(NAME # _B), !if(is_col, TileVectorOpV8, TileVectorOpH8), ZPR8, sme_elm_idx0_15>; defm : sme_vector_to_tile_aliases(NAME # _H), !if(is_col, TileVectorOpV16, TileVectorOpH16), ZPR16, sme_elm_idx0_7>; defm : sme_vector_to_tile_aliases(NAME # _S), !if(is_col, TileVectorOpV32, TileVectorOpH32), ZPR32, sme_elm_idx0_3>; defm : sme_vector_to_tile_aliases(NAME # _D), !if(is_col, TileVectorOpV64, TileVectorOpH64), ZPR64, sme_elm_idx0_1>; defm : sme_vector_to_tile_aliases(NAME # _Q), !if(is_col, TileVectorOpV128, TileVectorOpH128), ZPR128, sme_elm_idx0_0>; defvar op = !if(is_col, int_aarch64_sme_write_vert, int_aarch64_sme_write_horiz); defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_B), nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15, op, tileslice8>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, op, tileslice32>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, op, tileslice32>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, op, tileslice64>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, op, tileslice64>; defvar opq = !if(is_col, int_aarch64_sme_writeq_vert, 
int_aarch64_sme_writeq_horiz); defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv16i8, nxv16i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv8i16, nxv8i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv8f16, nxv8i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv8bf16, nxv8i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv4i32, nxv4i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv4f32, nxv4i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv2i64, nxv2i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_Q), nxv2f64, nxv2i1, sme_elm_idx0_15, sme_elm_idx0_0, opq, tileslice128>; } multiclass sme_vector_to_tile { defm _H : sme_vector_v_to_tile; defm _V : sme_vector_v_to_tile; } class sme_tile_to_vector_base sz, dag outs, dag ins, string mnemonic, string argstr> : I, Sched<[]> { bits<2> Rv; bits<3> Pg; bits<5> Zd; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-17} = 0b00001; let Inst{16} = Q; let Inst{15} = V; let Inst{14-13} = Rv; let Inst{12-10} = Pg; let Inst{9} = 0b0; let Inst{4-0} = Zd; } class sme_tile_to_vector_inst sz, ZPRRegOp zpr_ty, MatrixTileVectorOperand tile_ty, bit is_col, Operand imm_ty, string mnemonic> : sme_tile_to_vector_base { let Constraints = "$Zd = $_Zd"; } multiclass sme_tile_to_vector_aliases { def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]", (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>; } multiclass sme_tile_to_vector_patterns { def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), (imm2tile untyped:$tile), 
MatrixIndexGPR32Op12_15:$idx)), (inst $passthru, $pg, $tile, $idx, 0)>; let AddedComplexity = 1 in { def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), (imm2tile untyped:$tile), (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm)))), (inst $passthru, $pg, $tile, $idx, $imm)>; } } multiclass sme_tile_to_vector_v { def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8, TileVectorOpH8), is_col, sme_elm_idx0_15, mnemonic> { bits<4> imm; let Inst{8-5} = imm; } def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16, TileVectorOpH16), is_col, sme_elm_idx0_7, mnemonic> { bits<1> ZAn; bits<3> imm; let Inst{8} = ZAn; let Inst{7-5} = imm; } def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32, TileVectorOpH32), is_col, sme_elm_idx0_3, mnemonic> { bits<2> ZAn; bits<2> imm; let Inst{8-7} = ZAn; let Inst{6-5} = imm; } def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64, TileVectorOpH64), is_col, sme_elm_idx0_1, mnemonic> { bits<3> ZAn; bits<1> imm; let Inst{8-6} = ZAn; let Inst{5} = imm; } def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128, TileVectorOpH128), is_col, sme_elm_idx0_0, mnemonic> { bits<4> ZAn; let Inst{8-5} = ZAn; } defm : sme_tile_to_vector_aliases(NAME # _B), ZPR8, !if(is_col, TileVectorOpV8, TileVectorOpH8), sme_elm_idx0_15>; defm : sme_tile_to_vector_aliases(NAME # _H), ZPR16, !if(is_col, TileVectorOpV16, TileVectorOpH16), sme_elm_idx0_7>; defm : sme_tile_to_vector_aliases(NAME # _S), ZPR32, !if(is_col, TileVectorOpV32, TileVectorOpH32), sme_elm_idx0_3>; defm : sme_tile_to_vector_aliases(NAME # _D), ZPR64, !if(is_col, TileVectorOpV64, TileVectorOpH64), sme_elm_idx0_1>; defm : sme_tile_to_vector_aliases(NAME # _Q), ZPR128, !if(is_col, TileVectorOpV128, TileVectorOpH128), sme_elm_idx0_0>; defvar op = !if(is_col, int_aarch64_sme_read_vert, int_aarch64_sme_read_horiz); defm : 
sme_tile_to_vector_patterns(NAME # _B), nxv16i8, nxv16i1, sme_elm_idx0_15, imm_to_tile8, tileslice8, op>; defm : sme_tile_to_vector_patterns(NAME # _H), nxv8i16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _H), nxv8f16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _H), nxv8bf16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _S), nxv4i32, nxv4i1, sme_elm_idx0_3, imm_to_tile32, tileslice32, op>; defm : sme_tile_to_vector_patterns(NAME # _S), nxv4f32, nxv4i1, sme_elm_idx0_3, imm_to_tile32, tileslice32, op>; defm : sme_tile_to_vector_patterns(NAME # _D), nxv2i64, nxv2i1, sme_elm_idx0_1, imm_to_tile64, tileslice64, op>; defm : sme_tile_to_vector_patterns(NAME # _D), nxv2f64, nxv2i1, sme_elm_idx0_1, imm_to_tile64, tileslice64, op>; defvar opq = !if(is_col, int_aarch64_sme_readq_vert, int_aarch64_sme_readq_horiz); defm : sme_tile_to_vector_patterns(NAME # _Q), nxv16i8, nxv16i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv8i16, nxv8i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv8f16, nxv8i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv8bf16, nxv8i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv4i32, nxv4i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv4f32, nxv4i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv2i64, nxv2i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; defm : sme_tile_to_vector_patterns(NAME # _Q), nxv2f64, nxv2i1, sme_elm_idx0_0, imm_to_tile128, tileslice128, opq>; } multiclass sme_tile_to_vector { defm _H : sme_tile_to_vector_v; defm _V : 
sme_tile_to_vector_v; } //===----------------------------------------------------------------------===// // SME Zero //===----------------------------------------------------------------------===// // NOTE: This definition isn't really correct because there are outputs, i.e. // the tile registers being zeroed. We fix this up in a custom inserter that // marks the appropriate registers as being implicitly defined. class sme_zero_inst : I<(outs), (ins MatrixTileList:$imm), mnemonic, "\t$imm", "", []>, Sched<[]> { bits<8> imm; let Inst{31-8} = 0b110000000000100000000000; let Inst{7-0} = imm; } multiclass sme_zero { def NAME : sme_zero_inst; def : InstAlias<"zero\t\\{za\\}", (!cast(NAME) 0b11111111), 1>; def : InstAlias<"zero\t\\{za0.h\\}", (!cast(NAME) 0b01010101), 1>; def : InstAlias<"zero\t\\{za1.h\\}", (!cast(NAME) 0b10101010), 1>; def : InstAlias<"zero\t\\{za0.s\\}", (!cast(NAME) 0b00010001), 1>; def : InstAlias<"zero\t\\{za1.s\\}", (!cast(NAME) 0b00100010), 1>; def : InstAlias<"zero\t\\{za2.s\\}", (!cast(NAME) 0b01000100), 1>; def : InstAlias<"zero\t\\{za3.s\\}", (!cast(NAME) 0b10001000), 1>; def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast(NAME) 0b00110011), 1>; def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast(NAME) 0b10011001), 1>; def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast(NAME) 0b01100110), 1>; def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast(NAME) 0b11001100), 1>; def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast(NAME) 0b01110111), 1>; def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast(NAME) 0b10111011), 1>; def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast(NAME) 0b11011101), 1>; def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast(NAME) 0b11101110), 1>; def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let usesCustomInserter = 1; } def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm), (!cast(NAME # _PSEUDO) timm32_0_255:$imm)>; } 
//===----------------------------------------------------------------------===// // SVE2 Instructions //===----------------------------------------------------------------------===// class sve2_int_perm_revd : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { bits<5> Zd; bits<3> Pg; bits<5> Zn; let Inst{31-24} = 0b00000101; let Inst{23-22} = 0b00; // size let Inst{21-13} = 0b101110100; let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; let DestructiveInstType = DestructiveUnary; let ElementSize = ZPR128.ElementSize; } multiclass sve2_int_perm_revd { def NAME : sve2_int_perm_revd; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; } class sve2_clamp sz, bit U, ZPRRegOp zpr_ty> : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; bits<5> Zn; bits<5> Zd; let Inst{31-24} = 0b01000100; let Inst{23-22} = sz; let Inst{21} = 0b0; let Inst{20-16} = Zm; let Inst{15-11} = 0b11000; let Inst{10} = U; let Inst{9-5} = Zn; let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; let DestructiveInstType = DestructiveOther; let ElementSize = zpr_ty.ElementSize; } multiclass sve2_clamp { def _B : sve2_clamp; def _H : sve2_clamp; def _S : sve2_clamp; def _D : sve2_clamp; def : SVE_3_Op_Pat(NAME # _B)>; def : SVE_3_Op_Pat(NAME # _H)>; def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; } class sve2_int_perm_sel_p : I<(outs PPRorPNRAny:$Pd), (ins PPRorPNRAny:$Pn, ppr_ty:$Pm, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>, Sched<[]> { bits<2> Rv; bits<4> Pn; bits<4> Pm; bits<4> Pd; let Inst{31-24} = 0b00100101; let Inst{21} = 0b1; let 
Inst{17-16} = Rv; let Inst{15-14} = 0b01; let Inst{13-10} = Pn; let Inst{9} = 0b0; let Inst{8-5} = Pm; let Inst{4} = 0b0; let Inst{3-0} = Pd; } multiclass sve2_int_perm_sel_p { def _B : sve2_int_perm_sel_p { bits<4> imm; let Inst{23-22} = imm{3-2}; let Inst{20-19} = imm{1-0}; let Inst{18} = 0b1; } def _H : sve2_int_perm_sel_p { bits<3> imm; let Inst{23-22} = imm{2-1}; let Inst{20} = imm{0}; let Inst{19-18} = 0b10; } def _S : sve2_int_perm_sel_p { bits<2> imm; let Inst{23-22} = imm{1-0}; let Inst{20-18} = 0b100; } def _D : sve2_int_perm_sel_p { bits<1> imm; let Inst{23} = imm; let Inst{22} = 0b1; let Inst{20-18} = 0b000; } def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _B) $Pn, $Pm, $idx, 0)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _H) $Pn, $Pm, $idx, 0)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _S) $Pn, $Pm, $idx, 0)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _D) $Pn, $Pm, $idx, 0)>; let AddedComplexity = 1 in { def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))), (!cast(NAME # _B) $Pn, $Pm, $idx, $imm)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm), (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))), (!cast(NAME # _H) $Pn, $Pm, $idx, $imm)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm), (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))), (!cast(NAME # _S) $Pn, $Pm, $idx, $imm)>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm), (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))), (!cast(NAME # _D) $Pn, $Pm, $idx, $imm)>; } } 
//===----------------------------------------------------------------------===// // SME2 Instructions //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // SME2 single-multi ternary int/fp, two/four registers class sme2_dot_mla_add_sub_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs matrix_ty:$ZAd), (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm", "", []> , Sched<[]> { bits<4> Zm; bits<5> Zn; bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000010; let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20} = op{5}; //vgx4 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = op{4-2}; let Inst{9-5} = Zn; let Inst{4-3} = op{1-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } multiclass sme2_dot_mla_add_sub_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty>{ def NAME: sme2_dot_mla_add_sub_array_vg24_single, SMEPseudo2Instr; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; } multiclass sme2_dot_mla_add_sub_array_vg2_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{ def NAME: sme2_dot_mla_add_sub_array_vg24_single, SMEPseudo2Instr; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; } multiclass sme2_dot_mla_add_sub_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator 
intrinsic>{ def NAME: sme2_dot_mla_add_sub_array_vg24_single, SMEPseudo2Instr; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; } //===----------------------------------------------------------------------===// // SME2 multiple vectors ternary INT/FP two and four registers class sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> : I<(outs matrix_ty:$ZAd), (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm", "", []>, Sched<[]>{ bits<4> Zm; bits<4> Zn; bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; let Inst{12-10} = op{5-3}; let Inst{9-6} = Zn; let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } multiclass sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { def NAME : sme2_dot_mla_add_sub_array_vg2_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } class sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> : I<(outs matrix_ty:$ZAd), (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm", "", []>, Sched<[]>{ bits<3> Zm; bits<3> Zn; bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; let Inst{22} = op{6}; //sz 
let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; let Inst{12-10} = op{5-3}; let Inst{9-7} = Zn; let Inst{6} = 0b0; let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } multiclass sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic>{ def NAME : sme2_dot_mla_add_sub_array_vg4_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } //===----------------------------------------------------------------------===// // SME2 multiple vectors binary two or four registers class sme2_multivec_accum_add_sub op, MatrixOperand matrix_ty, RegisterOperand vector_ty> : I<(outs matrix_ty:$ZAdn), (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm", "", []>, Sched<[]> { bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; let Inst{22} = sz; let Inst{21-19} = 0b100; let Inst{18} = op{2}; let Inst{17} = 0b0; let Inst{16} = vg4; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = 0b111; let Inst{5} = 0b0; let Inst{4-3} = op{1-0}; let Inst{2-0} = imm3; let Constraints = "$ZAdn = $_ZAdn"; } class sme2_multivec_accum_add_sub_vg2 op, MatrixOperand matrix_ty, RegisterOperand vector_ty> : sme2_multivec_accum_add_sub { bits<4> Zm; let Inst{9-6} = Zm; } multiclass sme2_multivec_accum_add_sub_vg2 op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vty, SDPatternOperator intrinsic> { def NAME : sme2_multivec_accum_add_sub_vg2, SMEPseudo2Instr; def : InstAlias(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>; def _PSEUDO : sme2_move_to_za_pseudo; def : 
SME2_ZA_VG1x2_Multi_Pat; } class sme2_multivec_accum_add_sub_vg4 op, MatrixOperand matrix_ty, RegisterOperand vector_ty> : sme2_multivec_accum_add_sub { bits<3> Zm; let Inst{9-7} = Zm; let Inst{6} = 0b0; } multiclass sme2_multivec_accum_add_sub_vg4 op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vty, SDPatternOperator intrinsic> { def NAME : sme2_multivec_accum_add_sub_vg4, SMEPseudo2Instr; def : InstAlias(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>; def _PSEUDO : sme2_move_to_za_pseudo; def : SME2_ZA_VG1x4_Multi_Pat; } //===----------------------------------------------------------------------===// // SME2 Multi-vector - Multiple and Single SVE Destructive // Two and Four registers class sme2_sve_destructive_vector_vg2_single sz, bits<7> op, RegisterOperand vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<4> Zdn; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b10; let Inst{19-16} = Zm; let Inst{15-11} = 0b10100; let Inst{10-5} = op{6-1}; let Inst{4-1} = Zdn; let Inst{0} = op{0}; let Constraints = "$Zdn = $_Zdn"; } multiclass sme2_fp_sve_destructive_vector_vg2_single op> { def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; } multiclass sme2_int_sve_destructive_vector_vg2_single op> { def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>; def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; 
} // SME2.1 fmax/fmin instructions. multiclass sme2p1_bf_max_min_vector_vg2_singleop> { def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; } class sme2_sve_destructive_vector_vg4_single sz, bits<7> op, RegisterOperand vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<3> Zdn; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b10; let Inst{19-16} = Zm; let Inst{15-11} = 0b10101; let Inst{10-5} = op{6-1}; let Inst{4-2} = Zdn; let Inst{1} = 0b0; let Inst{0} = op{0}; let Constraints = "$Zdn = $_Zdn"; } multiclass sme2_fp_sve_destructive_vector_vg4_single op> { def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; } multiclass sme2_int_sve_destructive_vector_vg4_single op> { def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>; def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; } // SME2.1 fmax/fmin instructions. 
// Four-register "single" form for the SME2.1 max/min family. Only an _H
// record is produced; it pairs the half-word register operands with
// sz = 0b00.
multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
}

// Encoding class for the destructive two-register "multi" form: $Zdn is both
// the destination and (through the tied $_Zdn input) the first source, and
// $Zm is a second operand of the same register-group type.
//
//   sz        - 2-bit size field, placed in Inst{23-22}.
//   op        - 7-bit opcode selector; op{6-1} goes to Inst{10-5} and op{0}
//               to Inst{0}.
//   vector_ty - register operand used for $Zdn, $_Zdn and $Zm.
//   mnemonic  - assembly mnemonic.
class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  // Both register-group operands are encoded in 4 bits.
  // NOTE(review): presumably the group's first register number with its low
  // bit dropped -- confirm against the operand encoder.
  bits<4> Zm;
  bits<4> Zdn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-11} = 0b010110;
  let Inst{10-5}  = op{6-1};
  let Inst{4-1}   = Zdn;
  let Inst{0}     = op{0};

  // Tie the written register group to the first source operand.
  let Constraints = "$Zdn = $_Zdn";
}

// Floating-point element sizes of the two-register multi form: _H, _S and _D
// (sz = 0b01, 0b10, 0b11).
multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}

// Integer element sizes of the two-register multi form: _B, _H, _S and _D
// (sz = 0b00 through 0b11).
multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
// Two-register "multi" form for the SME2.1 max/min family. Only an _H record
// is produced; it pairs the half-word register operand with sz = 0b00.
multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r, mnemonic>;
}

// Encoding class for the destructive four-register "multi" form: $Zdn is both
// the destination and (through the tied $_Zdn input) the first source, and
// $Zm is a second operand of the same register-group type.
//
//   sz        - 2-bit size field, placed in Inst{23-22}.
//   op        - 7-bit opcode selector; op{6-1} goes to Inst{10-5} and op{0}
//               to Inst{0}.
//   vector_ty - register operand used for $Zdn, $_Zdn and $Zm.
//   mnemonic  - assembly mnemonic.
class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  // Both register-group operands are encoded in 3 bits.
  // NOTE(review): presumably the group's first register number with its two
  // low bits dropped -- confirm against the operand encoder.
  bits<3> Zm;
  bits<3> Zdn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-11} = 0b0010111;
  let Inst{10-5}  = op{6-1};
  let Inst{4-2}   = Zdn;
  let Inst{1}     = 0b0;
  let Inst{0}     = op{0};

  // Tie the written register group to the first source operand.
  let Constraints = "$Zdn = $_Zdn";
}

// Floating-point element sizes of the four-register multi form: _H, _S and _D
// (sz = 0b01, 0b10, 0b11).
multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
}

// Integer element sizes of the four-register multi form: _B, _H, _S and _D
// (sz = 0b00 through 0b11).
multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
multiclass sme2p1_bf_max_min_vector_vg4_multiop> { def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources class sme2_mla_long_array_index_base op0, bits<2> op, Operand index_ty, RegisterOperand multi_vector_ty, string mnemonic, string vg_acronym=""> : I<(outs MatrixOp32:$ZAda), (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; let Inst{31-24} = 0b11000001; let Inst{23-22} = op0; let Inst{21} = 0b0; let Inst{20} = !if(!eq(vg_acronym, ""), 0, 1); let Inst{19-16} = Zm; let Inst{14-13} = Rv; let Inst{12} = 0b1; let Inst{4-3} = op; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_long_array_index op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_index_base, SMEPseudo2Instr { bits<3> i3; bits<5> Zn; bits<3> imm; let Inst{15} = i3{2}; let Inst{11-10} = i3{1-0}; let Inst{9-5} = Zn; let Inst{2-0} = imm; } def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_Multi_Index_Pat; } class sme2_mla_long_array_vg2_index op0, bits<2> op> : sme2_mla_long_array_index_base { bits<3> i3; bits<4> Zn; bits<2> imm; let Inst{15} = 0b0; let Inst{11-10} = i3{2-1}; let Inst{9-6} = Zn; let Inst{5} = 0b0; let Inst{2} = i3{0}; let Inst{1-0} = imm; } multiclass sme2_fp_mla_long_array_vg2_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, 
VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg2_index op, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } class sme2_mla_long_array_vg4_index op0, bits<2> op> : sme2_mla_long_array_index_base { bits<3> i3; bits<3> Zn; bits<2> imm; let Inst{15} = 0b1; let Inst{11-10} = i3{2-1}; let Inst{9-7} = Zn; let Inst{6-5} = 0b00; let Inst{2} = i3{0}; let Inst{1-0} = imm; } multiclass sme2_fp_mla_long_array_vg4_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg4_index op, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } class sme2_mla_long_arrayop0, bits<2> op, MatrixOperand matrix_ty, Operand index_ty, RegisterOperand first_vector_ty, RegisterOperand second_vector_ty, string mnemonic, string vg_acronym=""> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm), mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm", "", []> , Sched<[]> { bits<2> Rv; let Inst{31-24} = 0b11000001; let Inst{23-22} = op0; 
let Inst{21} = 0b1; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-11} = 0b01; let Inst{10} = !if(!eq(vg_acronym, ""), 1, 0); let Inst{4-3} = op; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_long_array_single op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array , SMEPseudo2Instr{ bits<4> Zm; bits<5> Zn; bits<3> imm; let Inst{20} = 0b0; let Inst{19-16} = Zm; let Inst{9-5} = Zn; let Inst{2-0} = imm; } def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_Multi_Single_Pat; } class sme2_mla_long_array_single_16b : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> { bits<4> Zm; bits<5> Zn; bits<3> imm; let Inst{20} = 0b1; let Inst{19-16} = Zm; let Inst{9-5} = Zn; let Inst{2-0} = imm; } class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, bit o2, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, string mnemonic, string vg_acronym> : sme2_mla_long_array { bits<4> Zm; bits<5> Zn; bits<2> imm; let Inst{20} = vg4; let Inst{19-16} = Zm; let Inst{9-5} = Zn; let Inst{2} = o2; let Inst{1-0} = imm; } multiclass sme2_fp_mla_long_array_vg2_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty, vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_single op, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic, "vgx2">, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : 
SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; } multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_single op, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic, "vgx4">, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; } class sme2_mla_long_array_vg2_multi op0, bits<3> op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty> : sme2_mla_long_array { bits<4> Zm; bits<4> Zn; bits<2> imm; let Inst{20-17} = Zm; let Inst{16} = 0b0; let Inst{9-6} = Zn; let Inst{5} = op{2}; // fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } multiclass sme2_fp_mla_long_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { def NAME : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_multi op, SDPatternOperator intrinsic> { def _HtoS : 
sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; } class sme2_mla_long_array_vg4_multi op0, bits<3> op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty> : sme2_mla_long_array { bits<3> Zm; bits<3> Zn; bits<2> imm; let Inst{20-18} = Zm; let Inst{17} = 0b0; let Inst{16} = 0b1; let Inst{9-7} = Zn; let Inst{6} = 0b0; let Inst{5} = op{2}; //fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } multiclass sme2_fp_mla_long_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { def NAME : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_multi op, SDPatternOperator intrinsic> { def _HtoS : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; } //===----------------------------------------------------------------------===// class sme2_frint_cvt_vg2_multisz, bits<5>op, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<4> Zd; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b10; let Inst{19-16} = op{4-1}; let Inst{15-10} = 0b111000; let Inst{9-6} = Zn; let Inst{5} = op{0}; let Inst{4-1} = Zd; let Inst{0} = 0b0; } // SME2 multi-vec FP to int convert 
two registers // SME2 multi-vec int to FP two registers multiclass sme2_fp_cvt_vg2_multi op> { def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; } // SME2 multi-vec FRINT two registers multiclass sme2_frint_vector_vg2_multi op> { def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; } class sme2_frint_zip_cvt_vg4_multisz, bits<7>op, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<3> Zn; bits<3> Zd; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b11; let Inst{19-16} = op{6-3}; let Inst{15-10} = 0b111000; let Inst{9-7} = Zn; let Inst{6-5} = op{2-1}; let Inst{4-2} = Zd; let Inst{1} = op{0}; let Inst{0} = 0b0; } // SME2 multi-vec FP to int convert four registers // SME2 multi-vec int to FP four registers multiclass sme2_fp_cvt_vg4_multi op> { def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; } // SME2 multi-vec ZIP four registers multiclass sme2_zip_vector_vg4 op> { def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r, mnemonic>; def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic>; def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r, mnemonic>; } // SME2 multi-vec quadwords ZIP four registers multiclass sme2_zip_vector_vg4_Q op> { def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r, mnemonic>; } // SME2 multi-vec FRINT four registers multiclass sme2_frint_vector_vg4_multi op> { def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; } class sme2_cvt_vg2_single op, RegisterOperand first_ty, RegisterOperand second_ty> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<4> Zn; bits<5> Zd; let Inst{31-23} = 0b110000010; let Inst{22} = op{4}; let Inst{21-19} = 0b100; let Inst{18-16} = op{3-1}; let Inst{15-10} = 0b111000; let Inst{9-6} = Zn; let Inst{5} = op{0}; let Inst{4-0} = Zd; } // SME2 multi-vec FP down convert two registers // SME2 multi-vec int down convert two registers multiclass sme2_cvt_vg2_single op, ValueType out_vt, ValueType in_vt, SDPatternOperator intrinsic> { def NAME : sme2_cvt_vg2_single; def : SVE2p1_Cvt_VG2_Pat; } // SME2 multi-vec FP8 down convert two registers multiclass sme2_fp8_cvt_vg2_single { def NAME : sme2_cvt_vg2_single; } class sme2_cvt_unpk_vector_vg2sz, bits<3> op, bit u, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<5> Zn; bits<4> Zd; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-19} = 0b100; let Inst{18-16} = op; let Inst{15-10} = 0b111000; let Inst{9-5} = Zn; let Inst{4-1} = Zd; let Inst{0} = u; } // SME2 multi-vec unpack two registers multiclass sme2_unpk_vector_vg2 { def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8, mnemonic>; def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16, mnemonic>; def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32, mnemonic>; } // SME2.1 multi-vec convert two registers multiclass sme2p1_fp_cvt_vector_vg2_single { def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>; } // SME2 multi-vec FP8 up convert two registers multiclass sme2p1_fp8_cvt_vector_vg2_single opc, bit L> { def _NAME : sme2_cvt_unpk_vector_vg2; } class sme2_cvt_vg4_single op, bits<4>op2, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<3> Zn; bits<5> Zd; let Inst{31-24} = 0b11000001; let Inst{23} = sz; let Inst{22} = op{2}; let Inst{21-20} = 0b11; let Inst{19-16} = op2; let 
Inst{15-10} = 0b111000; let Inst{9-7} = Zn; let Inst{6-5} = op{1-0}; let Inst{4-0} = Zd; } // SME2 multi-vec int down convert four registers multiclass sme2_int_cvt_vg4_single op, SDPatternOperator intrinsic> { def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>; def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>; def : SME2_Cvt_VG4_Pat; def : SME2_Cvt_VG4_Pat; } //SME2 multi-vec FP8 down convert four registers multiclass sme2_fp8_cvt_vg4_single { def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>; } class sme2_unpk_vector_vg4sz, bit u, RegisterOperand first_ty, RegisterOperand second_ty, string mnemonic> : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<3> Zd; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-10} = 0b110101111000; let Inst{9-6} = Zn; let Inst{5} = 0b0; let Inst{4-2} = Zd; let Inst{1} = 0b0; let Inst{0} = u; } // SME2 multi-vec UNPK four registers multiclass sme2_unpk_vector_vg4 { def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>; def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>; def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 multi-vec CLAMP registers class sme2_clamp_vector_vg24_multi sz, bits<3> op1, bit u, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : I<(outs multi_vector_ty:$Zd), (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]>{ bits<5> Zm; bits<5> Zn; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-13} = 0b110; let Inst{12-10} = op1; let Inst{9-5} = Zn; let Inst{0} = u; let Constraints = "$Zd = $_Zd"; } class sme2_clamp_vector_vg2_multi sz, bits<3> op1, bit u, RegisterOperand 
multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : sme2_clamp_vector_vg24_multi{ bits<4> Zd; let Inst{4-1} = Zd; } multiclass sme2_fp_clamp_vector_vg2_multi{ def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg2_multi{ def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8, mnemonic>; def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>; def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>; def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>; } // SME2.1 multi-vec BFCLAMP two registers multiclass sme2p1_bfclamp_vector_vg2_multi { def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; } class sme2_clamp_vector_vg4_multi sz, bits<3> op1, bit u, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : sme2_clamp_vector_vg24_multi{ bits<3> Zd; let Inst{4-2} = Zd; let Inst{1} = 0b0; } multiclass sme2_fp_clamp_vector_vg4_multi{ def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg4_multi{ def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; } // SME2.1 multi-vec BFCLAMP four registers multiclass sme2p1_bfclamp_vector_vg4_multi { def _H :
sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; } // SME2 multi-vec ZIP two registers class sme2_zip_vector_vg2 sz, bit q, bit u, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]>{ bits<4> Zd; bits<5> Zm; bits<5> Zn; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-11} = 0b11010; let Inst{10} = q; let Inst{9-5} = Zn; let Inst{4-1} = Zd; let Inst{0} = u; } multiclass sme2_zip_vector_vg2 { def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>; def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>; def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>; def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>; def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 Dot Products and MLA class sme2_multi_vec_array_vg2_index sz, bits<6> op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i), mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> Zn; bits<3> imm3; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = op{5-3}; let Inst{9-6} = Zn; let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; } // SME2 multi-vec ternary indexed two registers 32-bit multiclass sme2_multi_vec_array_vg2_index_32b sz, bits<4> op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, 
SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg2_index, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; } // SME2.1 multi-vec ternary indexed two registers 16-bit multiclass sme2p1_multi_vec_array_vg2_index_16b sz, bits<3> op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg2_index, SMEPseudo2Instr { bits<3> i; let Inst{11-10} = i{2-1}; let Inst{3} = i{0}; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers multiclass sme2p1_multi_vec_array_vg2_index_f8f16 sz, bits<3> op, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { def NAME : sme2_multi_vec_array_vg2_index { bits<3> i; let Inst{11-10} = i{2-1}; let Inst{3} = i{0}; } def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec indexed FP8 two-way vertical dot product to single precision // two registers class sme2_fp8_multi_vec_array_vg4_index : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32, ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> { bits<2> i; let Inst{10} = i{1}; let Inst{3} = i{0}; let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}"); } // SME2 multi-vec ternary indexed two registers 64-bit class sme2_multi_vec_array_vg2_index_64b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : I<(outs MatrixOp64:$ZAda), (ins 
MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<1> i1; bits<4> Zn; bits<3> imm3; let Inst{31-20} = 0b110000011101; let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-11} = 0b00; let Inst{10} = i1; let Inst{9-6} = Zn; let Inst{5} = 0b0; let Inst{4-3} = op; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_multi_vec_array_vg2_index_64b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg2_index_64b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; } class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i), mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<3> Zn; bits<3> imm3; let Inst{31-23} = 0b110000010; let Inst{22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = 0b1; let Inst{14-13} = Rv; let Inst{12-10} = op{6-4}; let Inst{9-7} = Zn; let Inst{6-3} = op{3-0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; } // SME2 multi-vec ternary indexed four registers 32-bit multiclass sme2_multi_vec_array_vg4_index_32b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, 
vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; } // SME2.1 multi-vec ternary indexed four registers 16-bit (FP8) multiclass sme2p1_multi_vec_array_vg4_index_f8f16 op, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, multi_vector_ty, zpr_ty, VectorIndexH, mnemonic>{ bits<3> i; let Inst{11-10} = i{2-1}; let Inst{3} = i{0}; } def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2.1 multi-vec ternary indexed four registers 16-bit multiclass sme2p1_multi_vec_array_vg4_index_16b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, multi_vector_ty, vector_ty, VectorIndexH, mnemonic>, SMEPseudo2Instr { bits<3> i; let Inst{11-10} = i{2-1}; let Inst{3} = i{0}; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec ternary indexed four registers 64-bit class sme2_multi_vec_array_vg4_index_64b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> : I<(outs MatrixOp64:$ZAda), (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<1> i1; bits<3> Zn; bits<3> imm3; let Inst{31-20} = 
0b110000011101; let Inst{19-16} = Zm; let Inst{15} = 0b1; let Inst{14-13} = Rv; let Inst{12} = 0b0; let Inst{11} = op{2}; let Inst{10} = i1; let Inst{9-7} = Zn; let Inst{6-5} = 0b00; let Inst{4-3} = op{1-0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_multi_vec_array_vg4_index_64b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vty, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg4_index_64b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; } // FMLAL (multiple and indexed vector, FP8 to FP16) class sme2_multi_vec_array_vg24_index_16b sz, bit vg4, bits<3> op, RegisterOperand multi_vector_ty, string mnemonic> : I<(outs MatrixOp16:$ZAda), (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> i; bits<2> imm2; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = vg4; let Inst{14-13} = Rv; let Inst{12} = op{2}; let Inst{11-10} = i{3-2}; let Inst{5-4} = op{1-0}; let Inst{3-2} = i{1-0}; let Inst{1-0} = imm2; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_multi_vec_array_vg2_index_16b sz, bits<3>op> { def NAME : sme2_multi_vec_array_vg24_index_16b { bits<4> Zn; let Inst{9-6} = Zn; } def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>; } multiclass sme2_multi_vec_array_vg4_index_16bsz, bits<3>op> { def NAME: sme2_multi_vec_array_vg24_index_16b { bits<3> Zn; let Inst{9-7} = Zn; let Inst{6} = 0b0; } def : InstAlias(NAME) MatrixOp16:$ZAda, 
MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>; } //===----------------------------------------------------------------------===// // SME2 multi-vec indexed long long MLA one source 16-bit class sme2_mla_ll_array_index_16b sz,bits<2> op> : I<(outs MatrixOp16:$ZAda), (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> i; bits<5> Zn; bits<3> imm3; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b00; let Inst{19-16} = Zm; let Inst{15} = i{3}; let Inst{14-13} = Rv; let Inst{12} = op{1}; let Inst{11-10} = i{2-1}; let Inst{9-5} = Zn; let Inst{4} = op{0}; let Inst{3} = i{0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; } // SME2 multi-vec indexed long long MLA one source 32-bit class sme2_mla_ll_array_index_32b sz, bits<3> op> : I<(outs MatrixOp32:$ZAda), (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> i; bits<5> Zn; bits<2> imm2; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b00; let Inst{19-16} = Zm; let Inst{15} = i{3}; let Inst{14-13} = Rv; let Inst{12-10} = i{2-0}; let Inst{9-5} = Zn; let Inst{4-2} = op; let Inst{1-0} = imm2; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_ll_array_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_index_32b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_Multi_Index_Pat; } // SME2 multi-vec indexed long long MLA one source 64-bit class sme2_mla_ll_array_index_64b op> : I<(outs MatrixOp64:$ZAda), (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, 
VectorIndexH32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<3> i; bits<5> Zn; bits<2> imm2; let Inst{31-20} = 0b110000011000; let Inst{19-16} = Zm; let Inst{15} = i{2}; let Inst{14-13} = Rv; let Inst{12} = 0b0; let Inst{11-10} = i{1-0}; let Inst{9-5} = Zn; let Inst{4-3} = op; let Inst{2} = 0b0; let Inst{1-0} = imm2; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_ll_array_index_64b op, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_index_64b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_Multi_Index_Pat; } class sme2_mla_ll_array_vg24_index_32b sz, bit vg4, bits<3> op, RegisterOperand vector_ty, string mnemonic> : I<(outs MatrixOp32:$ZAda), (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> i; bit imm; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = vg4; let Inst{14-13} = Rv; let Inst{12} = 0b0; let Inst{11-10} = i{3-2}; let Inst{5-3} = op; let Inst{2-1} = i{1-0}; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } //SME2 multi-vec indexed long long MLA two sources 32-bit multiclass sme2_mla_ll_array_vg2_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { def NAME: sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<4> Zn; let Inst{9-6} = Zn; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; } // SME2 multi-vec indexed long long MLA four sources 32-bit multiclass sme2_mla_ll_array_vg4_index_32b sz, bits<4> op, SDPatternOperator intrinsic> { def NAME: 
sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<3> Zn; let Inst{9-7} = Zn; let Inst{6} = op{3}; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; } class sme2_mla_ll_array_vg24_index_64b op, RegisterOperand vector_ty, string mnemonic> : I<(outs MatrixOp64:$ZAda), (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<3> i; bit imm; let Inst{31-20} = 0b110000011001; let Inst{19-16} = Zm; let Inst{15} = vg4; let Inst{14-13} = Rv; let Inst{12-11} = 0b00; let Inst{10} = i{2}; let Inst{5} = 0b0; let Inst{4-3} = op; let Inst{2-1} = i{1-0}; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } // SME2 multi-vec indexed long long MLA two sources 64-bit multiclass sme2_mla_ll_array_vg2_index_64b op, SDPatternOperator intrinsic> { def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr { bits<4> Zn; let Inst{9-6} = Zn; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>; } // SME2 multi-vec indexed long long MLA four sources 64-bit multiclass sme2_mla_ll_array_vg4_index_64b op, SDPatternOperator intrinsic> { def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>, SMEPseudo2Instr { bits<3> Zn; let Inst{9-7} = Zn; let Inst{6} = 0b0; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, 
VectorIndexH32b_timm:$i), 0>; } //SME2 multiple and single vector long long FMA one source class sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm, vector_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<5> Zn; bits<2> imm; let Inst{31-23} = 0b110000010; let Inst{22} = op{4}; //sz let Inst{21} = 0b1; let Inst{20} = op{3}; //fp8 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = 0b001; let Inst{9-5} = Zn; let Inst{4-2} = op{2-0}; let Inst{1-0} = imm; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_single, SMEPseudo2Instr; def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo; def : SME2_ZA_TwoOp_Multi_Single_Pat; } class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, zpr_ty:$Zm), mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<5> Zn; bit imm; let Inst{31-23} = 0b110000010; let Inst{22} = op{5}; //sz let Inst{21} = 0b1; let Inst{20} = op{4}; //vg4 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-5} = Zn; let Inst{4-1} = op{3-0}; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } //SME2 single-multi long long MLA two and four sources multiclass sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { def NAME: sme2_mla_ll_array_vg24_single, SMEPseudo2Instr; def NAME # _PSEUDO : 
sme2_za_array_2op_multi_single_pseudo; def : InstAlias(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; } multiclass sme2_mla_ll_array_vg2_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; } multiclass sme2_mla_ll_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; } // SME2 multiple vectors long long MLA two sources class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty,string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<4> Zn; bit imm; let Inst{31-23} = 0b110000011; let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-6} = Zn; let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_vg2_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>; } // SME2 multiple vectors long long MLA four sources class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, RegisterOperand vector_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, 
MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm", "", []>, Sched<[]> { bits<3> Zm; bits<2> Rv; bits<3> Zn; bit imm; let Inst{31-23} = 0b110000011; let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-7} = Zn; let Inst{6} = 0b0; let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } multiclass sme2_mla_ll_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_vg4_multi, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>; } //===----------------------------------------------------------------------===// // SME2 Outer Product and Accumulate multiclass sme2_int_mopx_tile op, SDPatternOperator intrinsic> { def NAME : sme_int_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } def _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme2_int_bmopx_tile op, SDPatternOperator intrinsic> { def NAME : sme_outer_product_widening_inst, SMEPseudo2Instr; def _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } //===----------------------------------------------------------------------===/// // SME2 Zero Lookup Table. 
// Reader notes (comments only; every code token below is unchanged).
// NOTE(review): several headers below show a closing `>` with no opening `<`
// (e.g. `class sme2_zero_zt opc>`), and `!cast(NAME # _B)` carries no type
// argument. This text looks like it has lost its `<...>` parameter lists and
// its original line breaks -- confirm against the upstream file.
//
// Next line, in order:
//  * class/multiclass sme2_zero_zt: Inst{31-4} is fixed and Inst{3-0} = opc;
//    the multiclass adds a _PSEUDO (usesCustomInserter = 1) and a pattern that
//    maps int_aarch64_sme_zero_zt onto that pseudo.
//  * class/multiclass sme2_spill_fill_vector: Rn in Inst{9-5}, opc{7-2} in
//    Inst{21-16}, opc{1-0} in Inst{1-0}; mayStore is opc{7} and mayLoad is its
//    negation. The multiclass adds a _PSEUDO and a pattern for `op`.
//  * sme2_movt_zt_to_scalar / sme2_movt_scalar_to_zt: imm3 in Inst{14-12},
//    opc in Inst{11-5}, Rt in Inst{4-0}.
//  * start of class sme2_movt_zt_to_zt.
class sme2_zero_zt opc> : I<(outs ZTR:$ZT), (ins ), mnemonic, "\t\\{ $ZT \\}", "", []>, Sched<[]> { let Inst{31-4} = 0b1100000001001000000000000000; let Inst{3-0} = opc; } multiclass sme2_zero_zt opc> { def NAME : sme2_zero_zt; def NAME # _PSEUDO : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> { // Translated to actual instruction in AArch64ISelLowering.cpp let usesCustomInserter = 1; } def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)), (!cast(NAME # _PSEUDO) $zt)>; } //===----------------------------------------------------------------------===// // SME2 lookup table load/store class sme2_spill_fill_vector opc> : I, Sched<[]> { bits<5> Rn; let Inst{31-22} = 0b1110000100; let Inst{21-16} = opc{7-2}; let Inst{15-10} = 0b100000; let Inst{9-5} = Rn; let Inst{4-2} = 0b000; let Inst{1-0} = opc{1-0}; let mayLoad = !not(opc{7}); let mayStore = opc{7}; } multiclass sme2_spill_fill_vector opc, SDPatternOperator op> { def NAME : sme2_spill_fill_vector; def NAME # _PSEUDO : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> { // Translated to actual instruction in AArch64ISelLowering.cpp let usesCustomInserter = 1; } def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base), (!cast(NAME # _PSEUDO) $tile, $base)>; } //===----------------------------------------------------------------------===/// // SME2 move to/from lookup table class sme2_movt_zt_to_scalar opc> : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3), mnemonic, "\t$Rt, $ZTt[$imm3]", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; let Inst{31-15} = 0b11000000010011000; let Inst{14-12} = imm3; let Inst{11-5} = opc; let Inst{4-0} = Rt; } class sme2_movt_scalar_to_zt opc> : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), mnemonic, "\t$ZTt[$imm3], $Rt", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; let Inst{31-15} = 0b11000000010011100; let Inst{14-12} = imm3; let Inst{11-5} = opc; let Inst{4-0} = Rt; } // SME2 move vector to lookup table class sme2_movt_zt_to_zt opc> : I<(outs ZTR:$ZTt), (ins 
// Next line, in order:
//  * rest of sme2_movt_zt_to_zt (off2 in Inst{13-12}, Zt in Inst{4-0}) and its
//    multiclass, which adds an InstAlias that passes 0 in the $off2 position.
//  * class sme2_luti_vector_index: shared single-register LUTI format
//    (sz in Inst{13-12}, Zn in Inst{9-5}, Zd in Inst{4-0}).
//  * sme2_luti2_vector_index: 4-bit index i in Inst{17-14}; the multiclass
//    defines _B/_H/_S and starts the per-result-type selection patterns.
sme_elm_idx0_3:$off2, ZPRAny:$Zt), mnemonic, "\t$ZTt[$off2, mul vl], $Zt", "", []>, Sched<[]> { bits<5> Zt; bits<2> off2; let Inst{31-14} = 0b110000000100111100; let Inst{13-12} = off2; let Inst{11-5} = opc; let Inst{4-0} = Zt; } multiclass sme2_movt_zt_to_zt opc> { def NAME : sme2_movt_zt_to_zt; def : InstAlias(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>; } //===----------------------------------------------------------------------===// // SME2 lookup table expand one register class sme2_luti_vector_index sz, bits<7> opc, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>, Sched<[]> { bits<5> Zn; bits<5> Zd; let Inst{31-19} = 0b1100000011001; let Inst{18-14} = opc{6-2}; let Inst{13-12} = sz; let Inst{11-10} = opc{1-0}; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } class sme2_luti2_vector_index sz, RegisterOperand vector_ty, string mnemonic> : sme2_luti_vector_index { bits<4> i; let Inst{17-14} = i; } multiclass sme2_luti2_vector_index { def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>; def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>; def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>; def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), 
// Next line, in order:
//  * remaining sme2_luti2_vector_index patterns (f16/bf16 results select the
//    _H instruction, f32 selects _S).
//  * sme2_luti4_vector_index: 3-bit index i in Inst{16-14}; same _B/_H/_S
//    defs and pattern set, using VectorIndexH32b_timm for the index.
//  * start of class sme2_luti_vector_vg2_index (two-register result).
nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))), (!cast(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>; } class sme2_luti4_vector_index sz, RegisterOperand vector_ty, string mnemonic> : sme2_luti_vector_index { bits<3> i; let Inst{16-14} = i; } multiclass sme2_luti4_vector_index { def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>; def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>; def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>; def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))), (!cast(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>; } // SME2 lookup table expand two contiguous registers class sme2_luti_vector_vg2_index sz, bits<6> opc, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>, Sched<[]> { bits<5> Zn; 
// Next line, in order:
//  * rest of sme2_luti_vector_vg2_index: 4-bit Zd in Inst{4-1}, Inst{0} = 0.
//  * LUTI2 vg2: 3-bit i in Inst{17-15}; LUTI4 vg2: 2-bit i in Inst{16-15};
//    both multiclasses define _B/_H/_S.
//  * sme2_luti_vector_vg4_index: 3-bit Zd in Inst{4-2}, Inst{1-0} = 0.
//  * LUTI2 vg4: 2-bit i in Inst{17-16}, _B/_H/_S.
//  * start of class sme2_luti4_vector_vg4_index.
bits<4> Zd; let Inst{31-19} = 0b1100000010001; let Inst{18-15} = opc{5-2}; let Inst{14} = 0b1; let Inst{13-12} = sz; let Inst{11-10} = opc{1-0}; let Inst{9-5} = Zn; let Inst{4-1} = Zd; let Inst{0} = 0b0; } class sme2_luti2_vector_vg2_index sz, RegisterOperand vector_ty, string mnemonic> : sme2_luti_vector_vg2_index { bits<3> i; let Inst{17-15} = i; } multiclass sme2_luti2_vector_vg2_index { def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; } class sme2_luti4_vector_vg2_index sz, RegisterOperand vector_ty, string mnemonic> : sme2_luti_vector_vg2_index { bits<2> i; let Inst{16-15} = i; } multiclass sme2_luti4_vector_vg2_index { def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; } // SME2 lookup table expand four contiguous registers class sme2_luti_vector_vg4_index sz, bits<5>opc, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>, Sched<[]> { bits<5> Zn; bits<3> Zd; let Inst{31-19} = 0b1100000010001; let Inst{18-16} = opc{4-2}; let Inst{15-14} = 0b10; let Inst{13-12} = sz; let Inst{11-10} = opc{1-0}; let Inst{9-5} = Zn; let Inst{4-2} = Zd; let Inst{1-0} = 0b00; } class sme2_luti2_vector_vg4_index sz, RegisterOperand vector_ty, string mnemonic> : sme2_luti_vector_vg4_index { bits<2> i; let Inst{17-16} = i; } multiclass sme2_luti2_vector_vg4_index { def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>; def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; } class sme2_luti4_vector_vg4_index sz, RegisterOperand vector_ty, string mnemonic> : 
// Next line, in order:
//  * rest of LUTI4 vg4: 1-bit i in Inst{16}; its multiclass defines only _H
//    and _S (no _B variant appears here).
//  * class sme2_mova_vec_to_tile_vg2_multi_base: vector-to-tile move, two
//    registers; v in Inst{15}, Rs in Inst{14-13}, 4-bit Zn in Inst{9-6};
//    $ZAd is tied to $_ZAd. Inst{2-0} is left for the per-size defs.
//  * multiclass sme2_mova_vec_to_tile_or_array_aliases: a single InstAlias.
//  * multiclass sme2_mova_vec_to_tile_vg2_multi_base: _B/_H/_S/_D split
//    Inst{2-0} between tile number and immediate (B: 3-bit imm; H: 1+2;
//    S: 2+1; D: 3-bit tile), then the per-size _PSEUDO defs begin.
sme2_luti_vector_vg4_index { bits<1> i; let Inst{16} = i; } multiclass sme2_luti4_vector_vg4_index { def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 MOV class sme2_mova_vec_to_tile_vg2_multi_base sz, bit v, RegisterOperand tile_ty, Operand index_ty, RegisterOperand vector_ty, string mnemonic> : I<(outs tile_ty:$ZAd), (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn), mnemonic, "\t$ZAd[$Rs, $imm], $Zn", "", []>, Sched<[]> { bits<2> Rs; bits<4> Zn; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-16} = 0b000100; let Inst{15} = v; let Inst{14-13} = Rs; let Inst{12-10} = 0b000; let Inst{9-6} = Zn; let Inst{5-3} = 0b000; let Constraints = "$ZAd = $_ZAd"; } multiclass sme2_mova_vec_to_tile_or_array_aliases { def : InstAlias; } // SME2 move vector to tile, two registers multiclass sme2_mova_vec_to_tile_vg2_multi_base { def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v, !if(v, TileVectorOpV8, TileVectorOpH8), uimm3s2range, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v, !if(v, TileVectorOpV16, TileVectorOpH16), uimm2s2range, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr { bits<1> ZAd; bits<2> imm; let Inst{2} = ZAd; let Inst{1-0} = imm; } def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v, !if(v, TileVectorOpV32, TileVectorOpH32), uimm1s2range, ZZ_s_mul_r, mnemonic>, SMEPseudo2Instr { bits<2> ZAd; bits<1> imm; let Inst{2-1} = ZAd; let Inst{0} = imm; } def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v, !if(v, TileVectorOpV64, TileVectorOpH64), uimm0s2range, ZZ_d_mul_r, mnemonic>, SMEPseudo2Instr { bits<3> ZAd; let Inst{2-0} = ZAd; } def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo; def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo; def NAME # _S_PSEUDO : 
// Next line: remaining pseudos, eight selection patterns, then the alias lists
// for the two-register vector-to-tile move ("mov" with first argument 1,
// "mova" with first argument 0).
// NOTE(review): the group of four "mova" aliases (first argument 0) appears
// twice in this multiclass with identical arguments, whereas the four-register
// counterpart further down lists it once. Confirm whether the repeat is
// intentional.
sme2_move_to_tile_pseudo; def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; def : SME2_Tile_VG2_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _B), !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm3s2range, ZZ_b_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _H), !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm2s2range, ZZ_h_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _S), !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm1s2range, ZZ_s_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _D), !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s2range, ZZ_d_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm3s2range, ZZ_b_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm2s2range, ZZ_h_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm1s2range, ZZ_s_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s2range, ZZ_d_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm3s2range, ZZ_b_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm2s2range, ZZ_h_mul_r, 
// Next line, in order:
//  * end of the two-register alias list.
//  * multiclass sme2_mova_vec_to_tile_vg2_multi: _H with v = 0 (int_h) and _V
//    with v = 1 (int_v).
//  * class sme2_mova_vec_to_tile_vg4_multi_base: four-register form; 3-bit Zn
//    in Inst{9-7}, Inst{12-10} = 0b001, Inst{2-0} = op.
//  * multiclass sme2_mova_vec_to_tile_vg4_multi_base: _B/_H/_S/_D pass op with
//    `?` bits and fill them in from imm / ZAd in each def body.
"mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm1s2range, ZZ_s_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s2range, ZZ_d_mul_r, "mova">; } multiclass sme2_mova_vec_to_tile_vg2_multi{ defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>; defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>; } class sme2_mova_vec_to_tile_vg4_multi_base sz, bit v, bits<3> op, RegisterOperand tile_ty, Operand index_ty, RegisterOperand vector_ty, string mnemonic> : I<(outs tile_ty:$ZAd), (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn), mnemonic, "\t$ZAd[$Rs, $imm], $Zn", "", []>, Sched<[]> { bits<2> Rs; bits<3> Zn; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-16} = 0b000100; let Inst{15} = v; let Inst{14-13} = Rs; let Inst{12-10} = 0b001; let Inst{9-7} = Zn; let Inst{6-3} = 0b0000; let Inst{2-0} = op; let Constraints = "$ZAd = $_ZAd"; } // SME2 move vector to tile, four registers multiclass sme2_mova_vec_to_tile_vg4_multi_base { def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?}, !if(v, TileVectorOpV8, TileVectorOpH8), uimm2s4range, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?}, !if(v, TileVectorOpV16, TileVectorOpH16), uimm1s4range, ZZZZ_h_mul_r, mnemonic>, SMEPseudo2Instr { bits<1> ZAd; bits<1> imm; let Inst{1} = ZAd; let Inst{0} = imm; } def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?}, !if(v, TileVectorOpV32, TileVectorOpH32), uimm0s4range, ZZZZ_s_mul_r, mnemonic>, SMEPseudo2Instr { bits<2> ZAd; let Inst{1-0} = ZAd; } def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?}, !if(v, TileVectorOpV64, TileVectorOpH64), uimm0s4range, ZZZZ_d_mul_r, mnemonic>, SMEPseudo2Instr 
// Next line: body of the _D def (3-bit ZAd in Inst{2-0}), four _PSEUDO defs,
// eight selection patterns, then four "mov" aliases (first argument 1) and
// four "mova" aliases (first argument 0); ends at the start of multiclass
// sme2_mova_vec_to_tile_vg4_multi.
{ bits<3> ZAd; let Inst{2-0} = ZAd; } def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo; def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo; def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo; def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; def : SME2_Tile_VG4_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _B), !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm2s4range, ZZZZ_b_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _H), !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm1s4range, ZZZZ_h_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _S), !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm0s4range, ZZZZ_s_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _D), !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s4range, ZZZZ_d_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm2s4range, ZZZZ_b_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm1s4range, ZZZZ_h_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm0s4range, ZZZZ_s_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s4range, ZZZZ_d_mul_r, "mova">; } multiclass sme2_mova_vec_to_tile_vg4_multi{ defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>; defm _V : 
// Next line, in order:
//  * end of sme2_mova_vec_to_tile_vg4_multi (_V with v = 1).
//  * class sme2_mova_vec_to_array_vg24_multi: vector-to-ZA-array move; Rs in
//    Inst{14-13}, op in Inst{10-6}, 3-bit imm in Inst{2-0}; the vg_acronym
//    string is spliced into the asm string; $ZAd is tied to $_ZAd.
//  * multiclass sme2_mova_vec_to_array_vg2_multi: the instruction is defined
//    once on the 64-bit operand classes (MatrixOp64 / ZZ_d_mul_r) with a 4-bit
//    Zn in Inst{9-6}; the aliases that follow cover the other element sizes,
//    with and without "vgx2".
sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>; } // SME Move into Array class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty, RegisterOperand vector_ty, string mnemonic, string vg_acronym=""> : I<(outs array_ty:$ZAd), (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm, vector_ty:$Zn), mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn", "", []>, Sched<[]> { bits<2> Rs; bits<3> imm; let Inst{31-15} = 0b11000000000001000; let Inst{14-13} = Rs; let Inst{12-11} = 0b01; let Inst{10-6} = op; let Inst{5-3} = 0b000; let Inst{2-0} = imm; let Constraints = "$ZAd = $_ZAd"; } // MOVA (vector to array, two registers) multiclass sme2_mova_vec_to_array_vg2_multi { def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64, ZZ_d_mul_r, mnemonic, "vgx2">, SMEPseudo2Instr { bits<4> Zn; let Inst{9-6} = Zn; } def NAME # _PSEUDO : sme2_move_to_za_pseudo; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; def : SME2_ZA_VG1x2_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_b_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_h_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_s_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_d_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_b_mul_r, "mova", "vgx2">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_h_mul_r, "mova", "vgx2">; defm : 
// Next line, in order:
//  * remaining two-register array aliases; only the final one (MatrixOp64,
//    ZZ_d_mul_r, "mov", "vgx2") passes 1 as its first argument.
//  * multiclass sme2_mova_vec_to_array_vg4_multi: same structure for four
//    registers, with a 3-bit Zn in Inst{9-7} and "vgx4".
sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_s_mul_r, "mova", "vgx2">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_b_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_h_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_s_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_d_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_b_mul_r, "mov", "vgx2">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_h_mul_r, "mov", "vgx2">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_s_mul_r, "mov", "vgx2">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZ_d_mul_r, "mov", "vgx2">; } // MOVA (vector to array, four registers) multiclass sme2_mova_vec_to_array_vg4_multi { def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64, ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr { bits<3> Zn; let Inst{9-7} = Zn; } def NAME # _PSEUDO : sme2_move_to_za_pseudo; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; def : SME2_ZA_VG1x4_Multi_Pat; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_b_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, 
// Next line: remaining four-register array aliases (again only the final
// MatrixOp64 / "mov" / "vgx4" alias passes 1 as its first argument), then the
// start of class sme2_mova_tile_to_vec_vg2_multi_base.
MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_h_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_s_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_d_mul_r, "mova">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_b_mul_r, "mova", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_h_mul_r, "mova", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_s_mul_r, "mova", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_b_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_h_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_s_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_d_mul_r, "mov">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_b_mul_r, "mov", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_h_mul_r, "mov", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_s_mul_r, "mov", "vgx4">; defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME), MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, ZZZZ_d_mul_r, "mov", "vgx4">; } class sme2_mova_tile_to_vec_vg2_multi_base sz, bit v, bits<3> op, RegisterOperand vector_ty, RegisterOperand 
// Next line, in order:
//  * rest of class sme2_mova_tile_to_vec_vg2_multi_base: tile-to-vector move,
//    two registers; op in Inst{10-8}, 4-bit Zd in Inst{4-1}. The "$ZAn =
//    $_ZAn" tie is applied only when op{1} is set.
//  * multiclass sme2_mova_tile_or_array_to_vec_aliases: a single InstAlias.
//  * multiclass sme2_mova_tile_to_vec_vg2_multi_inst: _B/_H/_S/_D split
//    Inst{7-5} between tile number and immediate; the "mov" aliases are only
//    emitted when mnemonic is "mova".
tile_ty, Operand index_ty, string mnemonic> : I, Sched<[]> { bits<4> Zd; bits<2> Rs; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-16} = 0b000110; let Inst{15} = v; let Inst{14-13} = Rs; let Inst{12-11} = 0b00; let Inst{10-8} = op; let Inst{4-1} = Zd; let Inst{0} = 0b0; let Constraints = !if(op{1}, "$ZAn = $_ZAn", ""); } multiclass sme2_mova_tile_or_array_to_vec_aliases { def : InstAlias; } multiclass sme2_mova_tile_to_vec_vg2_multi_inst opc, string mnemonic> { def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), uimm3s2range, mnemonic>, SMEPseudo2Instr { bits<3> imm; let Inst{7-5} = imm; } def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), uimm2s2range, mnemonic>, SMEPseudo2Instr { bits<1> ZAn; bits<2> imm; let Inst{7} = ZAn; let Inst{6-5} = imm; } def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), uimm1s2range, mnemonic>, SMEPseudo2Instr { bits<2> ZAn; bits<1> imm; let Inst{7-6} = ZAn; let Inst{5} = imm; } def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), uimm0s2range, mnemonic>, SMEPseudo2Instr { bits<3> ZAn; let Inst{7-5} = ZAn; } if !eq(mnemonic, "mova") then { defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast(NAME # _B), ZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm3s2range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast(NAME # _H), ZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm2s2range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _S), ZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm1s2range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _D), ZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, 
// Next line, in order:
//  * end of the conditional "mov" aliases, then the unconditional aliases
//    under `mnemonic`.
//  * sme2_mova_tile_to_vec_vg2_multi: _H/_V with opc 0b000.
//  * sme2p1_movaz_tile_to_vec_vg2: _H/_V with opc 0b010 (op{1} set, so the
//    base class ties $ZAn to $_ZAn) plus eight _PSEUDO defs.
//  * start of class sme2_mova_tile_to_vec_vg4_multi_base (op in Inst{10-5},
//    3-bit Zd in Inst{4-2}).
uimm0s2range, "mov">; } defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast(NAME # _B), ZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm3s2range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast(NAME # _H), ZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm2s2range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _S), ZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm1s2range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _D), ZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s2range, mnemonic>; } // SME2 move tile to vector, two registers multiclass sme2_mova_tile_to_vec_vg2_multi{ defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>; defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>; } // SME2p1 move tile to vector and zero tile, two registers multiclass sme2p1_movaz_tile_to_vec_vg2{ defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>; defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>; def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo; } class sme2_mova_tile_to_vec_vg4_multi_base sz, bit v, bits<6> op, RegisterOperand vector_ty, RegisterOperand tile_ty, Operand index_ty, string mnemonic> : I, Sched<[]> { bits<3> Zd; bits<2> Rs; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-16} = 0b000110; let Inst{15} = v; let Inst{14-13} = Rs; let Inst{12-11} = 0b00; let Inst{10-5} = op{5-0}; let Inst{4-2} = Zd; let Inst{1-0} = 0b00; 
// Next line, in order:
//  * end of the vg4 class: the "$ZAn = $_ZAn" tie is applied only when op{4}
//    is set.
//  * multiclass sme2_mova_tile_to_vec_vg4_multi_base: op is built as
//    {opc,0,?,?} (or {opc,?,?,?} for _D) and the `?` bits are filled from
//    imm / ZAn in each def body; "mov" aliases only when mnemonic is "mova".
let Constraints = !if(op{4}, "$ZAn = $_ZAn", ""); } multiclass sme2_mova_tile_to_vec_vg4_multi_base opc, string mnemonic> { def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?}, ZZZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), uimm2s4range, mnemonic>, SMEPseudo2Instr { bits<2> imm; let Inst{6-5} = imm; } def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?}, ZZZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), uimm1s4range, mnemonic>, SMEPseudo2Instr { bits<1> ZAn; bits<1> imm; let Inst{6} = ZAn; let Inst{5} = imm; } def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?}, ZZZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), uimm0s4range, mnemonic>, SMEPseudo2Instr { bits<2> ZAn; let Inst{6-5} = ZAn; } def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?}, ZZZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), uimm0s4range, mnemonic>, SMEPseudo2Instr { bits<3> ZAn; let Inst{7-5} = ZAn; } if !eq(mnemonic, "mova") then { defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _B), ZZZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm2s4range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _H), ZZZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, uimm1s4range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _S), ZZZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm0s4range, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _D), ZZZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s4range, "mov">; } defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _B), ZZZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8), MatrixIndexGPR32Op12_15, uimm2s4range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _H), ZZZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16), MatrixIndexGPR32Op12_15, 
// Next line, in order:
//  * end of the vg4 tile-to-vector alias list.
//  * sme2_mova_tile_to_vec_vg4_multi: _H/_V with opc 0b100.
//  * sme2p1_movaz_tile_to_vec_vg4: _H/_V with opc 0b110 plus eight _PSEUDO
//    defs.
//  * class sme2_mova_array_to_vec_vg24_multi: array-to-vector move; op{3-1}
//    in Inst{10-8}, 3-bit imm in Inst{7-5}, op{0} in Inst{1}; the "$ZAn =
//    $_ZAn" tie is applied only when op{2} is set.
uimm1s4range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _S), ZZZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32), MatrixIndexGPR32Op12_15, uimm0s4range, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _D), ZZZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64), MatrixIndexGPR32Op12_15, uimm0s4range, mnemonic>; } // SME2 move tile to vector, four registers multiclass sme2_mova_tile_to_vec_vg4_multi{ defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>; defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>; } // SME2p1 move tile to vector and zero tile, four registers multiclass sme2p1_movaz_tile_to_vec_vg4{ defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>; defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>; def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo; } class sme2_mova_array_to_vec_vg24_multiop, RegisterOperand vector_ty, RegisterOperand array_ty, string mnemonic, string vg_acronym> : I, Sched<[]> { bits<2> Rs; bits<3> imm; let Inst{31-15} = 0b11000000000001100; let Inst{14-13} = Rs; let Inst{12-11} = 0b01; let Inst{10-8} = op{3-1}; let Inst{7-5} = imm; let Inst{1} = op{0}; let Inst{0} = 0b0; let Constraints = !if(op{2}, "$ZAn = $_ZAn", ""); } // move array to vector, two registers. 
// Reader notes (comments only; every code token below is unchanged).
// NOTE(review): several headers below show a closing `>` with no opening `<`
// (e.g. `multiclass sme2_mova_array_to_vec_vg2_multi opc, string mnemonic>`),
// and `!cast(NAME)` carries no type argument. This text looks like it has lost
// its `<...>` parameter lists and its original line breaks -- confirm against
// the upstream file.
//
// Next line: multiclass sme2_mova_array_to_vec_vg2_multi. The instruction is
// defined once on the 64-bit operand classes (ZZ_d_mul_r / MatrixOp64, "vgx2")
// with a 4-bit Zd in Inst{4-1}; aliases under `mnemonic` cover the other
// element sizes with and without "vgx2", and the "mov" aliases are only
// emitted when mnemonic is "mova".
multiclass sme2_mova_array_to_vec_vg2_multi opc, string mnemonic> { def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64, mnemonic, "vgx2">, SMEPseudo2Instr{ bits<4> Zd; let Inst{4-1} = Zd; } defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx2">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx2">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx2">; if !eq(mnemonic, "mova") then { defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx2">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_h_mul_r, MatrixOp16, 
// Next line, in order:
//  * end of the two-register "mov" aliases (only the MatrixOp64 one passes 1).
//  * sme2_movaz_array_to_vec_vg2_multi: instantiates the multiclass above with
//    0b010 and adds a _PSEUDO.
//  * multiclass sme2_mova_array_to_vec_vg4_multi: four-register form with a
//    3-bit Zd in Inst{4-2} and "vgx4" aliases.
MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx2">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx2">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME), ZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx2">; } } multiclass sme2_movaz_array_to_vec_vg2_multi { defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>; def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo; } // move array to vector, four registers multiclass sme2_mova_array_to_vec_vg4_multi opc, string mnemonic> { def NAME : sme2_mova_array_to_vec_vg24_multi, SMEPseudo2Instr { bits<3> Zd; let Inst{4-2} = Zd; } defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">; if !eq(mnemonic, "mova") then { defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, 
// Next line, in order:
//  * end of the four-register "mov" aliases.
//  * sme2_movaz_array_to_vec_vg4_multi: instantiates the vg4 multiclass with
//    0b1100 and adds a _PSEUDO.
//  * class/multiclass sme2_sat_shift_vector_vg2: ZZ_s_mul_r source, ZPR16
//    result; op in Inst{20}, 4-bit imm4 in Inst{19-16}, Zn in Inst{9-6}, u in
//    Inst{5}, Zd in Inst{4-0}.
//  * start of class sme2_sat_shift_vector_vg4; Inst{20-16} is deliberately
//    left unset there and supplied by the per-size defs.
sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_b_mul_r, MatrixOp8, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_h_mul_r, MatrixOp16, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">; defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), ZZZZ_s_mul_r, MatrixOp32, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">; defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME), ZZZZ_d_mul_r, MatrixOp64, MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">; } } multiclass sme2_movaz_array_to_vec_vg4_multi { defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>; def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo; } //===----------------------------------------------------------------------===// // SME2 multi-vec saturating shift right narrow class sme2_sat_shift_vector_vg2 : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<4> imm4; bits<4> Zn; bits<5> Zd; let Inst{31-21} = 0b11000001111; let Inst{20} = op; let Inst{19-16} = imm4; let Inst{15-10} = 0b110101; let Inst{9-6} = Zn; let Inst{5} = u; let Inst{4-0} = Zd; } multiclass sme2_sat_shift_vector_vg2 { def _H : sme2_sat_shift_vector_vg2; def : SME2_Sat_Shift_VG2_Pat; } class sme2_sat_shift_vector_vg4 sz, bits<3> op, ZPRRegOp zpr_ty, RegisterOperand vector_ty, Operand imm_ty, string mnemonic> : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm), mnemonic, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<3> Zn; bits<5> Zd; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21} = 0b1; // Inst{20-16} = imm5; let Inst{15-11} = 
// Next line, in order:
//  * rest of sme2_sat_shift_vector_vg4 and its multiclass: _B uses a 5-bit
//    imm in Inst{20-16}; _H passes sz = {1,?} and uses a 6-bit imm whose top
//    bit goes to Inst{22} and low five bits to Inst{20-16}.
//  * class sme2_sel_vector_vg24: shared select format (3-bit PNg in
//    Inst{12-10}); register fields are left to the vg2/vg4 subclasses.
//  * sme2_sel_vector_vg2 (4-bit Zm/Zn/Zd) with _B/_H/_S/_D, and
//    sme2_sel_vector_vg4 (3-bit Zm/Zn/Zd).
//  * multiclass sme2_sel_vector_vg4 begins here and continues past this line.
0b11011; let Inst{10} = op{2}; let Inst{9-7} = Zn; let Inst{6-5} = op{1-0}; let Inst{4-0} = Zd; } multiclass sme2_sat_shift_vector_vg4 op, SDPatternOperator intrinsic> { def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, mnemonic>{ bits<5> imm; let Inst{20-16} = imm; } def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, mnemonic> { bits<6> imm; let Inst{22} = imm{5}; let Inst{20-16} = imm{4-0}; } def : SME2_Sat_Shift_VG4_Pat; def : SME2_Sat_Shift_VG4_Pat; } //===----------------------------------------------------------------------===// // SME2 Multi-vector - SVE Select class sme2_sel_vector_vg24 sz, bits<4> op, RegisterOperand vector_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$Zd, $PNg, $Zn, $Zm", "", []>, Sched<[]> { bits<3> PNg; let Inst{31-24} = 0b11000001; let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{17-16} = op{3-2}; let Inst{15-13} = 0b100; let Inst{12-10} = PNg; let Inst{6} = op{1}; let Inst{5} = 0b0; let Inst{1} = op{0}; let Inst{0} = 0b0; } class sme2_sel_vector_vg2 sz, RegisterOperand vector_ty, string mnemonic> : sme2_sel_vector_vg24 { bits<4> Zm; bits<4> Zn; bits<4> Zd; let Inst{20-17} = Zm; let Inst{9-6} = Zn; let Inst{4-1} = Zd; } multiclass sme2_sel_vector_vg2{ def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>; def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>; def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>; def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>; } class sme2_sel_vector_vg4 sz, RegisterOperand vector_ty, string mnemonic> : sme2_sel_vector_vg24 { bits<3> Zm; bits<3> Zn; bits<3> Zd; let Inst{20-18} = Zm; let Inst{9-7} = Zn; let Inst{4-2} = Zd; } multiclass sme2_sel_vector_vg4 { def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>; def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>; def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>; def _D : 
sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // Non contiguous Load and Store class sme2_ld_vector_vg2_multi_scalar_scalar msz, bit n, RegisterOperand multi_vector_ty, RegisterOperand gpr_ty, string mnemonic> : I<(outs multi_vector_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", "", []>, Sched<[]> { bits<5> Rm; bits<3> PNg; bits<5> Rn; bits<4> Zt; let Inst{31-21} = 0b10100001000; let Inst{20-16} = Rm; let Inst{15} = 0b0; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{4} = Zt{3}; let Inst{3} = n; let Inst{2-0} = Zt{2-0}; let mayLoad = 1; } class sme2_ld_vector_vg4_multi_scalar_scalar msz, bit n, RegisterOperand multi_vector_ty, RegisterOperand gpr_ty, string mnemonic> : I<(outs multi_vector_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", "", []>, Sched<[]> { bits<5> Rm; bits<3> PNg; bits<5> Rn; bits<3> Zt; let Inst{31-21} = 0b10100001000; let Inst{20-16} = Rm; let Inst{15} = 0b1; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{4} = Zt{2}; let Inst{3} = n; let Inst{2} = 0b0; let Inst{1-0} = Zt{1-0}; let mayLoad = 1; } class sme2_ld_vector_vg24_multi_scalar_immediate msz, bit n, bits<2> op, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic> : I<(outs multi_vector_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4), mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", "", []>, Sched<[]> { bits<4> imm4; bits<3> PNg; bits<5> Rn; let Inst{31-20} = 0b101000010100; let Inst{19-16} = imm4; let Inst{15} = op{1}; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{3} = n; let Inst{2} = op{0}; let mayLoad = 1; } multiclass sme2_ld_vector_vg2_multi_scalar_immediate msz, bit n, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic>{ def NAME : 
sme2_ld_vector_vg24_multi_scalar_immediate { bits<4> Zt; let Inst{4} = Zt{3}; let Inst{2-0} = Zt{2-0}; } def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>; } multiclass sme2_ld_vector_vg4_multi_scalar_immediate msz, bit n, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic> { def NAME : sme2_ld_vector_vg24_multi_scalar_immediate { bits<3> Zt; let Inst{4} = Zt{2}; let Inst{1-0} = Zt{1-0}; } def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>; } //===----------------------------------------------------------------------===// // SME2 Non-Contiguous Store class sme2_st_vector_vg2_multi_scalar_scalar msz, bit n, RegisterOperand multi_vector_ty, RegisterOperand gpr_ty, string mnemonic> : I<(outs ), (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", "", []>, Sched<[]> { bits<5> Rm; bits<3> PNg; bits<5> Rn; bits<4> Zt; let Inst{31-21} = 0b10100001001; let Inst{20-16} = Rm; let Inst{15} = 0b0; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{4} = Zt{3}; let Inst{3} = n; let Inst{2-0} = Zt{2-0}; let mayStore = 1; } class sme2_st_vector_vg4_multi_scalar_scalar msz, bit n, RegisterOperand multi_vector_ty, RegisterOperand gpr_ty, string mnemonic> : I<(outs ), (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", "", []>, Sched<[]> { bits<5> Rm; bits<3> PNg; bits<5> Rn; bits<3> Zt; let Inst{31-21} = 0b10100001001; let Inst{20-16} = Rm; let Inst{15} = 0b1; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{4} = Zt{2}; let Inst{3} = n; let Inst{2} = 0b0; let Inst{1-0} = Zt{1-0}; let mayStore = 1; } class sme2_st_vector_vg24_multi_scalar_immediate msz, bit n, bits<2> op, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic> : I<(outs ), (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4), mnemonic, "\t$Zt, 
$PNg, [$Rn, $imm4, mul vl]", "", []>, Sched<[]> { bits<4> imm4; bits<3> PNg; bits<5> Rn; let Inst{31-20} = 0b101000010110; let Inst{19-16} = imm4; let Inst{15} = op{1}; let Inst{14-13} = msz; let Inst{12-10} = PNg; let Inst{9-5} = Rn; let Inst{3} = n; let Inst{2} = op{0}; let mayStore = 1; } multiclass sme2_st_vector_vg2_multi_scalar_immediate msz, bit n, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic> { def NAME: sme2_st_vector_vg24_multi_scalar_immediate { bits<4> Zt; let Inst{4} = Zt{3}; let Inst{2-0} = Zt{2-0}; } def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>; } multiclass sme2_st_vector_vg4_multi_scalar_immediate msz, bit n, RegisterOperand multi_vector_ty, Operand index_ty, string mnemonic> { def NAME : sme2_st_vector_vg24_multi_scalar_immediate { bits<3> Zt; let Inst{4} = Zt{2}; let Inst{1-0} = Zt{1-0}; } def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>; } //===----------------------------------------------------------------------===// // SME2.1 //===----------------------------------------------------------------------===// // SME zeroing move array to vector class sme2p1_movaz_tile_to_vec_base sz, bit q, bit v, ZPRRegOp vector_ty, RegisterOperand tile_ty, Operand index_ty, string mnemonic> : I<(outs vector_ty:$Zd, tile_ty:$ZAn), (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm), mnemonic, "\t$Zd, $ZAn[$Rs, $imm]", "", []>, Sched<[]> { bits<2> Rs; bits<5> Zd; let Inst{31-24} = 0b11000000; let Inst{23-22} = sz; let Inst{21-17} = 0b00001; let Inst{16} = q; let Inst{15} = v; let Inst{14-13} = Rs; let Inst{12-9} = 0b0001; let Inst{4-0} = Zd; let Constraints = "$ZAn = $_ZAn"; } multiclass sme2p1_movaz_tile_to_vec_base { def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8, !if(v, TileVectorOpV8, TileVectorOpH8), sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr { bits<4> imm; let Inst{8-5} = imm; } def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16, 
!if(v, TileVectorOpV16, TileVectorOpH16), sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr { bits<1> ZAn; bits<3> imm; let Inst{8} = ZAn; let Inst{7-5} = imm; } def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32, !if(v, TileVectorOpV32, TileVectorOpH32), sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr { bits<2> ZAn; bits<2> imm; let Inst{8-7} = ZAn; let Inst{6-5} = imm; } def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64, !if(v, TileVectorOpV64, TileVectorOpH64), sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr { bits<3> ZAn; bits<1> imm; let Inst{8-6} = ZAn; let Inst{5} = imm; } def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128, !if(v, TileVectorOpV128, TileVectorOpH128), sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr { bits<4> ZAn; let Inst{8-5} = ZAn; } } multiclass sme2p1_movaz_tile_to_vec{ defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>; defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>; def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo; def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; // H_Q def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : 
SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; // _V_Q def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; def : SME2_Tile_Movaz_Pat; } //===----------------------------------------------------------------------===// // SME2.1 multiple vectors zero array class sme2p1_zero_matrix opc, Operand index_ty, string mnemonic, string vg_acronym=""> : I<(outs MatrixOp64:$ZAd), (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm), mnemonic, "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]", "", []>, Sched<[]> { bits <2> Rv; let Inst{31-18} = 0b11000000000011; let Inst{17-15} = opc{5-3}; let Inst{14-13} = Rv; let Inst{12-3} = 0b0000000000; let Inst{2-0} = opc{2-0}; let Constraints = "$ZAd = $_ZAd"; } multiclass sme2p1_zero_matrix { def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = imm; } def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = 
imm; } def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; } //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers class sme2p1_luti_vector_vg2_index op, bits<2> sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>, Sched<[]> { bits<5> Zn; bits<4> Zd; let Inst{31-19} = 0b1100000010011; let Inst{18-15} = op; let Inst{14} = 0b1; let Inst{13-12} = sz; let Inst{11-10} = 0b00; let Inst{9-5} = Zn; let Inst{4} = Zd{3}; let Inst{3} = 0b0; let Inst{2-0} = Zd{2-0}; } class sme2p1_luti2_vector_vg2_index sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty, mnemonic> { bits<3> i; let Inst{17-15} = i; } multiclass sme2p1_luti2_vector_vg2_index { def _B : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexH, mnemonic>; def _H : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexH, mnemonic>; } class sme2p1_luti4_vector_vg2_index sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty, mnemonic> { bits<2> i; let Inst{16-15} = i; } multiclass sme2p1_luti4_vector_vg2_index { def _B : 
sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexS, mnemonic>; def _H : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexS, mnemonic>; } // SME2.1 lookup table expand four non-contiguous registers class sme2p1_luti_vector_vg4_index op, bits<2> sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>, Sched<[]> { bits<5> Zn; bits<3> Zd; let Inst{31-19} = 0b1100000010011; let Inst{18-16} = op; let Inst{15-14} = 0b10; let Inst{13-12} = sz; let Inst{11-10} = 0b00; let Inst{9-5} = Zn; let Inst{4} = Zd{2}; let Inst{3-2} = 0b00; let Inst{1-0} = Zd{1-0}; } class sme2p1_luti2_vector_vg4_index sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty, mnemonic> { bits<2> i; let Inst{17-16} = i; } multiclass sme2p1_luti2_vector_vg4_index { def _B : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided, VectorIndexS, mnemonic>; def _H : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexS, mnemonic>; } class sme2p1_luti4_vector_vg4_index sz, RegisterOperand vector_ty, AsmVectorIndexOpnd index_ty, string mnemonic> : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty, mnemonic> { bit i; let Inst{16} = i; } multiclass sme2p1_luti4_vector_vg4_index { def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>; } // SME2 lookup table two source registers expand to four contiguous destination registers class sme2_luti4_vector_vg4 sz, bits<2> op, string mnemonic> : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn), mnemonic, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<3> Zd; let Inst{31-14} = 0b110000001000101100; let Inst{13-12} = sz; let Inst{11-10} = op; let Inst{9-6} = Zn; let Inst{5} = 0b0; let Inst{4-2} = Zd; let Inst{1-0} = 0b00; } // SME2 lookup table two source registers expand to 
four non-contiguous destination registers class sme2_luti4_vector_vg4_strided sz, bits<2> op, string mnemonic> : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn), mnemonic, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<3> Zd; let Inst{31-14} = 0b110000001001101100; let Inst{13-12} = sz; let Inst{11-10} = op; let Inst{9-6} = Zn; let Inst{5} = 0b0; let Inst{4} = Zd{2}; let Inst{3-2} = 0b00; let Inst{1-0} = Zd{1-0}; }