//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX10 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// NOTE(review): in this copy several multiclass headers and defm/def
// instantiations below have no visible template argument list even though
// their bodies refer to parameters such as OpStr, VTI, OpNode, Opc or sched.
// The text is kept exactly as found; confirm against the upstream file.

// VNNI FP16
// Instantiated with opcode 0x52 in the SSEPackedSingle execution domain.
let ExeDomain = SSEPackedSingle in
defm VDPPHPS : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps, avx512vl_f16_info,
                                    [HasAVX10_2], [HasAVX10_2_512]>,
                    T8, PS, EVEX_CD8<32, CD8VF>;

// VNNI INT8
// Plain forms use opcode 0x50 and the "...s" forms use 0x51; the three
// signedness combinations differ only in the prefix class (XD / XS / PS).
defm VPDPBSSD   : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSSDS  : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSUD   : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBSUDS  : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBUUD   : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPBUUDS  : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VNNI INT16
// Plain forms use opcode 0xd2 and the "...s" forms use 0xd3.
defm VPDPWSUD   : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWSUDS  : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWUSD   : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUSDS  : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds, SchedWriteVecIMul, 0,
                              [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUUD   : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPWUUDS  : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds, SchedWriteVecIMul, 1,
                              [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VMPSADBW
defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
                                          avx512vl_i16_info, avx512vl_i8_info,
                                          HasAVX10_2>,
                    XS, EVEX_CD8<32, CD8VF>;

//-------------------------------------------------
// AVX10 MINMAX instructions
//-------------------------------------------------

// Packed min/max with an 8-bit immediate, opcode 0x52. Defines three maskable
// forms for one vector width:
//   rri  - register, register, immediate
//   rmi  - register, full-vector memory operand (VTI.LdFrag), immediate
//   rmbi - register, broadcast scalar memory operand (EVEX_B), immediate
// All three are marked as reading MXCSR and possibly raising FP exceptions.
multiclass avx10_minmax_packed_base {
  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<0x52, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                        (i32 timm:$src3)))>,
                               EVEX, VVVV, Sched<[WriteFMAX]>;
    defm rmi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
                                        (i32 timm:$src3)))>,
                               EVEX, VVVV,
                               Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                                OpStr, "$src3, ${src2}"#VTI.BroadcastStr#", $src1",
                                "$src1, ${src2}"#VTI.BroadcastStr#", $src3",
                                (VTI.VT (OpNode VTI.RC:$src1,
                                         (VTI.BroadcastLdFrag addr:$src2),
                                         (i32 timm:$src3)))>,
                                EVEX, VVVV, EVEX_B,
                                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
  }
}

// 512-bit register form printed with "{sae}" (EVEX_B set). It is declared
// with an empty Uses list and mayRaiseFPException = 0.
multiclass avx10_minmax_packed_sae {
  let Uses = [], mayRaiseFPException = 0 in
  defm Zrrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info512, (outs VTI.info512.RC:$dst),
                               (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2,
                                    i32u8imm:$src3), OpStr,
                               "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                               (VTI.info512.VT (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
                                                       (VTI.info512.VT VTI.info512.RC:$src2),
                                                       (i32 timm:$src3)))>,
                               EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;
}

// Instantiates the packed base multiclass once per vector length: Z under
// HasAVX10_2_512, Z256 and Z128 under HasAVX10_2.
multiclass avx10_minmax_packed {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_minmax_packed_base, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_minmax_packed_base, EVEX_V256;
    defm Z128 : avx10_minmax_packed_base, EVEX_V128;
  }
}

// Scalar min/max with an 8-bit immediate, opcode 0x53, under HasAVX10_2.
//   rri / rmi (def)   - isCodeGenOnly forms operating on _.FRC
//   rri / rmi (defm)  - maskable VR128X forms, instantiated with the "_Int"
//                       name suffix argument
//   rrib              - maskable "{sae}" register form (EVEX_B), using
//                       OpNodeSAE, with Uses = [] and mayRaiseFPException = 0
multiclass avx10_minmax_scalar {
  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
    let mayRaiseFPException = 1 in {
      let isCodeGenOnly = 1 in {
        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2,
                                                      (i32 timm:$src3)))]>,
                            Sched<[WriteFMAX]>;
        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                                      (_.ScalarLdFrag addr:$src2),
                                                      (i32 timm:$src3)))]>,
                            Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
      }
      defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                                 Sched<[WriteFMAX]>;
      defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1),
                                               (_.ScalarIntMemFrags addr:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                                 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    }
    let Uses = [], mayRaiseFPException = 0 in
    defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                OpStr, "$src3, {sae}, $src2, $src1",
                                "$src1, $src2, {sae}, $src3",
                                (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                 (i32 timm:$src3))),
                                0, 0, 0, vselect_mask, "", "_Int">,
                                Sched<[WriteFMAX]>, EVEX_B;
  }
}

// The bf16 form overrides mayRaiseFPException to 0 and has no SAE variant;
// the pd/ph/ps forms add the 512-bit SAE variant.
let mayRaiseFPException = 0 in
defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
                   AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;

defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
                 AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;

defm VMINMAXPH : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info, X86vminmaxSae>,
                 AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;

defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
                 AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;

defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

//-------------------------------------------------
// AVX10 SATCVT instructions
//-------------------------------------------------

// Register (rr), memory (rm) and broadcast-memory (rmb, EVEX_B) forms of a
// saturating conversion for one vector width.
multiclass avx10_sat_cvt_rmb Opc, string OpStr, X86FoldableSchedWrite sched,
                             X86VectorVTInfo DestInfo,
                             X86VectorVTInfo SrcInfo,
                             SDNode MaskNode> {
  defm rr: AVX512_maskable, Sched<[sched]>;
  defm rm: AVX512_maskable, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Conversion with rounding control (RC)
// Single 512-bit register form (EVEX_RC + EVEX_B), gated on HasAVX10_2_512.
multiclass avx10_sat_cvt_rc Opc, string OpStr, X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo DestInfo,
                            AVX512VLVectorVTInfo SrcInfo,
                            SDNode MaskNode> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable, Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
}

// Conversion with SAE
// Single 512-bit register form (EVEX_B, no EVEX_RC), gated on HasAVX10_2_512.
multiclass avx10_sat_cvt_sae Opc, string OpStr, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo DestInfo,
                             AVX512VLVectorVTInfo SrcInfo,
                             SDNode Node> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable, Sched<[sched.ZMM]>, EVEX, EVEX_B;
}

// Instantiates avx10_sat_cvt_rmb once per vector length: Z under
// HasAVX10_2_512, Z256 and Z128 under HasAVX10_2.
multiclass avx10_sat_cvt_base Opc, string OpStr, X86SchedWriteWidths sched,
                              SDNode MaskNode, AVX512VLVectorVTInfo DestInfo,
                              AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_sat_cvt_rmb, EVEX, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_sat_cvt_rmb, EVEX, EVEX_V256;
    defm Z128 : avx10_sat_cvt_rmb, EVEX, EVEX_V128;
  }
}

// Opcode layout used below: 0x69 = ...2ibs, 0x6b = ...2iubs,
// 0x68 = truncating ...2ibs, 0x6a = truncating ...2iubs.
// The ph/ps non-truncating forms add the rounding-control variant; the ph/ps
// truncating forms add the SAE variant; the bf16 forms add neither.
defm VCVTBF162IBS : avx10_sat_cvt_base<0x69, "vcvtbf162ibs",
                                       SchedWriteVecIMul, X86vcvtp2ibs,
                                       avx512vl_i16_info, avx512vl_bf16_info>,
                    AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTBF162IUBS : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs",
                                        SchedWriteVecIMul, X86vcvtp2iubs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPH2IBS : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i16_info,
                                     avx512vl_f16_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                   avx512vl_i16_info, avx512vl_f16_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2IUBS : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                    avx512vl_i16_info, avx512vl_f16_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPS2IBS : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i32_info,
                                     avx512vl_f32_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                   avx512vl_i32_info, avx512vl_f32_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTPS2IUBS : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                    avx512vl_i32_info, avx512vl_f32_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

defm VCVTTBF162IBS : avx10_sat_cvt_base<0x68, "vcvttbf162ibs",
                                        SchedWriteVecIMul, X86vcvttp2ibs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTBF162IUBS : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs",
                                         SchedWriteVecIMul, X86vcvttp2iubs,
                                         avx512vl_i16_info, avx512vl_bf16_info>,
                      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPH2IBS : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                     avx512vl_i16_info, avx512vl_f16_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2IUBS : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i16_info,
                                       avx512vl_f16_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                      avx512vl_i16_info, avx512vl_f16_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPS2IBS : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                     avx512vl_i32_info, avx512vl_f32_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i32_info,
                                       avx512vl_f32_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                      avx512vl_i32_info, avx512vl_f32_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
//-------------------------------------------------
// AVX10 SATCVT-DS instructions
//-------------------------------------------------

// NOTE(review): in this copy the multiclass headers below start without an
// opening '<' and the avx512_vcvt_fp / avx512_vcvt_fp_sae / InstAlias
// instantiations have no visible argument lists. That text is kept exactly as
// found; confirm against the upstream file.

// Convert Double to Signed/Unsigned Doubleword with truncation.
// Defines Z (512-bit, plus SAE), Z128 and Z256 forms, a Z256 SAE form with
// hasEVEX_U set, and AT&T-syntax aliases for the Z128/Z256 register and
// broadcast forms.
multiclass avx10_cvttpd2dqs opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp, EVEX_V128;
    defm Z256 : avx512_vcvt_fp, EVEX_V256;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae, EVEX_V256;
  }

  // 128-bit source aliases.
  def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias(NAME # "Z128rrk") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias(NAME # "Z128rrkz") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias(NAME # "Z128rmbk") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias(NAME # "Z128rmbkz") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">;

  // 256-bit source aliases (the destination is still VR128X).
  def : InstAlias(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rrb") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rrk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rrbk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rrkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rrbkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rmbk") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias(NAME # "Z256rmbkz") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword with truncation and saturation
// enabled.
multiclass avx10_cvttpd2qqs opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp, EVEX_V128;
    defm Z256 : avx512_vcvt_fp, EVEX_V256;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx10_cvttps2qqs opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp, EVEX_V128;
    defm Z256 : avx512_vcvt_fp, EVEX_V256;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
// NOTE(review): unlike the three sibling multiclasses in this section, no
// hasEVEX_U Z256 SAE form is defined here - confirm that this is intentional.
multiclass avx10_cvttps2dqs opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp, EVEX_V128;
    defm Z256 : avx512_vcvt_fp, EVEX_V256;
  }
}

// Signed forms use opcode 0x6D and unsigned forms 0x6C. The doubleword
// results use the PS prefix class and the quadword results use PD.
// VCVTTPD2DQS previously listed both PD and PS; only the later PS took
// effect, so the redundant PD is dropped to match VCVTTPD2UDQS.
defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>,
                   REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>,
                    REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>,
                   T_MAP5,PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>,
                    T_MAP5,PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>,
                   REX_W, T_MAP5,PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>,
                   T_MAP5,PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>,
                    REX_W, T_MAP5,PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>,
                    T_MAP5,PD, EVEX_CD8<32, CD8VH>;

let Predicates = [HasAVX10_2] in {
// Selection patterns mapping the saturating fp-to-int nodes onto the
// register forms defined above. Forms whose element counts differ between
// source and result match the X86fp2sisat / X86fp2uisat nodes; the others
// match fp_to_sint_sat / fp_to_uint_sat with the scalar result type.

// VCVTTPD2DQS
def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS
def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS
def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
          (VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS
def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
          (VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2UQQSZrr VR256X:$src)>;

// Special patterns to allow use of X86mcvttp2sis / X86mcvttp2uis for masking
// the 128-bit pd -> dq conversions. Instruction patterns have been disabled
// with null_frag.

// Patterns VCVTTPD2DQSZ128
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
          (VCVTTPD2DQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2DQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
                         (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
                         v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;

// Patterns VCVTTPD2UDQSZ128
// The unmasked group mirrors the signed one (rr, rm, rmb). The full-vector
// load pattern was previously missing and the broadcast pattern was listed
// twice.
def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
          (VCVTTPD2UDQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2UDQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
                         (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
                         v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
multiclass avx10_cvt_s_ds opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDPatternOperator OpNode, SDNode OpNodeInt, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in { let isCodeGenOnly = 1 in { def rr : AVX512, EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; def rm : AVX512, EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } def rr_Int : AVX512, EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; let Uses = [MXCSR] in def rrb_Int : AVX512, EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; def rm_Int : AVX512, EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } defm VCVTTSS2SIS: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info, fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE, WriteCvtSS2I>, T_MAP5,XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64S: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info, fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE, WriteCvtSS2I>, REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIS: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info, fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE, WriteCvtSD2I>, T_MAP5,XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64S: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info, fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE, WriteCvtSD2I>, REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIS: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info, fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE, WriteCvtSS2I>, T_MAP5,XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64S: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info, fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE, WriteCvtSS2I>, T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIS: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info, fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE, WriteCvtSD2I>, T_MAP5,XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64S: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info, 
fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE, WriteCvtSD2I>, T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>; //------------------------------------------------- // AVX10 CONVERT instructions //------------------------------------------------- multiclass avx10_cvt2ps2ph_rc opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _Src, X86VectorVTInfo _, SDNode OpNodeRnd> { let Uses = [MXCSR] in defm rrb : AVX512_maskable, EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>; } //TODO: Merge into avx512_binop_all, difference is rounding control added here. multiclass avx10_cvt2ps2ph opc, string OpcodeStr, X86SchedWriteWidths sched, AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo, SDNode OpNode, SDNode OpNodeRnd> { let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in { defm Z : avx512_binop_rm2, avx10_cvt2ps2ph_rc, EVEX_V512, EVEX_CD8<32, CD8VF>; } let Predicates = [HasAVX10_2] in { defm Z256 : avx512_binop_rm2, EVEX_V256, EVEX_CD8<32, CD8VF>; defm Z128 : avx512_binop_rm2, EVEX_V128, EVEX_CD8<32, CD8VF>; } } defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx", SchedWriteCvtPD2PS, avx512vl_f32_info, avx512vl_f16_info, X86vfpround2, X86vfpround2Rnd>, T8; defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS, avx512vl_f16_info, avx512vl_i8_info, X86vcvt2ph2bf8, [HasAVX10_2_512], [HasAVX10_2]>, EVEX_CD8<16, CD8VF>, T8, XD; defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS, avx512vl_f16_info, avx512vl_i8_info, X86vcvt2ph2bf8s, [HasAVX10_2_512], [HasAVX10_2]>, EVEX_CD8<16, CD8VF>, T_MAP5, XD; defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS, avx512vl_f16_info, avx512vl_i8_info, X86vcvt2ph2hf8, [HasAVX10_2_512], [HasAVX10_2]>, EVEX_CD8<16, CD8VF>, T_MAP5, XD; defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS, avx512vl_f16_info, avx512vl_i8_info, X86vcvt2ph2hf8s, [HasAVX10_2_512], [HasAVX10_2]>, EVEX_CD8<16, CD8VF>, T_MAP5, XD; //TODO: Merge into 
avx512_vcvt_fp, diffrence is one more source register here. multiclass avx10_convert_3op_packed OpCode, string OpcodeStr, X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1, X86VectorVTInfo vt_src2, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched, string Broadcast = vt_src2.BroadcastStr, X86MemOperand MemOp = vt_src2.MemOp, RegisterClass MaskRC = vt_src2.KRCWM, dag LdDAG = (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT (vt_src2.LdFrag addr:$src2)))), dag MaskLdDAG = (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> { defm rr : AVX512_maskable_cvt, EVEX, VVVV, Sched<[sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_cvt, EVEX, VVVV, Sched<[sched]>; let mayLoad = 1 in defm rmb : AVX512_maskable_cvt, EVEX, VVVV, EVEX_B, Sched<[sched]>; } //TODO: Merge into avx512_cvt_trunc multiclass avx10_convert_3op OpCode, string OpcodeStr, AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src, X86SchedWriteWidths sched, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, PatFrag bcast128 = vt_src.info128.BroadcastLdFrag, PatFrag loadVT128 = vt_src.info128.LdFrag, RegisterClass maskRC128 = vt_src.info128.KRCWM> { let Predicates = [HasAVX10_2_512] in defm Z : avx10_convert_3op_packed, EVEX_V512, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z256 : avx10_convert_3op_packed, EVEX_V256, EVEX_CD8<16, CD8VF>; defm Z128 : avx10_convert_3op_packed, EVEX_V128, EVEX_CD8<16, CD8VF>; // Special patterns to allow use of MaskOpNode for masking 128 version. Instruction // patterns have been disabled with null_frag. 
/* Nine selection patterns for the 128-bit form: unmasked, merge-masked (k, passthrough in $src0) and zero-masked (kz, ImmAllZerosV) variants for each of the Z128 rr, rm and rmb instructions. */ def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT VR128X:$src2))), (!cast(NAME # "Z128rr") VR128X:$src1, VR128X:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT VR128X:$src2), (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask), (!cast(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src1, VR128X:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT VR128X:$src2), vt_dst.info128.ImmAllZerosV, maskRC128:$mask), (!cast(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src1, VR128X:$src2)>; def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1), (loadVT128 addr:$src2))), (!cast(NAME # "Z128rm") VR128X:$src1, addr:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (loadVT128 addr:$src2), (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask), (!cast(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, VR128X:$src1, addr:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (loadVT128 addr:$src2), vt_dst.info128.ImmAllZerosV, maskRC128:$mask), (!cast(NAME # "Z128rmkz") maskRC128:$mask, VR128X:$src1, addr:$src2)>; def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT (bcast128 addr:$src2)))), (!cast(NAME # "Z128rmb") VR128X:$src1, addr:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT (bcast128 addr:$src2)), (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask), (!cast(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, VR128X:$src1, addr:$src2)>; def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1), (vt_src.info128.VT (bcast128 addr:$src2)), vt_dst.info128.ImmAllZerosV, maskRC128:$mask), (!cast(NAME # "Z128rmbkz") maskRC128:$mask, VR128X:$src1, addr:$src2)>; } } /* Biased PH conversions to 8-bit results, instantiated through avx10_convert_3op with the PS prefix; VCVTBIASPH2BF8 itself is in T8. */ defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>, T8, PS; defm VCVTBIASPH2BF8S :
avx10_convert_3op<0x74, "vcvtbiasph2bf8s", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>, T_MAP5, PS; defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>, T_MAP5, PS; defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>, T_MAP5, PS; /* Single-source PH conversions to 8-bit results via avx512_cvt_trunc_ne: XS prefix, EVEX_CD8<16, CD8VF>. Here the predicate lists are passed as [HasAVX10_2] first, then [HasAVX10_2_512]. */ defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtph2bf8, X86vmcvtph2bf8, [HasAVX10_2], [HasAVX10_2_512]>, T8, XS, EVEX_CD8<16, CD8VF>; defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtph2bf8s, X86vmcvtph2bf8s, [HasAVX10_2], [HasAVX10_2_512]>, T_MAP5, XS, EVEX_CD8<16, CD8VF>; defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtph2hf8, X86vmcvtph2hf8, [HasAVX10_2], [HasAVX10_2_512]>, T_MAP5, XS, EVEX_CD8<16, CD8VF>; defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info, avx512vl_f16_info, SchedWriteCvtPD2PS, X86vcvtph2hf8s, X86vmcvtph2hf8s, [HasAVX10_2], [HasAVX10_2_512]>, T_MAP5, XS, EVEX_CD8<16, CD8VF>; /* avx10_convert_2op_nomb_packed: defines rr and rm forms only (no broadcast form), in _dest's execution domain; ld_dag defaults to (load addr:$src). The rm form is scheduled with sched.Folded. */ multiclass avx10_convert_2op_nomb_packed opc, string OpcodeStr, X86VectorVTInfo _dest, X86VectorVTInfo _src, SDNode OpNode, X86MemOperand x86memop, X86FoldableSchedWrite sched, dag ld_dag = (load addr:$src)> { let ExeDomain = _dest.ExeDomain in { defm rr : AVX512_maskable_split, Sched<[sched]>; defm rm : AVX512_maskable_split, Sched<[sched.Folded]>; } } /* avx10_convert_2op_nomb: Z is gated on HasAVX10_2_512; Z128 and Z256 on HasAVX10_2. */ multiclass avx10_convert_2op_nomb opc, SDNode OpNode> { let Predicates = [HasAVX10_2_512] in defm Z : avx10_convert_2op_nomb_packed, EVEX_V512; let Predicates = [HasAVX10_2] in { defm Z128 : avx10_convert_2op_nomb_packed, EVEX_V128; defm Z256 : avx10_convert_2op_nomb_packed, EVEX_V256;
} } defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info, avx512vl_i8_info, 0x1e, X86vcvthf82ph>, AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>; //------------------------------------------------- // AVX10 BF16 instructions //------------------------------------------------- // VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16 /* avx10_fp_binop_int_bf16: binary op whose nodes are looked up by name with !cast on "int_x86_avx10_" # OpcodeStr # "bf16" # width (512, 128, 256); the same name is passed twice per width. Z is gated on HasAVX10_2_512, Z128/Z256 on HasAVX10_2. */ multiclass avx10_fp_binop_int_bf16 opc, string OpcodeStr, X86SchedWriteSizes sched, bit IsCommutable = 0> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"bf16512"), !cast("int_x86_avx10_"#OpcodeStr#"bf16512"), v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"bf16128"), !cast("int_x86_avx10_"#OpcodeStr#"bf16128"), v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_fp_packed("int_x86_avx10_"#OpcodeStr#"bf16256"), !cast("int_x86_avx10_"#OpcodeStr#"bf16256"), v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>; } } /* avx10_fp_binop_bf16: same per-width layout and encoding tags (T_MAP5, PD, EVEX_CD8<16, CD8VF>), but takes OpNode directly; MaskOpNode defaults to OpNode. */ multiclass avx10_fp_binop_bf16 opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteSizes sched, bit IsCommutable = 0, SDPatternOperator MaskOpNode = OpNode> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fp_packed, EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fp_packed, EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_fp_packed, EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>; } } /* The BF16 arithmetic instantiations set Uses to the empty list and clear mayRaiseFPException. VADD and VMUL pass IsCommutable = 1; VSUB and VDIV pass 0. VMIN/VMAX go through the name-based avx10_fp_binop_int_bf16 variant. */ let Uses = [], mayRaiseFPException = 0 in { defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>; defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>; defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>; defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>; defm VMINBF16 :
avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>; defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>; } // VCOMISBF16 /* Both defms below share the VCOMISBF16Z prefix, define EFLAGS and require HasAVX10_2. The first uses null_frag on FR16X (see the TODO); the second is isCodeGenOnly and uses X86comi on VR128X with v8bf16. */ let Uses = [], mayRaiseFPException = 0, Defs = [EFLAGS], Predicates = [HasAVX10_2] in { //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *` //which may require extend supports on BFR16X, loadbf16, ... defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16, "comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; let isCodeGenOnly = 1 in { defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem, sse_load_bf16, "comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; } } // VCMPBF16 /* avx10_vcmp_common_bf16: rri, rmi and rmbi (EVEX_B) compare forms on opcode 0xC2. The result is written to _.KRC and the predicate comes from the u8imm $cc operand; each form supplies an X86cmpm pattern and an X86cmpm_su pattern. mayRaiseFPException is cleared for all three. */ multiclass avx10_vcmp_common_bf16 { let mayRaiseFPException = 0 in { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1>, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, ${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr#", $cc", (X86cmpm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc)>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx10_vcmp_bf16 { let Predicates = [HasAVX10_2_512] in
defm Z : avx10_vcmp_common_bf16, EVEX_V512; let Predicates = [HasAVX10_2] in { defm Z128 : avx10_vcmp_common_bf16, EVEX_V128; defm Z256 : avx10_vcmp_common_bf16, EVEX_V256; } } defm VCMPBF16 : avx10_vcmp_bf16, AVX512XDIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA; // VSQRTBF16 /* avx10_sqrt_packed_bf16: per-width avx512_sqrt_packed instantiations with PD, T_MAP5 and EVEX_CD8<16, CD8VF>; Z is gated on HasAVX10_2_512, Z128/Z256 on HasAVX10_2. */ multiclass avx10_sqrt_packed_bf16 opc, string OpcodeStr, X86SchedWriteSizes sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_sqrt_packed, EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_sqrt_packed, EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_sqrt_packed, EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>; } } let Uses = [], mayRaiseFPException = 0 in defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>; // VRSQRTBF16, VRCPBF16, VGETEXPBF16 /* avx10_fp14_bf16: the inner defms are named BF16Z, BF16Z128 and BF16Z256, which is why the outer defm names below (VRSQRT, VRCP, VGETEXP) omit the BF16 suffix. */ multiclass avx10_fp14_bf16 opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm BF16Z : avx512_fp14_p, EVEX_V512; let Predicates = [HasAVX10_2] in { defm BF16Z128 : avx512_fp14_p, EVEX_V128; defm BF16Z256 : avx512_fp14_p, EVEX_V256; } } defm VRSQRT : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm VRCP : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>, T_MAP6, PS, EVEX_CD8<16, CD8VF>; // VSCALEFBF16 /* avx10_fp_scalef_bf16: per-width avx512_fp_scalef_p instantiations with T_MAP6, PS and EVEX_CD8<16, CD8VF>. */ multiclass avx10_fp_scalef_bf16 opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fp_scalef_p, EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fp_scalef_p, EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS; defm Z256 : avx512_fp_scalef_p, EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS; } } let Uses = [], mayRaiseFPException = 0 in defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>; // VREDUCEBF16, VRNDSCALEBF16,
VGETMANTBF16 /* avx10_common_unary_fp_packed_imm_bf16: per-width avx512_unary_fp_packed_imm instantiations; Z is gated on HasAVX10_2_512, Z128/Z256 on HasAVX10_2. */ multiclass avx10_common_unary_fp_packed_imm_bf16 opc, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_unary_fp_packed_imm, EVEX_V512; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_unary_fp_packed_imm, EVEX_V128; defm Z256 : avx512_unary_fp_packed_imm, EVEX_V256; } } /* All three instantiations use AVX512XDIi8Base, TA, EVEX and EVEX_CD8<16, CD8VF>. VRNDSCALEBF16 is the only one whose OpNode (X86any_VRndScale) differs from its MaskOpNode (X86VRndScale). */ let Uses = [], mayRaiseFPException = 0 in { defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56, X86VReduce, X86VReduce, SchedWriteFRnd>, AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08, X86any_VRndScale, X86VRndScale, SchedWriteFRnd>, AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26, X86VGetMant, X86VGetMant, SchedWriteFRnd>, AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; } // VFPCLASSBF16 /* avx10_fp_fpclass_bf16: per-width avx512_vector_fpclass instantiations; unlike the blocks above, the VFPCLASSBF16 defm is not wrapped in a Uses / mayRaiseFPException override. */ multiclass avx10_fp_fpclass_bf16 opcVec, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_vector_fpclass>, EVEX_V512; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_vector_fpclass>, EVEX_V128; defm Z256 : avx512_vector_fpclass>, EVEX_V256; } } defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>, AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; // VF[,N]M[ADD,SUB][132,213,231]BF16 /* avx10_fma3p_213_bf16: per-width avx512_fma3p_213_rm instantiations with T_MAP6, PS and EVEX_CD8<16, CD8VF>; Z is gated on HasAVX10_2_512, Z128/Z256 on HasAVX10_2. */ multiclass avx10_fma3p_213_bf16 opc, string OpcodeStr, SDPatternOperator OpNode, SDNode MaskOpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fma3p_213_rm, EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fma3p_213_rm, EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_fma3p_213_rm, EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>; } } let Uses = [], mayRaiseFPException = 0 in { defm VFMADD213BF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16",
any_fma, fma, SchedWriteFMA>; defm VFMSUB213BF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub, X86Fmsub, SchedWriteFMA>; defm VFNMADD213BF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd, X86Fnmadd, SchedWriteFMA>; defm VFNMSUB213BF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub, X86Fnmsub, SchedWriteFMA>; } /* avx10_fma3p_231_bf16: per-width avx512_fma3p_231_rm instantiations with T_MAP6, PS and EVEX_CD8<16, CD8VF>; Z is gated on HasAVX10_2_512, Z128/Z256 on HasAVX10_2. */ multiclass avx10_fma3p_231_bf16 opc, string OpcodeStr, SDPatternOperator OpNode, SDNode MaskOpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fma3p_231_rm, EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fma3p_231_rm, EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_fma3p_231_rm, EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>; } } /* 231 forms use opcodes 0xB8 (fmadd), 0xBA (fmsub), 0xBC (fnmadd) and 0xBE (fnmsub); Uses is emptied and mayRaiseFPException cleared. */ let Uses = [], mayRaiseFPException = 0 in { defm VFMADD231BF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma, fma, SchedWriteFMA>; defm VFMSUB231BF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub, X86Fmsub, SchedWriteFMA>; defm VFNMADD231BF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd, X86Fnmadd, SchedWriteFMA>; defm VFNMSUB231BF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub, X86Fnmsub, SchedWriteFMA>; } /* avx10_fma3p_132_bf16: per-width avx512_fma3p_132_rm instantiations with the same encoding tags and predicates as the 231 variant. */ multiclass avx10_fma3p_132_bf16 opc, string OpcodeStr, SDPatternOperator OpNode, SDNode MaskOpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX10_2_512] in defm Z : avx512_fma3p_132_rm, EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>; let Predicates = [HasAVX10_2] in { defm Z128 : avx512_fma3p_132_rm, EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>; defm Z256 : avx512_fma3p_132_rm, EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>; } } let Uses = [], mayRaiseFPException = 0 in { defm VFMADD132BF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma, fma, SchedWriteFMA>; defm VFMSUB132BF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub, X86Fmsub, SchedWriteFMA>; defm VFNMADD132BF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16",
X86any_Fnmadd, X86Fnmadd, SchedWriteFMA>; defm VFNMSUB132BF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub, X86Fnmsub, SchedWriteFMA>; } //------------------------------------------------- // AVX10 COMEF instructions //------------------------------------------------- /* avx10_com_ef: isCodeGenOnly rr and rm forms, EVEX + EVEX_V128, marked mayRaiseFPException / SIMD_EXC; sched defaults to WriteFComX and the rm form uses sched.Folded + sched.ReadAfterFold. */ multiclass avx10_com_ef Opc, RegisterClass RC, ValueType VT, SDPatternOperator OpNode, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, Domain d, X86FoldableSchedWrite sched = WriteFComX>{ let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { def rr : AVX512, EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in { def rm : AVX512, EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } } /* avx10_com_ef_int: rr_Int, rm_Int and rrb_Int (EVEX_B) forms, not isCodeGenOnly. NOTE(review): rm_Int is scheduled with Sched<[sched]> whereas avx10_com_ef's rm uses sched.Folded / sched.ReadAfterFold - confirm this is intended. */ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, string OpcodeStr, Domain d, X86FoldableSchedWrite sched = WriteFComX> { let ExeDomain = d, mayRaiseFPException = 1 in { def rr_Int : AVX512, EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in { def rm_Int : AVX512, EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; } def rrb_Int : AVX512, EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC; } } /* All instantiations define EFLAGS, read MXCSR and require HasAVX10_2. Opcode 0x2e is used for the VUCOMX* forms and 0x2f for VCOMX*. The SH variants are the only ones without VEX_LIG. */ let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, "vucomxsd", f64mem, loadf64, SSEPackedDouble>, TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, "vucomxsh", f16mem, loadf16, SSEPackedSingle>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, "vucomxss", f32mem, loadf32, SSEPackedSingle>, TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, "vcomxsh", SSEPackedSingle>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, "vcomxss",
SSEPackedSingle>, TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; /* The three defms below reuse the VUCOMXS*Z prefixes with avx10_com_ef_int, which names its records rr_Int / rm_Int / rrb_Int. */ defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, "vucomxsd", SSEPackedDouble>, TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, "vucomxsh", SSEPackedSingle>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, "vucomxss", SSEPackedSingle>, TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; } //------------------------------------------------- // AVX10 MOVZXC (COPY) instructions //------------------------------------------------- /* VMOVZPDILo2PDIZrr carries the X86vzmovl v4i32 selection pattern. The rm, mr and rr2 variants are isCodeGenOnly + ForceDisassemble with empty pattern lists; rr2 is the 0xD6 register form and is the target of the "vmovd.s" alias. The same layout is repeated for vmovw with v8i16 and T_MAP5. */ let Predicates = [HasAVX10_2] in { def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (X86vzmovl (v4i32 VR128X:$src))))]>, EVEX, Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, Sched<[WriteVecMoveFromGpr]>; def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}", (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>; def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovw\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v8i16 (X86vzmovl (v8i16 VR128X:$src))))]>, EVEX, T_MAP5, Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0, mayLoad = 1 in def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, EVEX_CD8<16, CD8VT1>, T_MAP5, Sched<[WriteVecLoad]>; /* Store form of vmovw: the destination is declared as a 16-bit memory operand (i16mem), consistent with EVEX_CD8<16, CD8VT1> on this def and with the i16mem source of VMOVZPWILo2PWIZrm. */ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs), (ins i16mem:$dst, VR128X:$src), "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, EVEX_CD8<16, CD8VT1>, T_MAP5, Sched<[WriteVecStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, T_MAP5, Sched<[WriteVecMoveFromGpr]>; def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; } // MOVRS /* vmovrs_p: defines a single memory-source form named m, in _'s execution domain, whose node is looked up by name from "int_x86_avx10_" # OpStr # _.Size. */ multiclass vmovrs_p opc, string OpStr, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm m: AVX512_maskable("int_x86_avx10_"#OpStr#_.Size) addr:$src))>, EVEX; } } /* vmovrs_p_vl: every width additionally requires HasMOVRS and In64BitMode; Z uses HasAVX10_2_512, Z128/Z256 use HasAVX10_2. */ multiclass vmovrs_p_vl opc, string OpStr, AVX512VLVectorVTInfo _Vec> { let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in defm Z : vmovrs_p, EVEX_V512; let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in { defm Z128 : vmovrs_p, EVEX_V128; defm Z256 : vmovrs_p, EVEX_V256; } } /* All four VMOVRS variants use opcode 0x6f in T_MAP5: B and W use the XD prefix, D and Q use XS; W and Q also set REX_W. */ defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>, T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>; defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>, T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>; defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>, T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>; defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>, T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>; // SM4(EVEX) multiclass avx10_sm4_base { // SM4_Base is in X86InstrSSE.td.
/* Z128 and Z256 require HasSM4 + HasAVX10_2 and carry AddedComplexity = 1; Z requires HasSM4 + HasAVX10_2_512 and has no AddedComplexity override. NOTE(review): the AddedComplexity is presumably there to rank these above another SM4 pattern set - confirm against SM4_Base's other users. */ let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in { defm Z128 : SM4_Base, EVEX_V128; defm Z256 : SM4_Base, EVEX_V256; } let Predicates = [HasSM4, HasAVX10_2_512] in defm Z : SM4_Base, EVEX_V512; } /* Both SM4 instructions are in T8 with EVEX + VVVV; VSM4KEY4 uses the XS prefix and VSM4RNDS4 the XD prefix. */ defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV; defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;