//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// 8-bit dword offset operand used by the GFX6/GFX7 SMRD encoding.
def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;

let EncoderMethod = "getSMEMOffsetEncoding",
    DecoderMethod = "decodeSMEMOffset" in {
// 20/21-bit byte offset operand used by the SMEM encodings.
def SMEMOffset : ImmOperand<i32, "SMEMOffset", 1>;
// "offset:$imm"-style modifier operand for the SGPR+IMM addressing forms.
def SMEMOffsetMod : NamedIntOperand<i32, "offset">;
// Optional variant of SMEMOffsetMod (may be omitted in assembly).
def OptSMEMOffsetMod : NamedIntOperand<i32, "offset"> {
  let ImmTy = SMEMOffsetMod.ImmTy;
  let PredicateMethod = SMEMOffsetMod.PredicateMethod;
  let PrintMethod = SMEMOffsetMod.PrintMethod;
}
}

//===----------------------------------------------------------------------===//
// Scalar Memory classes
//===----------------------------------------------------------------------===//

// Base class for all scalar-memory pseudo instructions. Reals copy their
// flags from the pseudo (see SM_Real below).
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern=[]> :
  InstSI <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let LGKM_CNT = 1;
  let SMRD = 1;
  let mayStore = 0;
  let mayLoad = 1;
  let hasSideEffects = 0;
  let maybeAtomic = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteSMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bit has_glc = 0;
  bit has_dlc = 0;
  bit has_offset = 0;
  bit has_soffset = 0;
  bit is_buffer = 0;
}

// Base class for real (encoded) scalar-memory instructions; mirrors the
// relevant flags of the pseudo it encodes.
class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // copy relevant pseudo op flags
  let LGKM_CNT             = ps.LGKM_CNT;
  let SMRD                 = ps.SMRD;
  let mayStore             = ps.mayStore;
  let mayLoad              = ps.mayLoad;
  let hasSideEffects       = ps.hasSideEffects;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let TSFlags              = ps.TSFlags;
  bit is_buffer            = ps.is_buffer;

  // encoding
  bits<7> sbase;
  bits<7> sdst;
  bits<32> offset;
  bits<8> soffset;
  bits<5> cpol;
}

// Describes one of the supported addressing forms (immediate offset, SGPR
// offset, or SGPR+immediate) shared by the pseudo multiclasses below.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
                 dag ins, string asm> {
  bit HasOffset = hasOffset;
  bit HasSOffset = hasSOffset;
  string Variant = variant;
  dag Ins = ins;
  string Asm = asm;
}

def IMM_Offset  : OffsetMode<1, 0, "_IMM", (ins SMEMOffset:$offset), "$offset">;
def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
                                 (ins SReg_32:$soffset, SMEMOffsetMod:$offset),
                                 "$soffset$offset">;
def SGPR_IMM_OptOffset : OffsetMode<1, 1, "_SGPR_IMM",
                                    (ins SReg_32:$soffset, OptSMEMOffsetMod:$offset),
                                    "$soffset$offset">;

class SM_Probe_Pseudo <string opName, RegisterClass baseClass,
                       OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
              " $sdata, $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let LGKM_CNT = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
}

class SM_Load_Pseudo <string opName, RegisterClass baseClass,
                      RegisterClass dstClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs dstClass:$sdst),
              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
              " $sdst, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;
  let mayLoad = 1;
  let isReMaterializable = 1;
  let mayStore = 0;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
}

class SM_Store_Pseudo <string opName, RegisterClass baseClass,
                       RegisterClass srcClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins srcClass:$sdata, baseClass:$sbase), offsets.Ins,
                   (ins CPol:$cpol)),
              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;
  let mayLoad = 0;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let ScalarStore = 1;
}

class SM_Discard_Pseudo <string opName, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
              " $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let has_sdst = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
}

multiclass SM_Load_Pseudos<string op, RegisterClass baseClass,
                           RegisterClass dstClass, OffsetMode offsets> {
  defvar opName = !tolower(op);
  def "" : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;

  // The constrained multi-dword load equivalents with early clobber flag at
  // the dst operands. They are needed only for codegen and there is no need
  // for their real opcodes.
  if !gt(dstClass.RegTypes[0].Size, 32) then
    let Constraints = "@earlyclobber $sdst",
        PseudoInstr = op # offsets.Variant in
    def "" # "_ec" : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;
}

multiclass SM_Pseudo_Loads<RegisterClass baseClass,
                           RegisterClass dstClass> {
  defm _IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, IMM_Offset>;
  defm _SGPR : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_Offset>;
  defm _SGPR_IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_IMM_Offset>;
}

multiclass SM_Pseudo_Stores<RegisterClass baseClass,
                            RegisterClass srcClass> {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass, IMM_Offset>;
  def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_IMM_Offset>;
}

multiclass SM_Pseudo_Discards {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Discard_Pseudo <opName, IMM_Offset>;
  def _SGPR : SM_Discard_Pseudo <opName, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo <opName, SGPR_IMM_Offset>;
}

class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs SReg_64_XEXEC:$sdst), (ins), " $sdst",
  [(set i64:$sdst, (node))]> {
  let hasSideEffects = 1;
  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs), (ins), "", [(node)]> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let has_sdst = 0;
  let has_sbase = 0;
}

multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
  defvar opName = !tolower(NAME);
  def _IMM  : SM_Probe_Pseudo <opName, baseClass, IMM_Offset>;
  def _SGPR : SM_Probe_Pseudo <opName, baseClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_Offset>;
  def _SGPR_OPT_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_OptOffset>;
}

class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
  " $sdst", [(set i32:$sdst, (node))]> {
  let hasSideEffects = 1;
  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
  : SM_Pseudo<opName, (outs),
              !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
                   (ins SMEMOffset:$offset, SReg_32:$soffset, i8imm:$sdata)),
              !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
  // Mark prefetches as both load and store to prevent reordering with loads
  // and stores. This is also needed for pattern to match prefetch intrinsic.
  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 0;
  let LGKM_CNT = 0;
  let has_sbase = hasSBase;
  let ScalarStore = 0;
  let has_offset = 1;
  let has_soffset = 1;
}

//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//

class SM_Atomic_Pseudo <string opName,
                        dag outs, dag ins, string asmOps, bit isRet>
  : SM_Pseudo<opName, outs, ins, asmOps, []> {
  bit glc = isRet;

  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_soffset = 1;

  // Should these be set?
  let ScalarStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;

  let IsAtomicNoRet = !not(isRet);
  let IsAtomicRet = isRet;
}

class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
  SM_Atomic_Pseudo<opName,
                   !if(isRet, (outs dataClass:$sdst), (outs)),
                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
                        (ins CPolTy:$cpol)),
                   !if(isRet, " $sdst", " $sdata") #
                     ", $sbase, " # offsets.Asm # "$cpol",
                   isRet> {
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  // Tie the return value to the data operand for the returning forms.
  let Constraints = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}

multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
                             RegisterClass dataClass> {
  defvar opName = !tolower(NAME);
  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
  def _IMM_RTN      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}

//===----------------------------------------------------------------------===//
// Scalar Memory Instructions
//===----------------------------------------------------------------------===//

// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec for data operand, but
// does sdst for SMRD on SI/CI?
defm S_LOAD_DWORD    : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
  defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
defm S_LOAD_I8  : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U8  : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;

let is_buffer = 1 in {
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;

// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
  defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_128, SReg_128>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_128, SReg_256>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
defm S_BUFFER_LOAD_I8  : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_U8  : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
}

let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;

let is_buffer = 1 in {
defm S_BUFFER_STORE_DWORD   : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
}
} // End SubtargetPredicate = HasScalarStores

let SubtargetPredicate = HasSMemTimeInst in
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

let SubtargetPredicate = isGFX7GFX8GFX9 in {
def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol",
                                        int_amdgcn_s_dcache_inv_vol>;
} // let SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX8Plus in {
let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol",
                                       int_amdgcn_s_dcache_wb_vol>;
} // End OtherPredicates = [HasScalarStores]
defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
let is_buffer = 1 in {
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
}
} // SubtargetPredicate = isGFX8Plus

let SubtargetPredicate = HasSMemRealTime in
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;

let SubtargetPredicate = isGFX10Plus in
def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
let SubtargetPredicate = HasGetWaveIdInst in
def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup",
                                                  int_amdgcn_s_get_waveid_in_workgroup>;

let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD   : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;

defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
} // SubtargetPredicate = HasScalarFlatScratchInsts

let SubtargetPredicate = HasScalarAtomics in {

let is_buffer = 1 in {
defm S_BUFFER_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;

defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
}

defm S_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;

defm S_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_128>;
defm S_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;

} // let SubtargetPredicate = HasScalarAtomics

let SubtargetPredicate = HasScalarAtomics in {
defm S_DCACHE_DISCARD    : SM_Pseudo_Discards;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}

let SubtargetPredicate = isGFX12Plus in {
def S_PREFETCH_INST        : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
def S_PREFETCH_DATA        : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;

def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
  let is_buffer = 1;
}
} // end let SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//

class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
  , Enc32 {

  let AssemblerPredicate = isGFX6GFX7;
  let DecoderNamespace = "GFX6GFX7";

  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
}

multiclass SM_Real_Loads_si <bits<5> op> {
  defvar ps = NAME;
  defvar immPs = !cast<SM_Load_Pseudo>(ps#"_IMM");
  def _IMM_si : SMRD_Real_si <op, immPs> {
    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
  }
  defvar sgprPs = !cast<SM_Load_Pseudo>(ps#"_SGPR");
  def _SGPR_si : SMRD_Real_si <op, sgprPs>;
}

defm S_LOAD_DWORD           : SM_Real_Loads_si <0x00>;
defm S_LOAD_DWORDX2         : SM_Real_Loads_si <0x01>;
defm S_LOAD_DWORDX4         : SM_Real_Loads_si <0x02>;
defm S_LOAD_DWORDX8         : SM_Real_Loads_si <0x03>;
defm S_LOAD_DWORDX16        : SM_Real_Loads_si <0x04>;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>;

def S_MEMTIME_si    : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;

//===----------------------------------------------------------------------===//
// VI and GFX9.
//===----------------------------------------------------------------------===//

class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
  , Enc64 {
  field bit IsGFX9SpecificEncoding = false;
  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
  let DecoderNamespace = "GFX8";

  let Inst{5-0}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);

  // Note that for GFX9 instructions with immediate offsets, soffset_en
  // must be defined, whereas in GFX8 it's undefined in all cases,
  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
  // documentation suggesting otherwise.
  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
                            !if(ps.has_offset, ps.has_soffset,
                                !if(ps.has_soffset, 0, ?)),
                            ?);
  let Inst{14} = SOffsetEn;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);

  // imm
  // TODO: Shall not be defined if the instruction has no offset nor
  // soffset.
  let Inst{17} = ps.has_offset;

  let Inst{25-18} = op;
  let Inst{31-26} = 0x30; //encoding

  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
  // Offset value is corrected accordingly when offset is encoded/decoded.
  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  field bits<21> Offset;
  let Offset{6-0} = !if(ps.has_offset, offset{6-0},
                        !if(ps.has_soffset, soffset{6-0}, ?));
  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
  let Inst{52-32} = Offset;

  // soffset
  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
                        soffset{6-0}, ?);
}

class SMEM_Real_Load_vi<bits<8> op, string ps>
  : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>;

// The alternative GFX9 SGPR encoding using soffset to encode the
// offset register. Not available in assembler and goes to the GFX9
// encoding family to avoid conflicts with the primary SGPR variant.
class SMEM_Real_SGPR_alt_gfx9 {
  bit IsGFX9SpecificEncoding = true;
  bit SOffsetEn = 1;
  bit Offset = ?;
  int Subtarget = SIEncodingFamily.GFX9;
  string AsmVariantName = "NonParsable";
}

multiclass SM_Real_Loads_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi  : SMEM_Real_Load_vi <op, ps#"_IMM">;
  def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR">;
  def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR">,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR_IMM">;
}

class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

class SMEM_Real_Store_vi <bits<8> op, string ps>
  : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>;

multiclass SM_Real_Stores_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi  : SMEM_Real_Store_vi <op, ps#"_IMM">;
  def _SGPR_vi : SMEM_Real_Store_vi <op, ps#"_SGPR">;
  def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR">,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR_IMM">;
}

multiclass SM_Real_Probe_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi  : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps#"_IMM")>;
  def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps#"_SGPR")>;
  def _SGPR_alt_gfx9 : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps#"_SGPR")>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps#"_SGPR_IMM")>;
}

defm S_LOAD_DWORD           : SM_Real_Loads_vi <0x00>;
defm S_LOAD_DWORDX2         : SM_Real_Loads_vi <0x01>;
defm S_LOAD_DWORDX4         : SM_Real_Loads_vi <0x02>;
defm S_LOAD_DWORDX8         : SM_Real_Loads_vi <0x03>;
defm S_LOAD_DWORDX16        : SM_Real_Loads_vi <0x04>;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>;

defm S_STORE_DWORD   : SM_Real_Stores_vi <0x10>;
defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>;
defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>;

defm S_BUFFER_STORE_DWORD   : SM_Real_Stores_vi <0x18>;
defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19>;
defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a>;

// These instructions use same encoding
def S_DCACHE_INV_vi     : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi      : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi  : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi        : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi    : SMEM_Real_vi <0x25, S_MEMREALTIME>;

defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_vi <0x05>;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_vi <0x06>;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_vi <0x07>;

defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi <0x15>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>;

defm S_ATC_PROBE        : SM_Real_Probe_vi <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>;

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_vi <op, ps> {

  bits<7> sdata;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;

  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

multiclass SM_Real_Atomics_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_IMM")>;
  def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR")>;
  def _SGPR_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR")>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_IMM")>;
  def _IMM_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_IMM_RTN")>;
  def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_RTN")>;
  def _SGPR_RTN_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_RTN")>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_RTN_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_IMM_RTN")>;
}

defm S_BUFFER_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41>;
defm S_BUFFER_ATOMIC_ADD     : SM_Real_Atomics_vi <0x42>;
defm S_BUFFER_ATOMIC_SUB     : SM_Real_Atomics_vi <0x43>;
defm S_BUFFER_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x44>;
defm S_BUFFER_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x45>;
defm S_BUFFER_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x46>;
defm S_BUFFER_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x47>;
defm S_BUFFER_ATOMIC_AND     : SM_Real_Atomics_vi <0x48>;
defm S_BUFFER_ATOMIC_OR      : SM_Real_Atomics_vi <0x49>;
defm S_BUFFER_ATOMIC_XOR     : SM_Real_Atomics_vi <0x4a>;
defm S_BUFFER_ATOMIC_INC     : SM_Real_Atomics_vi <0x4b>;
defm S_BUFFER_ATOMIC_DEC     : SM_Real_Atomics_vi <0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0x67>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0x68>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0x6c>;

defm S_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x80>;
defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81>;
defm S_ATOMIC_ADD     : SM_Real_Atomics_vi <0x82>;
defm S_ATOMIC_SUB     : SM_Real_Atomics_vi <0x83>;
defm S_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x84>;
defm S_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x85>;
defm S_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x86>;
defm S_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x87>;
defm S_ATOMIC_AND     : SM_Real_Atomics_vi <0x88>;
defm S_ATOMIC_OR      : SM_Real_Atomics_vi <0x89>;
defm S_ATOMIC_XOR     : SM_Real_Atomics_vi <0x8a>;
defm S_ATOMIC_INC     : SM_Real_Atomics_vi <0x8b>;
defm S_ATOMIC_DEC     : SM_Real_Atomics_vi <0x8c>;

defm S_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0xa0>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1>;
defm S_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0xa2>;
defm S_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0xa3>;
defm S_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0xa4>;
defm S_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0xa5>;
defm S_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0xa6>;
defm S_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0xa7>;
defm S_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0xa8>;
defm S_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0xa9>;
defm S_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0xaa>;
defm S_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0xab>;
defm S_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0xac>;

multiclass SM_Real_Discard_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi  : SMEM_Real_vi <op, !cast<SM_Pseudo>(ps#"_IMM")>;
  def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Pseudo>(ps#"_SGPR")>;
  def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Pseudo>(ps#"_SGPR")>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Pseudo>(ps#"_SGPR_IMM")>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;

class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
    SM_Real<ps>,
    Enc64 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

  let Inst{7-0}   = 0xff;
  let Inst{8}     = 0;
  let Inst{14-9}  = sbase{6-1};
  let Inst{21-15} = sdst{6-0};
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
  let Inst{63-32} = offset{31-0};
}

def S_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;

class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
  , Enc32 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";

  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
}

def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;

//===----------------------------------------------------------------------===//
// Scalar Memory Patterns
//===----------------------------------------------------------------------===//

class SMRDLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
  return isUniformLoad(N);
}]> {
  let GISelPredicateCode = [{
    if (!MI.hasOneMemOperand())
      return false;
    if (!isInstrUniform(MI))
      return false;

    // FIXME: We should probably be caching this.
    SmallVector<GEPInfo, 4> AddrInfo;
    getAddrModeInfo(MI, MRI, AddrInfo);

    if (hasVgprParts(AddrInfo))
      return false;
    return true;
  }];
}

def smrd_load        : SMRDLoadPat<load>;
def smrd_extloadi8   : SMRDLoadPat<extloadi8>;
def smrd_zextloadi8  : SMRDLoadPat<zextloadi8>;
def smrd_sextloadi8  : SMRDLoadPat<sextloadi8>;
def smrd_extloadi16  : SMRDLoadPat<extloadi16>;
def smrd_zextloadi16 : SMRDLoadPat<zextloadi16>;
def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;

def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
                             (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
                             [{ return !N->getOperand(1)->isDivergent();}]> {
  let GISelPredicateCode = [{
    return isInstrUniform(MI);
  }];
}

def SMRDImm           : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32         : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr          : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
def SMRDSgprImm       : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm     : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;

class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
  // Returns true if it is a single dword load or naturally aligned multi-dword load.
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  unsigned Size = Ld->getMemoryVT().getStoreSize();
  return Size <= 4 || Ld->getAlign().value() >= Size;
}]> {
  let GISelPredicateCode = [{
    auto &Ld = cast<GLoad>(MI);
    TypeSize Size = Ld.getMMO().getSize().getValue();
    return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
  }];
}

def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;

multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
                          bit immci = true, string suffix = ""> {
  // 1. IMM offset
  def : GCNPat <
    (frag (SMRDImm i64:$sbase, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>;

  // 2. 32-bit IMM offset on CI
  if immci then def : GCNPat <
    (frag (SMRDImm32 i64:$sbase, i32:$offset)),
    (vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> {
    let SubtargetPredicate = isGFX7Only;
  }

  // 3. SGPR offset
  def : GCNPat <
    (frag (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> {
    let SubtargetPredicate = isNotGFX9Plus;
  }
  def : GCNPat <
    (frag (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> {
    let SubtargetPredicate = isGFX9Plus;
  }

  // 4. SGPR+IMM offset
  def : GCNPat <
    (frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> {
    let SubtargetPredicate = isGFX9Plus;
  }

  // 5. No offset
  def : GCNPat <
    (vt (frag (i64 SReg_64:$sbase))),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>;
}

multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
  // High priority when XNACK is enabled and the load was naturally aligned.
  let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in
  defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>;

  // XNACK is enabled and the load wasn't naturally aligned. The constrained sload variant.
  if !gt(vt.Size, 32) then {
    let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in
    defm: SMRD_Patterns <Instr, vt, smrd_load, immci, "_ec">;
  }

  // XNACK is disabled.
  let AddedComplexity = 100 in
  defm: SMRD_Patterns <Instr, vt, smrd_load, immci>;
}

multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
  // 1. Offset as an immediate
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset,
                                        (extract_cpol $cachepolicy)))> {
    let AddedComplexity = 2;
  }

  // 2. 32-bit IMM offset on CI
  if immci then def : GCNPat <
    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
    (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
                                    (extract_cpol $cachepolicy))> {
    let OtherPredicates = [isGFX7Only];
    let AddedComplexity = 1;
  }

  // 3. Offset loaded in an 32bit SGPR
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset,
                                         (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isNotGFX9Plus];
  }
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0,
                                             (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX9Plus];
  }

  // 4. Offset as an 32-bit SGPR + immediate
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
                    timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset,
                                             i32imm:$offset,
                                             (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX9Plus];
  }
}

multiclass ScalarLoadWithExtensionPat <string Instr, SDPatternOperator node,
                                       ValueType vt> {
  // 1. IMM offset
  def : GCNPat <
    (node (SMRDImm i64:$sbase, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 2. SGPR offset
  def : GCNPat <
    (node (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 3. SGPR+IMM offset
  def : GCNPat <
    (node (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 4. No offset
  def : GCNPat <
    (vt (node (i64 SReg_64:$sbase))),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }
}

multiclass ScalarBufferLoadIntrinsicPat <SDPatternOperator name, string Instr> {
  // 1. Offset as an immediate
  def : GCNPat <
    (name v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset,
                                         (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }

  // 2. Offset as an 32-bit SGPR
  def : GCNPat <
    (name v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0,
                                              (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }

  // 3. Offset as an 32-bit SGPR + immediate
  def : GCNPat <
    (name v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
          timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset,
                                              i32imm:$offset,
                                              (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }
}

// Global and constant loads can be selected to either MUBUF or SMRD
// instructions, but SMRD instructions are faster so we want the instruction
// selector to prefer those.
let AddedComplexity = 100 in {
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_extloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_zextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I8", smrd_sextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_extloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_zextloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I16", smrd_sextloadi16, i32>;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_byte, "S_BUFFER_LOAD_I8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;
} // End let AddedComplexity = 100

foreach vt = Reg32Types.types in {
defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
}

foreach vt = SReg_64.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
}

foreach vt = SReg_96.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX3", vt, false>;
}

foreach vt = SReg_128.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
}

foreach vt = SReg_256.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
}

foreach vt = SReg_512.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
}

defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",    i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",  v2i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",  v3i32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",  v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",  v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;

defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",    f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",  v2f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",  v3f32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",  v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",  v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;

let OtherPredicates = [HasSMemTimeInst] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (S_MEMTIME)
>;
} // let OtherPredicates = [HasSMemTimeInst]

let OtherPredicates = [HasShaderCyclesRegister] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (REG_SEQUENCE SReg_64,
    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -1>.ret), sub0,
    (S_MOV_B32 (i32 0)), sub1)> {
}
} // let OtherPredicates = [HasShaderCyclesRegister]

let OtherPredicates = [HasSMemRealTime] in {
def : GCNPat <
  (i64 (readsteadycounter)),
  (S_MEMREALTIME)
>;
} // let OtherPredicates = [HasSMemRealTime]

let SubtargetPredicate = isGFX11Plus in {
def : GCNPat <
  (i64 (readsteadycounter)),
  (S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
>;
} // let SubtargetPredicate = [isGFX11Plus]

def i32imm_zero : TImmLeaf <i32, [{ return Imm == 0; }]>;
def i32imm_one  : TImmLeaf <i32, [{ return Imm == 1; }]>;

multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
  def : GCNPat <
    (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat <
    (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat <
    (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
        (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)),
        0, (i32 SGPR_NULL), (i8 0))
  >;
}

defm : SMPrefetchPat<"INST", i32imm_zero>;
defm : SMPrefetchPat<"DATA", i32imm_one>;
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Common 64-bit SMEM encoding shared by GFX10 and GFX11 (opcode field and
// offset/soffset placement). GFX12 re-encodes these fields, see below.
class SMEM_Real_10Plus_common <bits<8> op, SM_Pseudo ps, string opName,
                               int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x3d;
  // There are SMEM instructions that do not employ any of the offset
  // fields, in which case we need them to remain undefined.
  let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
    SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
                            SGPR_NULL_gfxpre11> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace = "GFX10";
  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}

class SMEM_Real_Load_gfx10<bits<8> op, string ps> :
    SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;

multiclass SM_Real_Loads_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_IMM">;
  def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR">;
  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR_IMM">;
}

// Stores reuse the sdst field bits to encode the source data register.
class SMEM_Real_Store_gfx10 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

multiclass SM_Real_Stores_gfx10<bits<8> op> {
  defvar ps = NAME;
  defvar immPs = !cast<SM_Store_Pseudo>(ps#_IMM);
  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs>;

  defvar sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR);
  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs>;

  defvar sgprImmPs = !cast<SM_Store_Pseudo>(ps#_SGPR_IMM);
  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, sgprImmPs>;
}

defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000>;
defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001>;
defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002>;
defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003>;
defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004>;
defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005>;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006>;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007>;
defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008>;
defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009>;
defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a>;
defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b>;
defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c>;

defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010>;
defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011>;
defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012>;
defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018>;
defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019>;
defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a>;

def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;

// Probes encode like stores (sdata carries the probe mask).
multiclass SM_Real_Probe_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10      : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10     : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27>;

// Atomics: the GLC bit selects the returning form, which reads back into
// sdst; non-returning forms encode the data operand in the same bits.
class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_gfx10 <op, ps> {
  bits<7> sdata;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;

  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

multiclass SM_Real_Atomics_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10          : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10         : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10     : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
  def _IMM_RTN_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
  def _SGPR_RTN_gfx10     : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}

defm S_BUFFER_ATOMIC_SWAP       : SM_Real_Atomics_gfx10 <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP    : SM_Real_Atomics_gfx10 <0x41>;
defm S_BUFFER_ATOMIC_ADD        : SM_Real_Atomics_gfx10 <0x42>;
defm S_BUFFER_ATOMIC_SUB        : SM_Real_Atomics_gfx10 <0x43>;
defm S_BUFFER_ATOMIC_SMIN       : SM_Real_Atomics_gfx10 <0x44>;
defm S_BUFFER_ATOMIC_UMIN       : SM_Real_Atomics_gfx10 <0x45>;
defm S_BUFFER_ATOMIC_SMAX       : SM_Real_Atomics_gfx10 <0x46>;
defm S_BUFFER_ATOMIC_UMAX       : SM_Real_Atomics_gfx10 <0x47>;
defm S_BUFFER_ATOMIC_AND        : SM_Real_Atomics_gfx10 <0x48>;
defm S_BUFFER_ATOMIC_OR         : SM_Real_Atomics_gfx10 <0x49>;
defm S_BUFFER_ATOMIC_XOR        : SM_Real_Atomics_gfx10 <0x4a>;
defm S_BUFFER_ATOMIC_INC        : SM_Real_Atomics_gfx10 <0x4b>;
defm S_BUFFER_ATOMIC_DEC        : SM_Real_Atomics_gfx10 <0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Real_Atomics_gfx10 <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Real_Atomics_gfx10 <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Real_Atomics_gfx10 <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Real_Atomics_gfx10 <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Real_Atomics_gfx10 <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Real_Atomics_gfx10 <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Real_Atomics_gfx10 <0x67>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Real_Atomics_gfx10 <0x68>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Real_Atomics_gfx10 <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Real_Atomics_gfx10 <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Real_Atomics_gfx10 <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Real_Atomics_gfx10 <0x6c>;

defm S_ATOMIC_SWAP              : SM_Real_Atomics_gfx10 <0x80>;
defm S_ATOMIC_CMPSWAP           : SM_Real_Atomics_gfx10 <0x81>;
defm S_ATOMIC_ADD               : SM_Real_Atomics_gfx10 <0x82>;
defm S_ATOMIC_SUB               : SM_Real_Atomics_gfx10 <0x83>;
defm S_ATOMIC_SMIN              : SM_Real_Atomics_gfx10 <0x84>;
defm S_ATOMIC_UMIN              : SM_Real_Atomics_gfx10 <0x85>;
defm S_ATOMIC_SMAX              : SM_Real_Atomics_gfx10 <0x86>;
defm S_ATOMIC_UMAX              : SM_Real_Atomics_gfx10 <0x87>;
defm S_ATOMIC_AND               : SM_Real_Atomics_gfx10 <0x88>;
defm S_ATOMIC_OR                : SM_Real_Atomics_gfx10 <0x89>;
defm S_ATOMIC_XOR               : SM_Real_Atomics_gfx10 <0x8a>;
defm S_ATOMIC_INC               : SM_Real_Atomics_gfx10 <0x8b>;
defm S_ATOMIC_DEC               : SM_Real_Atomics_gfx10 <0x8c>;

defm S_ATOMIC_SWAP_X2           : SM_Real_Atomics_gfx10 <0xa0>;
defm S_ATOMIC_CMPSWAP_X2        : SM_Real_Atomics_gfx10 <0xa1>;
defm S_ATOMIC_ADD_X2            : SM_Real_Atomics_gfx10 <0xa2>;
defm S_ATOMIC_SUB_X2            : SM_Real_Atomics_gfx10 <0xa3>;
defm S_ATOMIC_SMIN_X2           : SM_Real_Atomics_gfx10 <0xa4>;
defm S_ATOMIC_UMIN_X2           : SM_Real_Atomics_gfx10 <0xa5>;
defm S_ATOMIC_SMAX_X2           : SM_Real_Atomics_gfx10 <0xa6>;
defm S_ATOMIC_UMAX_X2           : SM_Real_Atomics_gfx10 <0xa7>;
defm S_ATOMIC_AND_X2            : SM_Real_Atomics_gfx10 <0xa8>;
defm S_ATOMIC_OR_X2             : SM_Real_Atomics_gfx10 <0xa9>;
defm S_ATOMIC_XOR_X2            : SM_Real_Atomics_gfx10 <0xaa>;
defm S_ATOMIC_INC_X2            : SM_Real_Atomics_gfx10 <0xab>;
defm S_ATOMIC_DEC_X2            : SM_Real_Atomics_gfx10 <0xac>;

multiclass SM_Real_Discard_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10      : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10     : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29>;

// Opcode -> is_buffer lookup table emitted for the MC layer.
def SMInfoTable : GenericTable {
  let FilterClass = "SM_Real";
  let CppTypeName = "SMInfo";
  let Fields = ["Opcode", "is_buffer"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getSMEMOpcodeHelper";
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
                            SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace = "GFX11";
  let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}

class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName> :
    SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;

// GFX11 renamed the loads (S_LOAD_DWORD -> s_load_b32, ...); emit the new
// mnemonic for each real and alias the legacy name to it.
multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName>;
  def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName>;
  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", opName>;
  def : AMDGPUMnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName> {
    let AssemblerPredicate = isGFX11Plus;
  }
}

defm S_LOAD_B32         : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
defm S_LOAD_B64         : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
defm S_LOAD_B128        : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
defm S_LOAD_B256        : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
defm S_LOAD_B512        : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;

def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;

class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

multiclass SM_Real_Probe_gfx11<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx11      : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx11     : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx11 <0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// GFX12 moved the opcode to bits 18-13 and widened the immediate offset to
// 24 bits.
class SMEM_Real_gfx12Plus <bits<6> op, SM_Pseudo ps, string opName,
                           int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
  let Inst{18-13} = op;
  let Inst{31-26} = 0x3d;

  let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

class SMEM_Real_gfx12 <bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
                        SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace = "GFX12";

  let Inst{5-0}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
}

class SMEM_Real_Prefetch_gfx12 <bits<6> op, SM_Pseudo ps> :
    SMEM_Real_gfx12<op, ps> {
  bits<7> sdata; // Only 5 bits of sdata are supported.

  let sdst = ?;
  let Inst{12-11} = 0; // Unused sdata bits.
  let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
}

class SMEM_Real_Load_gfx12 <bits<6> op, string ps, string opName,
                            OffsetMode offsets> :
    SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
  let Inst{22-21} = cpol{4-3}; // scope
  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}

multiclass SM_Real_Loads_gfx12 <bits<6> op, string ps = NAME> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx12      : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
  def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
}

defm S_LOAD_B32  : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
defm S_LOAD_B64  : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_B96  : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;

defm S_LOAD_I8  : SM_Real_Loads_gfx12<0x08>;
defm S_LOAD_U8  : SM_Real_Loads_gfx12<0x09>;
defm S_LOAD_I16 : SM_Real_Loads_gfx12<0x0a>;
defm S_LOAD_U16 : SM_Real_Loads_gfx12<0x0b>;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_I8  : SM_Real_Loads_gfx12<0x18>;
defm S_BUFFER_LOAD_U8  : SM_Real_Loads_gfx12<0x19>;
defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;

def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;

def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
def S_PREFETCH_DATA_gfx12        : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;

multiclass SMEM_Real_Probe_gfx12 <bits<6> op> {
  defvar ps = NAME;
  def _IMM_gfx12      : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_OPT_IMM)>;
}

defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;