//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///         %0.sub0:sreg_64 = S_MOV_B32 2
///   =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
///
/// (sub1 supplies the high 32 bits of the combined immediate and sub0 the low
/// 32 bits.)
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
/// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" namespace { class GCNPreRAOptimizations : public MachineFunctionPass { private: const SIInstrInfo *TII; const SIRegisterInfo *TRI; MachineRegisterInfo *MRI; LiveIntervals *LIS; bool processReg(Register Reg); public: static char ID; GCNPreRAOptimizations() : MachineFunctionPass(ID) { initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "AMDGPU Pre-RA optimizations"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // End anonymous namespace. INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, "AMDGPU Pre-RA optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", false, false) char GCNPreRAOptimizations::ID = 0; char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; FunctionPass *llvm::createGCNPreRAOptimizationsPass() { return new GCNPreRAOptimizations(); } bool GCNPreRAOptimizations::processReg(Register Reg) { MachineInstr *Def0 = nullptr; MachineInstr *Def1 = nullptr; uint64_t Init = 0; bool Changed = false; SmallSet ModifiedRegs; bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg)); for (MachineInstr &I : MRI->def_instructions(Reg)) { switch (I.getOpcode()) { default: return false; case AMDGPU::V_ACCVGPR_WRITE_B32_e64: break; case AMDGPU::COPY: { // Some subtargets cannot do an AGPR to AGPR copy directly, and need an // intermdiate temporary VGPR register. 
Try to find the defining // accvgpr_write to avoid temporary registers. if (!IsAGPRDst) return false; Register SrcReg = I.getOperand(1).getReg(); if (!SrcReg.isVirtual()) break; // Check if source of copy is from another AGPR. bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg)); if (!IsAGPRSrc) break; // def_instructions() does not look at subregs so it may give us a // different instruction that defines the same vreg but different subreg // so we have to manually check subreg. Register SrcSubReg = I.getOperand(1).getSubReg(); for (auto &Def : MRI->def_instructions(SrcReg)) { if (SrcSubReg != Def.getOperand(0).getSubReg()) continue; if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { MachineOperand DefSrcMO = Def.getOperand(1); // Immediates are not an issue and can be propagated in // postrapseudos pass. Only handle cases where defining // accvgpr_write source is a vreg. if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) { // Propagate source reg of accvgpr write to this copy instruction I.getOperand(1).setReg(DefSrcMO.getReg()); I.getOperand(1).setSubReg(DefSrcMO.getSubReg()); // Reg uses were changed, collect unique set of registers to update // live intervals at the end. ModifiedRegs.insert(DefSrcMO.getReg()); ModifiedRegs.insert(SrcReg); Changed = true; } // Found the defining accvgpr_write, stop looking any further. break; } } break; } case AMDGPU::S_MOV_B32: if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() || I.getNumOperands() != 2) return false; switch (I.getOperand(0).getSubReg()) { default: return false; case AMDGPU::sub0: if (Def0) return false; Def0 = &I; Init |= I.getOperand(1).getImm() & 0xffffffff; break; case AMDGPU::sub1: if (Def1) return false; Def1 = &I; Init |= static_cast(I.getOperand(1).getImm()) << 32; break; } break; } } // For AGPR reg, check if live intervals need to be updated. 
if (IsAGPRDst) { if (Changed) { for (Register RegToUpdate : ModifiedRegs) { LIS->removeInterval(RegToUpdate); LIS->createAndComputeVirtRegInterval(RegToUpdate); } } return Changed; } // For SGPR reg, check if we can combine instructions. if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) return Changed; LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 << " =>\n"); if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), LIS->getInstructionIndex(*Def0))) std::swap(Def0, Def1); LIS->RemoveMachineInstrFromMaps(*Def0); LIS->RemoveMachineInstrFromMaps(*Def1); auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) .addImm(Init); Def0->eraseFromParent(); Def1->eraseFromParent(); LIS->InsertMachineInstrInMaps(*NewI); LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); LLVM_DEBUG(dbgs() << " " << *NewI); return true; } bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); LIS = &getAnalysis().getLIS(); TRI = ST.getRegisterInfo(); bool Changed = false; for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { Register Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; const TargetRegisterClass *RC = MRI->getRegClass(Reg); if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) && (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC))) continue; Changed |= processReg(Reg); } return Changed; }