//======----------- WindowScheduler.cpp - window scheduler -------------======// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // An implementation of the Window Scheduling software pipelining algorithm. // // The fundamental concept of the window scheduling algorithm involves folding // the original MBB at a specific position, followed by list scheduling on the // folded MIs. The optimal scheduling result is then chosen from various folding // positions as the final scheduling outcome. // // The primary challenge in this algorithm lies in generating the folded MIs and // establishing their dependencies. We have innovatively employed a new MBB, // created by copying the original MBB three times, known as TripleMBB. This // TripleMBB enables the convenient implementation of MI folding and dependency // establishment. To facilitate the algorithm's implementation, we have also // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle. // // Another challenge in the algorithm is the scheduling of phis. Semantically, // it is difficult to place the phis in the window and perform list scheduling. // Therefore, we schedule these phis separately after each list scheduling. // // The provided implementation is designed for use before the Register Allocator // (RA). If the target requires implementation after RA, it is recommended to // reimplement analyseII(), schedulePhi(), and expand(). Additionally, // target-specific logic can be added in initialize(), preProcess(), and // postProcess(). // // Lastly, it is worth mentioning that getSearchIndexes() is an important // function. We have experimented with more complex heuristics on downstream // target and achieved favorable results. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/WindowScheduler.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" using namespace llvm; #define DEBUG_TYPE "pipeliner" namespace { STATISTIC(NumTryWindowSchedule, "Number of loops that we attempt to use window scheduling"); STATISTIC(NumTryWindowSearch, "Number of times that we run list schedule in the window scheduling"); STATISTIC(NumWindowSchedule, "Number of loops that we successfully use window scheduling"); STATISTIC(NumFailAnalyseII, "Window scheduling abort due to the failure of the II analysis"); cl::opt WindowSearchNum("window-search-num", cl::desc("The number of searches per loop in the window " "algorithm. 0 means no search number limit."), cl::Hidden, cl::init(6)); cl::opt WindowSearchRatio( "window-search-ratio", cl::desc("The ratio of searches per loop in the window algorithm. 100 " "means search all positions in the loop, while 0 means not " "performing any search."), cl::Hidden, cl::init(40)); cl::opt WindowIICoeff( "window-ii-coeff", cl::desc( "The coefficient used when initializing II in the window algorithm."), cl::Hidden, cl::init(5)); cl::opt WindowRegionLimit( "window-region-limit", cl::desc( "The lower limit of the scheduling region in the window algorithm."), cl::Hidden, cl::init(3)); cl::opt WindowDiffLimit( "window-diff-limit", cl::desc("The lower limit of the difference between best II and base II in " "the window algorithm. If the difference is smaller than " "this lower limit, window scheduling will not be performed."), cl::Hidden, cl::init(2)); } // namespace // WindowIILimit serves as an indicator of abnormal scheduling results and could // potentially be referenced by the derived target window scheduler. cl::opt WindowIILimit("window-ii-limit", cl::desc("The upper limit of II in the window algorithm."), cl::Hidden, cl::init(1000)); WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML) : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML), Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()), TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) { TripleDAG = std::unique_ptr( createMachineScheduler(/*OnlyBuildGraph=*/true)); } bool WindowScheduler::run() { if (!initialize()) { LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n"); return false; } // The window algorithm is time-consuming, and its compilation time should be // taken into consideration. TimeTraceScope Scope("WindowSearch"); ++NumTryWindowSchedule; // Performing the relevant processing before window scheduling. preProcess(); // The main window scheduling begins. std::unique_ptr SchedDAG(createMachineScheduler()); auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio); for (unsigned Idx : SearchIndexes) { OriToCycle.clear(); ++NumTryWindowSearch; // The scheduling starts with non-phi instruction, so SchedPhiNum needs to // be added to Idx. unsigned Offset = Idx + SchedPhiNum; auto Range = getScheduleRange(Offset, SchedInstrNum); SchedDAG->startBlock(MBB); SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum); SchedDAG->schedule(); LLVM_DEBUG(SchedDAG->dump()); unsigned II = analyseII(*SchedDAG, Offset); if (II == WindowIILimit) { restoreTripleMBB(); LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n"); ++NumFailAnalyseII; continue; } schedulePhi(Offset, II); updateScheduleResult(Offset, II); restoreTripleMBB(); LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is " << II << ".\n"); } // Performing the relevant processing after window scheduling. postProcess(); // Check whether the scheduling result is valid. if (!isScheduleValid()) { LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n"); return false; } LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset << " and Best II is " << BestII << ".\n"); // Expand the scheduling result to prologue, kernel, and epilogue. expand(); ++NumWindowSchedule; return true; } ScheduleDAGInstrs * WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) { return OnlyBuildGraph ? new ScheduleDAGMI( Context, std::make_unique(Context), true) : Context->PassConfig->createMachineScheduler(Context); } bool WindowScheduler::initialize() { if (!Subtarget->enableWindowScheduler()) { LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n"); return false; } // Initialized the member variables used by window algorithm. OriMIs.clear(); TriMIs.clear(); TriToOri.clear(); OriToCycle.clear(); SchedResult.clear(); SchedPhiNum = 0; SchedInstrNum = 0; BestII = UINT_MAX; BestOffset = 0; BaseII = 0; // List scheduling used in the window algorithm depends on LiveIntervals. if (!Context->LIS) { LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n"); return false; } // Check each MI in MBB. SmallSet PrevDefs; SmallSet PrevUses; auto IsLoopCarried = [&](MachineInstr &Phi) { // Two cases are checked here: (1)The virtual register defined by the // preceding phi is used by the succeeding phi;(2)The preceding phi uses the // virtual register defined by the succeeding phi. if (PrevUses.count(Phi.getOperand(0).getReg())) return true; PrevDefs.insert(Phi.getOperand(0).getReg()); for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { if (PrevDefs.count(Phi.getOperand(I).getReg())) return true; PrevUses.insert(Phi.getOperand(I).getReg()); } return false; }; auto PLI = TII->analyzeLoopForPipelining(MBB); for (auto &MI : *MBB) { if (MI.isMetaInstruction() || MI.isTerminator()) continue; if (MI.isPHI()) { if (IsLoopCarried(MI)) { LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n"); return false; } ++SchedPhiNum; ++BestOffset; } else ++SchedInstrNum; if (TII->isSchedulingBoundary(MI, MBB, *MF)) { LLVM_DEBUG( dbgs() << "Boundary MI is not allowed in window scheduling!\n"); return false; } if (PLI->shouldIgnoreForPipelining(&MI)) { LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in " "window scheduling!\n"); return false; } for (auto &Def : MI.all_defs()) if (Def.isReg() && Def.getReg().isPhysical()) { LLVM_DEBUG(dbgs() << "Physical registers are not supported in " "window scheduling!\n"); return false; } } if (SchedInstrNum <= WindowRegionLimit) { LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); return false; } return true; } void WindowScheduler::preProcess() { // Prior to window scheduling, it's necessary to backup the original MBB, // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB. backupMBB(); generateTripleMBB(); TripleDAG->startBlock(MBB); TripleDAG->enterRegion( MBB, MBB->begin(), MBB->getFirstTerminator(), std::distance(MBB->begin(), MBB->getFirstTerminator())); TripleDAG->buildSchedGraph(Context->AA); } void WindowScheduler::postProcess() { // After window scheduling, it's necessary to clear the TripleDAG and restore // to the original MBB. TripleDAG->exitRegion(); TripleDAG->finishBlock(); restoreMBB(); } void WindowScheduler::backupMBB() { for (auto &MI : MBB->instrs()) OriMIs.push_back(&MI); // Remove MIs and the corresponding live intervals. for (auto &MI : make_early_inc_range(*MBB)) { Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); MBB->remove(&MI); } } void WindowScheduler::restoreMBB() { // Erase MIs and the corresponding live intervals. for (auto &MI : make_early_inc_range(*MBB)) { Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); MI.eraseFromParent(); } // Restore MBB to the state before window scheduling. for (auto *MI : OriMIs) MBB->push_back(MI); updateLiveIntervals(); } void WindowScheduler::generateTripleMBB() { const unsigned DuplicateNum = 3; TriMIs.clear(); TriToOri.clear(); assert(OriMIs.size() > 0 && "The Original MIs were not backed up!"); // Step 1: Performing the first copy of MBB instructions, excluding // terminators. At the same time, we back up the anti-register of phis. // DefPairs hold the old and new define register pairs. DenseMap DefPairs; for (auto *MI : OriMIs) { if (MI->isMetaInstruction() || MI->isTerminator()) continue; if (MI->isPHI()) if (Register AntiReg = getAntiRegister(MI)) DefPairs[MI->getOperand(0).getReg()] = AntiReg; auto *NewMI = MF->CloneMachineInstr(MI); MBB->push_back(NewMI); TriMIs.push_back(NewMI); TriToOri[NewMI] = MI; } // Step 2: Performing the remaining two copies of MBB instructions excluding // phis, and the last one contains terminators. At the same time, registers // are updated accordingly. for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) { for (auto *MI : OriMIs) { if (MI->isPHI() || MI->isMetaInstruction() || (MI->isTerminator() && Cnt < DuplicateNum - 1)) continue; auto *NewMI = MF->CloneMachineInstr(MI); DenseMap NewDefs; // New defines are updated. for (auto MO : NewMI->all_defs()) if (MO.isReg() && MO.getReg().isVirtual()) { Register NewDef = MRI->createVirtualRegister(MRI->getRegClass(MO.getReg())); NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI); NewDefs[MO.getReg()] = NewDef; } // New uses are updated. for (auto DefRegPair : DefPairs) if (NewMI->readsRegister(DefRegPair.first, TRI)) { Register NewUse = DefRegPair.second; // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3': // // BB.3: DefPairs // ================================== // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7) // ... // ================================== // ... // %4 = sub i32 %1, %3 // ... // %7 = add i32 %5, %6 // ... // ---------------------------------- // ... // %8 = sub i32 %7, %3 (%1,%7),(%4,%8) // ... // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9) // ... // ---------------------------------- // ... // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9) // ... ^ // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11) // ... // ================================== // < Terminators > // ================================== if (DefPairs.count(NewUse)) NewUse = DefPairs[NewUse]; NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI); } // DefPairs is updated at last. for (auto &NewDef : NewDefs) DefPairs[NewDef.first] = NewDef.second; MBB->push_back(NewMI); TriMIs.push_back(NewMI); TriToOri[NewMI] = MI; } } // Step 3: The registers used by phis are updated, and they are generated in // the third copy of MBB. // In the privious example, the old phi is: // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] // The new phi is: // %1 = phi i32 [%2, %BB.1], [%11, %BB.3] for (auto &Phi : MBB->phis()) { for (auto DefRegPair : DefPairs) if (Phi.readsRegister(DefRegPair.first, TRI)) Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI); } updateLiveIntervals(); } void WindowScheduler::restoreTripleMBB() { // After list scheduling, the MBB is restored in one traversal. for (size_t I = 0; I < TriMIs.size(); ++I) { auto *MI = TriMIs[I]; auto OldPos = MBB->begin(); std::advance(OldPos, I); auto CurPos = MI->getIterator(); if (CurPos != OldPos) { MBB->splice(OldPos, MBB, CurPos); Context->LIS->handleMove(*MI, /*UpdateFlags=*/false); } } } SmallVector WindowScheduler::getSearchIndexes(unsigned SearchNum, unsigned SearchRatio) { // We use SearchRatio to get the index range, and then evenly get the indexes // according to the SearchNum. This is a simple huristic. Depending on the // characteristics of the target, more complex algorithms can be used for both // performance and compilation time. assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!"); unsigned MaxIdx = SchedInstrNum * SearchRatio / 100; unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1; SmallVector SearchIndexes; for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step) SearchIndexes.push_back(Idx); return SearchIndexes; } int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) { // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1. unsigned MaxDepth = 1; for (auto &SU : DAG.SUnits) MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth); return MaxDepth * WindowIICoeff; } int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset) { int InitII = getEstimatedII(DAG); ResourceManager RM(Subtarget, &DAG); RM.init(InitII); // ResourceManager and DAG are used to calculate the maximum cycle for the // scheduled MIs. Since MIs in the Region have already been scheduled, the // emit cycles can be estimated in order here. int CurCycle = 0; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = DAG.getSUnit(&MI); int ExpectCycle = CurCycle; // The predecessors of current MI determine its earliest issue cycle. for (auto &Pred : SU->Preds) { if (Pred.isWeak()) continue; auto *PredMI = Pred.getSUnit()->getInstr(); int PredCycle = getOriCycle(PredMI); ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); } // Zero cost instructions do not need to check resource. if (!TII->isZeroCost(MI.getOpcode())) { // ResourceManager can be used to detect resource conflicts between the // current MI and the previously inserted MIs. while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { ++CurCycle; if (CurCycle == (int)WindowIILimit) return CurCycle; } RM.reserveResources(*SU, CurCycle); } OriToCycle[getOriMI(&MI)] = CurCycle; LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); } LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n"); return CurCycle; } // By utilizing TripleDAG, we can easily establish dependencies between A and B. // Based on the MaxCycle and the issue cycle of A and B, we can determine // whether it is necessary to add a stall cycle. This is because, without // inserting the stall cycle, the latency constraint between A and B cannot be // satisfied. The details are as follows: // // New MBB: // ======================================== // < Phis > // ======================================== (sliding direction) // MBB copy 1 | // V // // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window----- // | | // ===================V==================== | // MBB copy 2 < MI B > | // | // < MI A > V // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------ // : // ===================V==================== // MBB copy 3 < MI B'> // // // // // ======================================== // < Terminators > // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); int DefCycle = getOriCycle(&MI); for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; // If the expected cycle does not exceed CurrentII, no check is needed. if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the // corresponding register exceeds II. auto *SuccMI = Succ.getSUnit()->getInstr(); int UseCycle = getOriCycle(SuccMI); if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n"); return MaxStallCycle; } unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) { LLVM_DEBUG(dbgs() << "Start analyzing II:\n"); int MaxCycle = calculateMaxCycle(DAG, Offset); if (MaxCycle == (int)WindowIILimit) return MaxCycle; int StallCycle = calculateStallCycle(Offset, MaxCycle); if (StallCycle == (int)WindowIILimit) return StallCycle; // The value of II is equal to the maximum execution cycle plus 1. return MaxCycle + StallCycle + 1; } void WindowScheduler::schedulePhi(int Offset, unsigned &II) { LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n"); for (auto &Phi : MBB->phis()) { int LateCycle = INT_MAX; auto *SU = TripleDAG->getSUnit(&Phi); for (auto &Succ : SU->Succs) { // Phi doesn't have any Anti successors. if (Succ.getKind() != SDep::Data) continue; // Phi is scheduled before the successor of stage 0. The issue cycle of // phi is the latest cycle in this interval. auto *SuccMI = Succ.getSUnit()->getInstr(); int Cycle = getOriCycle(SuccMI); if (getOriStage(getOriMI(SuccMI), Offset) == 0) LateCycle = std::min(LateCycle, Cycle); } // The anti-dependency of phi need to be handled separately in the same way. if (Register AntiReg = getAntiRegister(&Phi)) { auto *AntiMI = MRI->getVRegDef(AntiReg); // AntiReg may be defined outside the kernel MBB. if (AntiMI->getParent() == MBB) { auto AntiCycle = getOriCycle(AntiMI); if (getOriStage(getOriMI(AntiMI), Offset) == 0) LateCycle = std::min(LateCycle, AntiCycle); } } // If there is no limit to the late cycle, a default value is given. if (LateCycle == INT_MAX) LateCycle = (int)(II - 1); LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi); // The issue cycle of phi is set to the latest cycle in the interval. auto *OriPhi = getOriMI(&Phi); OriToCycle[OriPhi] = LateCycle; } } DenseMap WindowScheduler::getIssueOrder(unsigned Offset, unsigned II) { // At each issue cycle, phi is placed before MIs in stage 0. So the simplest // way is to put phi at the beginning of the current cycle. DenseMap> CycleToMIs; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &Phi : MBB->phis()) CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi)); for (auto &MI : Range) CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI)); // Each MI is assigned a separate ordered Id, which is used as a sort marker // in the following expand process. DenseMap IssueOrder; int Id = 0; for (int Cycle = 0; Cycle < (int)II; ++Cycle) { if (!CycleToMIs.count(Cycle)) continue; for (auto *MI : CycleToMIs[Cycle]) IssueOrder[MI] = Id++; } return IssueOrder; } void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) { // At the first update, Offset is equal to SchedPhiNum. At this time, only // BestII, BestOffset, and BaseII need to be updated. if (Offset == SchedPhiNum) { BestII = II; BestOffset = SchedPhiNum; BaseII = II; return; } // The update will only continue if the II is smaller than BestII and the II // is sufficiently small. if ((II >= BestII) || (II + WindowDiffLimit > BaseII)) return; BestII = II; BestOffset = Offset; // Record the result of the current list scheduling, noting that each MI is // stored unordered in SchedResult. SchedResult.clear(); auto IssueOrder = getIssueOrder(Offset, II); for (auto &Pair : OriToCycle) { assert(IssueOrder.count(Pair.first) && "Cannot find original MI!"); SchedResult.push_back(std::make_tuple(Pair.first, Pair.second, getOriStage(Pair.first, Offset), IssueOrder[Pair.first])); } } void WindowScheduler::expand() { // The MIs in the SchedResult are sorted by the issue order ID. llvm::stable_sort(SchedResult, [](const std::tuple &A, const std::tuple &B) { return std::get<3>(A) < std::get<3>(B); }); // Use the scheduling infrastructure for expansion, noting that InstrChanges // is not supported here. DenseMap Cycles, Stages; std::vector OrderedInsts; for (auto &Info : SchedResult) { auto *MI = std::get<0>(Info); OrderedInsts.push_back(MI); Cycles[MI] = std::get<1>(Info); Stages[MI] = std::get<2>(Info); LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI] << "]: " << *MI); } ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles), std::move(Stages)); ModuloScheduleExpander MSE(*MF, MS, *Context->LIS, ModuloScheduleExpander::InstrChangesTy()); MSE.expand(); MSE.cleanup(); } void WindowScheduler::updateLiveIntervals() { SmallVector UsedRegs; for (MachineInstr &MI : *MBB) for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.getReg() == 0) continue; Register Reg = MO.getReg(); if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs); } iterator_range WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) { auto RegionBegin = MBB->begin(); std::advance(RegionBegin, Offset); auto RegionEnd = RegionBegin; std::advance(RegionEnd, Num); return make_range(RegionBegin, RegionEnd); } int WindowScheduler::getOriCycle(MachineInstr *NewMI) { assert(TriToOri.count(NewMI) && "Cannot find original MI!"); auto *OriMI = TriToOri[NewMI]; assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!"); return OriToCycle[OriMI]; } MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) { assert(TriToOri.count(NewMI) && "Cannot find original MI!"); return TriToOri[NewMI]; } unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) { assert(llvm::find(OriMIs, OriMI) != OriMIs.end() && "Cannot find OriMI in OriMIs!"); // If there is no instruction fold, all MI stages are 0. if (Offset == SchedPhiNum) return 0; // For those MIs with an ID less than the Offset, their stages are set to 0, // while the rest are set to 1. unsigned Id = 0; for (auto *MI : OriMIs) { if (MI->isMetaInstruction()) continue; if (MI == OriMI) break; ++Id; } return Id >= (size_t)Offset ? 1 : 0; } Register WindowScheduler::getAntiRegister(MachineInstr *Phi) { assert(Phi->isPHI() && "Expecting PHI!"); Register AntiReg; for (auto MO : Phi->uses()) { if (MO.isReg()) AntiReg = MO.getReg(); else if (MO.isMBB() && MO.getMBB() == MBB) return AntiReg; } return 0; }