//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the SampleProfileMatcher used for stale // profile matching. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/SampleProfileMatcher.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" using namespace llvm; using namespace sampleprof; #define DEBUG_TYPE "sample-profile-matcher" static cl::opt FuncProfileSimilarityThreshold( "func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile.")); static cl::opt MinFuncCountForCGMatching( "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching.")); static cl::opt MinCallCountForCGMatching( "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching.")); extern cl::opt SalvageStaleProfile; extern cl::opt SalvageUnusedProfile; extern cl::opt PersistProfileStaleness; extern cl::opt ReportProfileStaleness; static cl::opt SalvageStaleProfileMaxCallsites( "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped.")); void SampleProfileMatcher::findIRAnchors(const Function &F, AnchorMap &IRAnchors) const { // For inlined code, recover the original callsite and callee by finding the // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the // top-level frame is "main:1", the callsite is "1" and the callee is "foo". auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); const DILocation *PrevDIL = nullptr; do { PrevDIL = DIL; DIL = DIL->getInlinedAt(); } while (DIL->getInlinedAt()); LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( DIL, FunctionSamples::ProfileIsFS); StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); return std::make_pair(Callsite, FunctionId(CalleeName)); }; auto GetCanonicalCalleeName = [](const CallBase *CB) { StringRef CalleeName = UnknownIndirectCallee; if (Function *Callee = CB->getCalledFunction()) CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); return CalleeName; }; // Extract profile matching anchors in the IR. for (auto &BB : F) { for (auto &I : BB) { DILocation *DIL = I.getDebugLoc(); if (!DIL) continue; if (FunctionSamples::ProfileIsProbeBased) { if (auto Probe = extractProbe(I)) { // Flatten inlined IR for the matching. if (DIL->getInlinedAt()) { IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); } else { // Use empty StringRef for basic block probe. StringRef CalleeName; if (const auto *CB = dyn_cast(&I)) { // Skip the probe inst whose callee name is "llvm.pseudoprobe". if (!isa(&I)) CalleeName = GetCanonicalCalleeName(CB); } LineLocation Loc = LineLocation(Probe->Id, 0); IRAnchors.emplace(Loc, FunctionId(CalleeName)); } } } else { // TODO: For line-number based profile(AutoFDO), currently only support // find callsite anchors. In future, we need to parse all the non-call // instructions to extract the line locations for profile matching. if (!isa(&I) || isa(&I)) continue; if (DIL->getInlinedAt()) { IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); } else { LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( DIL, FunctionSamples::ProfileIsFS); StringRef CalleeName = GetCanonicalCalleeName(dyn_cast(&I)); IRAnchors.emplace(Callsite, FunctionId(CalleeName)); } } } } } void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors) const { auto isInvalidLineOffset = [](uint32_t LineOffset) { return LineOffset & 0x8000; }; auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, AnchorMap &ProfileAnchors) { auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); if (!Ret.second) { // For multiple callees, which indicates it's an indirect call, we use a // dummy name(UnknownIndirectCallee) as the indrect callee name. Ret.first->second = FunctionId(UnknownIndirectCallee); } }; for (const auto &I : FS.getBodySamples()) { const LineLocation &Loc = I.first; if (isInvalidLineOffset(Loc.LineOffset)) continue; for (const auto &C : I.second.getCallTargets()) InsertAnchor(Loc, C.first, ProfileAnchors); } for (const auto &I : FS.getCallsiteSamples()) { const LineLocation &Loc = I.first; if (isInvalidLineOffset(Loc.LineOffset)) continue; for (const auto &C : I.second) InsertAnchor(Loc, C.first, ProfileAnchors); } } bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, Function *&FuncWithoutProfile) { FuncWithoutProfile = nullptr; auto R = FunctionsWithoutProfile.find(IRFuncName); if (R != FunctionsWithoutProfile.end()) FuncWithoutProfile = R->second; return !FuncWithoutProfile; } bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); } bool SampleProfileMatcher::functionMatchesProfile( const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, bool FindMatchedProfileOnly) { if (IRFuncName == ProfileFuncName) return true; if (!SalvageUnusedProfile) return false; // If IR function doesn't have profile and the profile is unused, try // matching them. Function *IRFunc = nullptr; if (functionHasProfile(IRFuncName, IRFunc) || !isProfileUnused(ProfileFuncName)) return false; assert(FunctionId(IRFunc->getName()) != ProfileFuncName && "IR function should be different from profile function to match"); return functionMatchesProfile(*IRFunc, ProfileFuncName, FindMatchedProfileOnly); } LocToLocMap SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, const AnchorList &AnchorList2, bool MatchUnusedFunction) { int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(), MaxDepth = Size1 + Size2; auto Index = [&](int32_t I) { return I + MaxDepth; }; LocToLocMap EqualLocations; if (MaxDepth == 0) return EqualLocations; // Backtrack the SES result. auto Backtrack = [&](const std::vector> &Trace, const AnchorList &AnchorList1, const AnchorList &AnchorList2, LocToLocMap &EqualLocations) { int32_t X = Size1, Y = Size2; for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) { const auto &P = Trace[Depth]; int32_t K = X - Y; int32_t PrevK = K; if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)])) PrevK = K + 1; else PrevK = K - 1; int32_t PrevX = P[Index(PrevK)]; int32_t PrevY = PrevX - PrevK; while (X > PrevX && Y > PrevY) { X--; Y--; EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first}); } if (Depth == 0) break; if (Y == PrevY) X--; else if (X == PrevX) Y--; X = PrevX; Y = PrevY; } }; // The greedy LCS/SES algorithm. // An array contains the endpoints of the furthest reaching D-paths. std::vector V(2 * MaxDepth + 1, -1); V[Index(1)] = 0; // Trace is used to backtrack the SES result. std::vector> Trace; for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) { Trace.push_back(V); for (int32_t K = -Depth; K <= Depth; K += 2) { int32_t X = 0, Y = 0; if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)])) X = V[Index(K + 1)]; else X = V[Index(K - 1)] + 1; Y = X - K; while (X < Size1 && Y < Size2 && functionMatchesProfile( AnchorList1[X].second, AnchorList2[Y].second, !MatchUnusedFunction /* Find matched function only */)) X++, Y++; V[Index(K)] = X; if (X >= Size1 && Y >= Size2) { // Length of an SES is D. Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations); return EqualLocations; } } } // Length of an SES is greater than MaxDepth. return EqualLocations; } void SampleProfileMatcher::matchNonCallsiteLocs( const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, LocToLocMap &IRToProfileLocationMap) { auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { // Skip the unchanged location mapping to save memory. if (From != To) IRToProfileLocationMap.insert({From, To}); }; // Use function's beginning location as the initial anchor. int32_t LocationDelta = 0; SmallVector LastMatchedNonAnchors; for (const auto &IR : IRAnchors) { const auto &Loc = IR.first; bool IsMatchedAnchor = false; // Match the anchor location in lexical order. auto R = MatchedAnchors.find(Loc); if (R != MatchedAnchors.end()) { const auto &Candidate = R->second; InsertMatching(Loc, Candidate); LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() << " is matched from " << Loc << " to " << Candidate << "\n"); LocationDelta = Candidate.LineOffset - Loc.LineOffset; // Match backwards for non-anchor locations. // The locations in LastMatchedNonAnchors have been matched forwards // based on the previous anchor, spilt it evenly and overwrite the // second half based on the current anchor. for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; I < LastMatchedNonAnchors.size(); I++) { const auto &L = LastMatchedNonAnchors[I]; uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; LineLocation Candidate(CandidateLineOffset, L.Discriminator); InsertMatching(L, Candidate); LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L << " to " << Candidate << "\n"); } IsMatchedAnchor = true; LastMatchedNonAnchors.clear(); } // Match forwards for non-anchor locations. if (!IsMatchedAnchor) { uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); InsertMatching(Loc, Candidate); LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " << Candidate << "\n"); LastMatchedNonAnchors.emplace_back(Loc); } } } // Filter the non-call locations from IRAnchors and ProfileAnchors and write // them into a list for random access later. void SampleProfileMatcher::getFilteredAnchorList( const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { for (const auto &I : IRAnchors) { if (I.second.stringRef().empty()) continue; FilteredIRAnchorsList.emplace_back(I); } for (const auto &I : ProfileAnchors) FilteredProfileAnchorList.emplace_back(I); } // Call target name anchor based profile fuzzy matching. // Input: // For IR locations, the anchor is the callee name of direct callsite; For // profile locations, it's the call target name for BodySamples or inlinee's // profile name for CallsiteSamples. // Matching heuristic: // First match all the anchors using the diff algorithm, then split the // non-anchor locations between the two anchors evenly, first half are matched // based on the start anchor, second half are matched based on the end anchor. // For example, given: // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] // The matching gives: // [1, 2(foo), 3, 5, 6(bar), 7] // | | | | | | // [1, 2, 3(foo), 4, 7, 8(bar), 9] // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. void SampleProfileMatcher::runStaleProfileMatching( const Function &F, const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, bool RunCFGMatching, bool RunCGMatching) { if (!RunCFGMatching && !RunCGMatching) return; LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() << "\n"); assert(IRToProfileLocationMap.empty() && "Run stale profile matching only once per function"); AnchorList FilteredProfileAnchorList; AnchorList FilteredIRAnchorsList; getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, FilteredProfileAnchorList); if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) return; if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() << " because the number of callsites in the IR is " << FilteredIRAnchorsList.size() << " and in the profile is " << FilteredProfileAnchorList.size() << "\n"); return; } // Match the callsite anchors by finding the longest common subsequence // between IR and profile. // Define a match between two anchors as follows: // 1) The function names of anchors are the same. // 2) The similarity between the anchor functions is above a threshold if // RunCGMatching is set. // For 2), we only consider the anchor functions from IR and profile don't // appear on either side to reduce the matching scope. Note that we need to // use IR anchor as base(A side) to align with the order of // IRToProfileLocationMap. LocToLocMap MatchedAnchors = longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, RunCGMatching /* Match unused functions */); // CFG level matching: // Apply the callsite matchings to infer matching for the basic // block(non-callsite) locations and write the result to // IRToProfileLocationMap. if (RunCFGMatching) matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); } void SampleProfileMatcher::runOnFunction(Function &F) { // We need to use flattened function samples for matching. // Unlike IR, which includes all callsites from the source code, the callsites // in profile only show up when they are hit by samples, i,e. the profile // callsites in one context may differ from those in another context. To get // the maximum number of callsites, we merge the function profiles from all // contexts, aka, the flattened profile to find profile anchors. const auto *FSFlattened = getFlattenedSamplesFor(F); if (SalvageUnusedProfile && !FSFlattened) { // Apply the matching in place to find the new function's matched profile. // TODO: For extended profile format, if a function profile is unused and // it's top-level, even if the profile is matched, it's not found in the // profile. This is because sample reader only read the used profile at the // beginning, we need to support loading the profile on-demand in future. auto R = FuncToProfileNameMap.find(&F); if (R != FuncToProfileNameMap.end()) FSFlattened = getFlattenedSamplesFor(R->second); } if (!FSFlattened) return; // Anchors for IR. It's a map from IR location to callee name, callee name is // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) // for unknown indrect callee name. AnchorMap IRAnchors; findIRAnchors(F, IRAnchors); // Anchors for profile. It's a map from callsite location to a set of callee // name. AnchorMap ProfileAnchors; findProfileAnchors(*FSFlattened, ProfileAnchors); // Compute the callsite match states for profile staleness report. if (ReportProfileStaleness || PersistProfileStaleness) recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); if (!SalvageStaleProfile) return; // For probe-based profiles, run matching only when profile checksum is // mismatched. bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && !ProbeManager->profileIsValid(F, *FSFlattened); bool RunCFGMatching = !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; bool RunCGMatching = SalvageUnusedProfile; // For imported functions, the checksum metadata(pseudo_probe_desc) are // dropped, so we leverage function attribute(profile-checksum-mismatch) to // transfer the info: add the attribute during pre-link phase and check it // during post-link phase(see "profileIsValid"). if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) F.addFnAttr("profile-checksum-mismatch"); // The matching result will be saved to IRToProfileLocationMap, create a // new map for each function. auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, RunCFGMatching, RunCGMatching); // Find and update callsite match states after matching. if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, &IRToProfileLocationMap); } void SampleProfileMatcher::recordCallsiteMatchStates( const Function &F, const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, const LocToLocMap *IRToProfileLocationMap) { bool IsPostMatch = IRToProfileLocationMap != nullptr; auto &CallsiteMatchStates = FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { // IRToProfileLocationMap is null in pre-match phrase. if (!IRToProfileLocationMap) return IRLoc; const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); if (ProfileLoc != IRToProfileLocationMap->end()) return ProfileLoc->second; else return IRLoc; }; for (const auto &I : IRAnchors) { // After fuzzy profile matching, use the matching result to remap the // current IR callsite. const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); const auto &IRCalleeId = I.second; const auto &It = ProfileAnchors.find(ProfileLoc); if (It == ProfileAnchors.end()) continue; const auto &ProfCalleeId = It->second; if (IRCalleeId == ProfCalleeId) { auto It = CallsiteMatchStates.find(ProfileLoc); if (It == CallsiteMatchStates.end()) CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); else if (IsPostMatch) { if (It->second == MatchState::InitialMatch) It->second = MatchState::UnchangedMatch; else if (It->second == MatchState::InitialMismatch) It->second = MatchState::RecoveredMismatch; } } } // Check if there are any callsites in the profile that does not match to any // IR callsites. for (const auto &I : ProfileAnchors) { const auto &Loc = I.first; assert(!I.second.stringRef().empty() && "Callees should not be empty"); auto It = CallsiteMatchStates.find(Loc); if (It == CallsiteMatchStates.end()) CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); else if (IsPostMatch) { // Update the state if it's not matched(UnchangedMatch or // RecoveredMismatch). if (It->second == MatchState::InitialMismatch) It->second = MatchState::UnchangedMismatch; else if (It->second == MatchState::InitialMatch) It->second = MatchState::RemovedMatch; } } } void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel) { const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); // Skip the function that is external or renamed. if (!FuncDesc) return; if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { if (IsTopLevel) NumStaleProfileFunc++; // Given currently all probe ids are after block probe ids, once the // checksum is mismatched, it's likely all the callites are mismatched and // dropped. We conservatively count all the samples as mismatched and stop // counting the inlinees' profiles. MismatchedFunctionSamples += FS.getTotalSamples(); return; } // Even the current-level function checksum is matched, it's possible that the // nested inlinees' checksums are mismatched that affect the inlinee's sample // loading, we need to go deeper to check the inlinees' function samples. // Similarly, count all the samples as mismatched if the inlinee's checksum is // mismatched using this recursive function. for (const auto &I : FS.getCallsiteSamples()) for (const auto &CS : I.second) countMismatchedFuncSamples(CS.second, false); } void SampleProfileMatcher::countMismatchedCallsiteSamples( const FunctionSamples &FS) { auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); // Skip it if no mismatched callsite or this is an external function. if (It == FuncCallsiteMatchStates.end() || It->second.empty()) return; const auto &CallsiteMatchStates = It->second; auto findMatchState = [&](const LineLocation &Loc) { auto It = CallsiteMatchStates.find(Loc); if (It == CallsiteMatchStates.end()) return MatchState::Unknown; return It->second; }; auto AttributeMismatchedSamples = [&](const enum MatchState &State, uint64_t Samples) { if (isMismatchState(State)) MismatchedCallsiteSamples += Samples; else if (State == MatchState::RecoveredMismatch) RecoveredCallsiteSamples += Samples; }; // The non-inlined callsites are saved in the body samples of function // profile, go through it to count the non-inlined callsite samples. for (const auto &I : FS.getBodySamples()) AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); // Count the inlined callsite samples. for (const auto &I : FS.getCallsiteSamples()) { auto State = findMatchState(I.first); uint64_t CallsiteSamples = 0; for (const auto &CS : I.second) CallsiteSamples += CS.second.getTotalSamples(); AttributeMismatchedSamples(State, CallsiteSamples); if (isMismatchState(State)) continue; // When the current level of inlined call site matches the profiled call // site, we need to go deeper along the inline tree to count mismatches from // lower level inlinees. for (const auto &CS : I.second) countMismatchedCallsiteSamples(CS.second); } } void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); // Skip it if no mismatched callsite or this is an external function. if (It == FuncCallsiteMatchStates.end() || It->second.empty()) return; const auto &MatchStates = It->second; [[maybe_unused]] bool OnInitialState = isInitialState(MatchStates.begin()->second); for (const auto &I : MatchStates) { TotalProfiledCallsites++; assert( (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && "Profile matching state is inconsistent"); if (isMismatchState(I.second)) NumMismatchedCallsites++; else if (I.second == MatchState::RecoveredMismatch) NumRecoveredCallsites++; } } void SampleProfileMatcher::countCallGraphRecoveredSamples( const FunctionSamples &FS, std::unordered_set &CallGraphRecoveredProfiles) { if (CallGraphRecoveredProfiles.count(FS.getFunction())) { NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); return; } for (const auto &CM : FS.getCallsiteSamples()) { for (const auto &CS : CM.second) { countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); } } } void SampleProfileMatcher::computeAndReportProfileStaleness() { if (!ReportProfileStaleness && !PersistProfileStaleness) return; std::unordered_set CallGraphRecoveredProfiles; if (SalvageUnusedProfile) { for (const auto &I : FuncToProfileNameMap) { CallGraphRecoveredProfiles.insert(I.second); if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) continue; NumCallGraphRecoveredProfiledFunc++; } } // Count profile mismatches for profile staleness report. for (const auto &F : M) { if (skipProfileForFunction(F)) continue; // As the stats will be merged by linker, skip reporting the metrics for // imported functions to avoid repeated counting. if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) continue; const auto *FS = Reader.getSamplesFor(F); if (!FS) continue; TotalProfiledFunc++; TotalFunctionSamples += FS->getTotalSamples(); if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); // Checksum mismatch is only used in pseudo-probe mode. if (FunctionSamples::ProfileIsProbeBased) countMismatchedFuncSamples(*FS, true); // Count mismatches and samples for calliste. countMismatchCallsites(*FS); countMismatchedCallsiteSamples(*FS); } if (ReportProfileStaleness) { if (FunctionSamples::ProfileIsProbeBased) { errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc << ") of functions' profile are invalid and (" << MismatchedFunctionSamples << "/" << TotalFunctionSamples << ") of samples are discarded due to function hash mismatch.\n"; } if (SalvageUnusedProfile) { errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" << TotalProfiledFunc << ") of functions' profile are matched and (" << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples << ") of samples are reused by call graph matching.\n"; } errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" << TotalProfiledCallsites << ") of callsites' profile are invalid and (" << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" << TotalFunctionSamples << ") of samples are discarded due to callsite location mismatch.\n"; errs() << "(" << NumRecoveredCallsites << "/" << (NumRecoveredCallsites + NumMismatchedCallsites) << ") of callsites and (" << RecoveredCallsiteSamples << "/" << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) << ") of samples are recovered by stale profile matching.\n"; } if (PersistProfileStaleness) { LLVMContext &Ctx = M.getContext(); MDBuilder MDB(Ctx); SmallVector> ProfStatsVec; if (FunctionSamples::ProfileIsProbeBased) { ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); ProfStatsVec.emplace_back("MismatchedFunctionSamples", MismatchedFunctionSamples); ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); } if (SalvageUnusedProfile) { ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", NumCallGraphRecoveredProfiledFunc); ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", NumCallGraphRecoveredFuncSamples); } ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); ProfStatsVec.emplace_back("MismatchedCallsiteSamples", MismatchedCallsiteSamples); ProfStatsVec.emplace_back("RecoveredCallsiteSamples", RecoveredCallsiteSamples); auto *MD = MDB.createLLVMStats(ProfStatsVec); auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); NMD->addOperand(MD); } } void SampleProfileMatcher::findFunctionsWithoutProfile() { // TODO: Support MD5 profile. if (FunctionSamples::UseMD5) return; StringSet<> NamesInProfile; if (auto NameTable = Reader.getNameTable()) { for (auto Name : *NameTable) NamesInProfile.insert(Name.stringRef()); } for (auto &F : M) { // Skip declarations, as even if the function can be matched, we have // nothing to do with it. if (F.isDeclaration()) continue; StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); const auto *FS = getFlattenedSamplesFor(F); if (FS) continue; // For extended binary, functions fully inlined may not be loaded in the // top-level profile, so check the NameTable which has the all symbol names // in profile. if (NamesInProfile.count(CanonFName)) continue; // For extended binary, non-profiled function symbols are in the profile // symbol list table. if (PSL && PSL->contains(CanonFName)) continue; LLVM_DEBUG(dbgs() << "Function " << CanonFName << " is not in profile or profile symbol list.\n"); FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; } } bool SampleProfileMatcher::functionMatchesProfileHelper( const Function &IRFunc, const FunctionId &ProfFunc) { // The value is in the range [0, 1]. The bigger the value is, the more similar // two sequences are. float Similarity = 0.0; const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); if (!FSFlattened) return false; // The check for similarity or checksum may not be reliable if the function is // tiny, we use the number of basic block as a proxy for the function // complexity and skip the matching if it's too small. if (IRFunc.size() < MinFuncCountForCGMatching || FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching) return false; // For probe-based function, we first trust the checksum info. If the checksum // doesn't match, we continue checking for similarity. if (FunctionSamples::ProfileIsProbeBased) { const auto *FuncDesc = ProbeManager->getDesc(IRFunc); if (FuncDesc && !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) { LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() << "(IR) and " << ProfFunc << "(Profile) match.\n"); return true; } } AnchorMap IRAnchors; findIRAnchors(IRFunc, IRAnchors); AnchorMap ProfileAnchors; findProfileAnchors(*FSFlattened, ProfileAnchors); AnchorList FilteredIRAnchorsList; AnchorList FilteredProfileAnchorList; getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, FilteredProfileAnchorList); // Similarly skip the matching if the num of anchors is not enough. if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || FilteredProfileAnchorList.size() < MinCallCountForCGMatching) return false; // Use the diff algorithm to find the LCS between IR and profile. // Don't recursively match the callee function to avoid infinite matching, // callee functions will be handled later since it's processed in top-down // order . LocToLocMap MatchedAnchors = longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, false /* Match unused functions */); Similarity = static_cast(MatchedAnchors.size()) * 2 / (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() << "(IR) and " << ProfFunc << "(profile) is " << format("%.2f", Similarity) << "\n"); assert((Similarity >= 0 && Similarity <= 1.0) && "Similarity value should be in [0, 1]"); return Similarity * 100 > FuncProfileSimilarityThreshold; } // If FindMatchedProfileOnly is set to true, only use the processed function // results. This is used for skipping the repeated recursive matching. bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc, bool FindMatchedProfileOnly) { auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); if (R != FuncProfileMatchCache.end()) return R->second; if (FindMatchedProfileOnly) return false; bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; if (Matched) { FuncToProfileNameMap[&IRFunc] = ProfFunc; LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() << " matches profile:" << ProfFunc << "\n"); } return Matched; } void SampleProfileMatcher::runOnModule() { ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, FunctionSamples::ProfileIsCS); if (SalvageUnusedProfile) findFunctionsWithoutProfile(); // Process the matching in top-down order so that the caller matching result // can be used to the callee matching. std::vector TopDownFunctionList; TopDownFunctionList.reserve(M.size()); buildTopDownFuncOrder(CG, TopDownFunctionList); for (auto *F : TopDownFunctionList) { if (skipProfileForFunction(*F)) continue; runOnFunction(*F); } // Update the data in SampleLoader. if (SalvageUnusedProfile) for (auto &I : FuncToProfileNameMap) { assert(I.first && "New function is null"); FunctionId FuncName(I.first->getName()); FuncNameToProfNameMap->emplace(FuncName, I.second); // We need to remove the old entry to avoid duplicating the function // processing. SymbolMap->erase(FuncName); SymbolMap->emplace(I.second, I.first); } if (SalvageStaleProfile) distributeIRToProfileLocationMap(); computeAndReportProfileStaleness(); } void SampleProfileMatcher::distributeIRToProfileLocationMap( FunctionSamples &FS) { const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); if (ProfileMappings != FuncMappings.end()) { FS.setIRToProfileLocationMap(&(ProfileMappings->second)); } for (auto &Callees : const_cast(FS.getCallsiteSamples())) { for (auto &FS : Callees.second) { distributeIRToProfileLocationMap(FS.second); } } } // Use a central place to distribute the matching results. Outlined and inlined // profile with the function name will be set to the same pointer. void SampleProfileMatcher::distributeIRToProfileLocationMap() { for (auto &I : Reader.getProfiles()) { distributeIRToProfileLocationMap(I.second); } }