//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Register objects for access by profilers via the perf JIT interface. // //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Threading.h" #include #include #ifdef __linux__ #include // mmap() #include // clock_gettime(), time(), localtime_r() */ #include // for read(), close() #define DEBUG_TYPE "orc" // language identifier (XXX: should we generate something better from debug // info?) #define JIT_LANG "llvm-IR" #define LLVM_PERF_JIT_MAGIC \ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ (uint32_t)'D') #define LLVM_PERF_JIT_VERSION 1 using namespace llvm; using namespace llvm::orc; struct PerfState { // cache lookups uint32_t Pid; // base directory for output data std::string JitPath; // output data stream, closed via Dumpstream int DumpFd = -1; // output data stream std::unique_ptr Dumpstream; // perf mmap marker void *MarkerAddr = NULL; }; // prevent concurrent dumps from messing up the output file static std::mutex Mutex; static std::optional State; struct RecHeader { uint32_t Id; uint32_t TotalSize; uint64_t Timestamp; }; struct DIR { RecHeader Prefix; uint64_t CodeAddr; uint64_t NrEntry; }; struct DIE { uint64_t CodeAddr; uint32_t Line; uint32_t Discrim; }; struct CLR { RecHeader Prefix; uint32_t Pid; uint32_t Tid; uint64_t Vma; uint64_t CodeAddr; uint64_t CodeSize; uint64_t CodeIndex; }; struct UWR { RecHeader Prefix; uint64_t UnwindDataSize; uint64_t EhFrameHeaderSize; uint64_t MappedSize; }; static inline uint64_t timespec_to_ns(const struct timespec *TS) { const uint64_t NanoSecPerSec = 1000000000; return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec; } static inline uint64_t perf_get_timestamp() { timespec TS; if (clock_gettime(CLOCK_MONOTONIC, &TS)) return 0; return timespec_to_ns(&TS); } static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { assert(State && "PerfState not initialized"); LLVM_DEBUG(dbgs() << "Writing debug record with " << DebugRecord.Entries.size() << " entries\n"); [[maybe_unused]] size_t Written = 0; DIR Dir{RecHeader{static_cast(DebugRecord.Prefix.Id), DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, DebugRecord.CodeAddr, DebugRecord.Entries.size()}; State->Dumpstream->write(reinterpret_cast(&Dir), sizeof(Dir)); Written += sizeof(Dir); for (auto &Die : DebugRecord.Entries) { DIE d{Die.Addr, Die.Lineno, Die.Discrim}; State->Dumpstream->write(reinterpret_cast(&d), sizeof(d)); State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1); Written += sizeof(d) + Die.Name.size() + 1; } LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); } static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { assert(State && "PerfState not initialized"); uint32_t Tid = get_threadid(); LLVM_DEBUG(dbgs() << "Writing code record with code size " << CodeRecord.CodeSize << " and code index " << CodeRecord.CodeIndex << "\n"); CLR Clr{RecHeader{static_cast(CodeRecord.Prefix.Id), CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, State->Pid, Tid, CodeRecord.Vma, CodeRecord.CodeAddr, CodeRecord.CodeSize, CodeRecord.CodeIndex}; LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, " << CodeRecord.Name.size() + 1 << " bytes of name, " << CodeRecord.CodeSize << " bytes of code\n"); State->Dumpstream->write(reinterpret_cast(&Clr), sizeof(Clr)); State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); State->Dumpstream->write((const char *)CodeRecord.CodeAddr, CodeRecord.CodeSize); } static void writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { assert(State && "PerfState not initialized"); dbgs() << "Writing unwind record with unwind data size " << UnwindRecord.UnwindDataSize << " and EH frame header size " << UnwindRecord.EHFrameHdrSize << " and mapped size " << UnwindRecord.MappedSize << "\n"; UWR Uwr{RecHeader{static_cast(UnwindRecord.Prefix.Id), UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, UnwindRecord.MappedSize}; LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " << UnwindRecord.EHFrameHdrSize << " bytes of EH frame header, " << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize << " bytes of EH frame\n"); State->Dumpstream->write(reinterpret_cast(&Uwr), sizeof(Uwr)); if (UnwindRecord.EHFrameHdrAddr) State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, UnwindRecord.EHFrameHdrSize); else State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), UnwindRecord.EHFrameHdrSize); State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize); } static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { if (!State) return make_error("PerfState not initialized", inconvertibleErrorCode()); // Serialize the batch std::lock_guard Lock(Mutex); if (Batch.UnwindingRecord.Prefix.TotalSize > 0) writeUnwindRecord(Batch.UnwindingRecord); for (const auto &DebugInfo : Batch.DebugInfoRecords) writeDebugRecord(DebugInfo); for (const auto &CodeLoad : Batch.CodeLoadRecords) writeCodeRecord(CodeLoad); State->Dumpstream->flush(); return Error::success(); } struct Header { uint32_t Magic; // characters "JiTD" uint32_t Version; // header version uint32_t TotalSize; // total size of header uint32_t ElfMach; // elf mach target uint32_t Pad1; // reserved uint32_t Pid; uint64_t Timestamp; // timestamp uint64_t Flags; // flags }; static Error OpenMarker(PerfState &State) { // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap // is captured either live (perf record running when we mmap) or in deferred // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump // file for more meta data info about the jitted code. Perf report/annotate // detect this special filename and process the jitdump file. // // Mapping must be PROT_EXEC to ensure it is captured by perf record // even when not using -d option. State.MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, MAP_PRIVATE, State.DumpFd, 0); if (State.MarkerAddr == MAP_FAILED) return make_error("could not mmap JIT marker", inconvertibleErrorCode()); return Error::success(); } void CloseMarker(PerfState &State) { if (!State.MarkerAddr) return; munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate()); State.MarkerAddr = nullptr; } static Expected
FillMachine(PerfState &State) { Header Hdr; Hdr.Magic = LLVM_PERF_JIT_MAGIC; Hdr.Version = LLVM_PERF_JIT_VERSION; Hdr.TotalSize = sizeof(Hdr); Hdr.Pid = State.Pid; Hdr.Timestamp = perf_get_timestamp(); char Id[16]; struct { uint16_t e_type; uint16_t e_machine; } Info; size_t RequiredMemory = sizeof(Id) + sizeof(Info); ErrorOr> MB = MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); // This'll not guarantee that enough data was actually read from the // underlying file. Instead the trailing part of the buffer would be // zeroed. Given the ELF signature check below that seems ok though, // it's unlikely that the file ends just after that, and the // consequence would just be that perf wouldn't recognize the // signature. if (!MB) return make_error("could not open /proc/self/exe", MB.getError()); memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id)); memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info)); // check ELF signature if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F') return make_error("invalid ELF signature", inconvertibleErrorCode()); Hdr.ElfMach = Info.e_machine; return Hdr; } static Error InitDebuggingDir(PerfState &State) { time_t Time; struct tm LocalTime; char TimeBuffer[sizeof("YYYYMMDD")]; SmallString<64> Path; // search for location to dump data to if (const char *BaseDir = getenv("JITDUMPDIR")) Path.append(BaseDir); else if (!sys::path::home_directory(Path)) Path = "."; // create debug directory Path += "/.debug/jit/"; if (auto EC = sys::fs::create_directories(Path)) { std::string ErrStr; raw_string_ostream ErrStream(ErrStr); ErrStream << "could not create jit cache directory " << Path << ": " << EC.message() << "\n"; return make_error(std::move(ErrStr), inconvertibleErrorCode()); } // create unique directory for dump data related to this process time(&Time); localtime_r(&Time, &LocalTime); strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); Path += JIT_LANG "-jit-"; Path += TimeBuffer; SmallString<128> UniqueDebugDir; using sys::fs::createUniqueDirectory; if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { std::string ErrStr; raw_string_ostream ErrStream(ErrStr); ErrStream << "could not create unique jit cache directory " << UniqueDebugDir << ": " << EC.message() << "\n"; return make_error(std::move(ErrStr), inconvertibleErrorCode()); } State.JitPath = std::string(UniqueDebugDir); return Error::success(); } static Error registerJITLoaderPerfStartImpl() { PerfState Tentative; Tentative.Pid = sys::Process::getProcessId(); // check if clock-source is supported if (!perf_get_timestamp()) return make_error("kernel does not support CLOCK_MONOTONIC", inconvertibleErrorCode()); if (auto Err = InitDebuggingDir(Tentative)) return Err; std::string Filename; raw_string_ostream FilenameBuf(Filename); FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump"; // Need to open ourselves, because we need to hand the FD to OpenMarker() and // raw_fd_ostream doesn't expose the FD. using sys::fs::openFileForWrite; if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd, sys::fs::CD_CreateNew, sys::fs::OF_None)) { std::string ErrStr; raw_string_ostream ErrStream(ErrStr); ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": " << EC.message() << "\n"; return make_error(std::move(ErrStr), inconvertibleErrorCode()); } Tentative.Dumpstream = std::make_unique(Tentative.DumpFd, true); auto Header = FillMachine(Tentative); if (!Header) return Header.takeError(); // signal this process emits JIT information if (auto Err = OpenMarker(Tentative)) return Err; Tentative.Dumpstream->write(reinterpret_cast(&Header.get()), sizeof(*Header)); // Everything initialized, can do profiling now. if (Tentative.Dumpstream->has_error()) return make_error("could not write JIT dump header", inconvertibleErrorCode()); State = std::move(Tentative); return Error::success(); } static Error registerJITLoaderPerfEndImpl() { if (!State) return make_error("PerfState not initialized", inconvertibleErrorCode()); RecHeader Close; Close.Id = static_cast(PerfJITRecordType::JIT_CODE_CLOSE); Close.TotalSize = sizeof(Close); Close.Timestamp = perf_get_timestamp(); State->Dumpstream->write(reinterpret_cast(&Close), sizeof(Close)); if (State->MarkerAddr) CloseMarker(*State); State.reset(); return Error::success(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { using namespace orc::shared; return WrapperFunction::handle( Data, Size, registerJITLoaderPerfImpl) .release(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { using namespace orc::shared; return WrapperFunction::handle(Data, Size, registerJITLoaderPerfStartImpl) .release(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { using namespace orc::shared; return WrapperFunction::handle(Data, Size, registerJITLoaderPerfEndImpl) .release(); } #else using namespace llvm; using namespace llvm::orc; static Error badOS() { using namespace llvm; return llvm::make_error( "unsupported OS (perf support is only available on linux!)", inconvertibleErrorCode()); } static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { using namespace shared; return WrapperFunction::handle(Data, Size, badOSBatch) .release(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { using namespace shared; return WrapperFunction::handle(Data, Size, badOS).release(); } extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { using namespace shared; return WrapperFunction::handle(Data, Size, badOS).release(); } #endif