//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a dynamic runtime instrumentation system. // // This is the implementation of a profiling handler. // //===----------------------------------------------------------------------===// #include #include #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_flags.h" #include "xray/xray_interface.h" #include "xray/xray_log_interface.h" #include "xray_buffer_queue.h" #include "xray_flags.h" #include "xray_profile_collector.h" #include "xray_profiling_flags.h" #include "xray_recursion_guard.h" #include "xray_tsc.h" #include "xray_utils.h" #include namespace __xray { namespace { static atomic_sint32_t ProfilerLogFlushStatus = { XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; static atomic_sint32_t ProfilerLogStatus = { XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; static SpinMutex ProfilerOptionsMutex; struct ProfilingData { atomic_uintptr_t Allocators; atomic_uintptr_t FCT; }; static pthread_key_t ProfilingKey; // We use a global buffer queue, which gets initialized once at initialisation // time, and gets reset when profiling is "done". static std::aligned_storage::type BufferQueueStorage; static BufferQueue *BQ = nullptr; thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers; thread_local std::aligned_storage::type AllocatorsStorage; thread_local std::aligned_storage::type FunctionCallTrieStorage; thread_local ProfilingData TLD{{0}, {0}}; thread_local atomic_uint8_t ReentranceGuard{0}; // We use a separate guard for ensuring that for this thread, if we're already // cleaning up, that any signal handlers don't attempt to cleanup nor // initialise. thread_local atomic_uint8_t TLDInitGuard{0}; // We also use a separate latch to signal that the thread is exiting, and // non-essential work should be ignored (things like recording events, etc.). thread_local atomic_uint8_t ThreadExitingLatch{0}; static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { pthread_setspecific(ProfilingKey, &TLD); return false; }(); (void)ThreadOnce; RecursionGuard TLDInit(TLDInitGuard); if (!TLDInit) return nullptr; if (atomic_load_relaxed(&ThreadExitingLatch)) return nullptr; uptr Allocators = 0; if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, memory_order_acq_rel)) { bool Success = false; auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { if (!Success) atomic_store(&TLD.Allocators, 0, memory_order_release); }); // Acquire a set of buffers for this thread. if (BQ == nullptr) return nullptr; if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok) return nullptr; auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { if (!Success) BQ->releaseBuffer(ThreadBuffers.NodeBuffer); }); if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok) return nullptr; auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { if (!Success) BQ->releaseBuffer(ThreadBuffers.RootsBuffer); }); if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) != BufferQueue::ErrorCode::Ok) return nullptr; auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { if (!Success) BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer); }); if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) != BufferQueue::ErrorCode::Ok) return nullptr; Success = true; new (&AllocatorsStorage) FunctionCallTrie::Allocators( FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers)); Allocators = reinterpret_cast( reinterpret_cast(&AllocatorsStorage)); atomic_store(&TLD.Allocators, Allocators, memory_order_release); } if (Allocators == 1) return nullptr; uptr FCT = 0; if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { new (&FunctionCallTrieStorage) FunctionCallTrie(*reinterpret_cast( atomic_load_relaxed(&TLD.Allocators))); FCT = reinterpret_cast( reinterpret_cast(&FunctionCallTrieStorage)); atomic_store(&TLD.FCT, FCT, memory_order_release); } if (FCT == 1) return nullptr; return &TLD; } static void cleanupTLD() XRAY_NEVER_INSTRUMENT { auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); if (FCT == reinterpret_cast(reinterpret_cast( &FunctionCallTrieStorage))) reinterpret_cast(FCT)->~FunctionCallTrie(); auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); if (Allocators == reinterpret_cast( reinterpret_cast(&AllocatorsStorage))) reinterpret_cast(Allocators)->~Allocators(); } static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { RecursionGuard TLDInit(TLDInitGuard); if (!TLDInit) return; uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel); if (P != reinterpret_cast( reinterpret_cast(&FunctionCallTrieStorage))) return; auto FCT = reinterpret_cast(P); DCHECK_NE(FCT, nullptr); uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel); if (A != reinterpret_cast( reinterpret_cast(&AllocatorsStorage))) return; auto Allocators = reinterpret_cast(A); DCHECK_NE(Allocators, nullptr); // Always move the data into the profile collector. profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators), std::move(ThreadBuffers), GetTid()); // Re-initialize the ThreadBuffers object to a known "default" state. ThreadBuffers = FunctionCallTrie::Allocators::Buffers{}; } } // namespace const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { #ifdef XRAY_PROFILER_DEFAULT_OPTIONS return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS); #else return ""; #endif } XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { if (atomic_load(&ProfilerLogStatus, memory_order_acquire) != XRayLogInitStatus::XRAY_LOG_FINALIZED) { if (Verbosity()) Report("Not flushing profiles, profiling not been finalized.\n"); return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; } RecursionGuard SignalGuard(ReentranceGuard); if (!SignalGuard) { if (Verbosity()) Report("Cannot finalize properly inside a signal handler!\n"); atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, memory_order_release); return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; } s32 Previous = atomic_exchange(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING, memory_order_acq_rel); if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) { if (Verbosity()) Report("Not flushing profiles, implementation still flushing.\n"); return XRayLogFlushStatus::XRAY_LOG_FLUSHING; } // At this point, we'll create the file that will contain the profile, but // only if the options say so. if (!profilingFlags()->no_flush) { // First check whether we have data in the profile collector service // before we try and write anything down. XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0}); if (B.Data == nullptr) { if (Verbosity()) Report("profiling: No data to flush.\n"); } else { LogWriter *LW = LogWriter::Open(); if (LW == nullptr) { if (Verbosity()) Report("profiling: Failed to flush to file, dropping data.\n"); } else { // Now for each of the buffers, write out the profile data as we would // see it in memory, verbatim. while (B.Data != nullptr && B.Size != 0) { LW->WriteAll(reinterpret_cast(B.Data), reinterpret_cast(B.Data) + B.Size); B = profileCollectorService::nextBuffer(B); } LogWriter::Close(LW); } } } profileCollectorService::reset(); atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, memory_order_release); atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, memory_order_release); return XRayLogFlushStatus::XRAY_LOG_FLUSHED; } void profilingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { unsigned char CPU; auto TSC = readTSC(CPU); RecursionGuard G(ReentranceGuard); if (!G) return; auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) return; if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { postCurrentThreadFCT(TLD); return; } auto T = getThreadLocalData(); if (T == nullptr) return; auto FCT = reinterpret_cast(atomic_load_relaxed(&T->FCT)); switch (Entry) { case XRayEntryType::ENTRY: case XRayEntryType::LOG_ARGS_ENTRY: FCT->enterFunction(FuncId, TSC, CPU); break; case XRayEntryType::EXIT: case XRayEntryType::TAIL: FCT->exitFunction(FuncId, TSC, CPU); break; default: // FIXME: Handle bugs. break; } } void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t) XRAY_NEVER_INSTRUMENT { return profilingHandleArg0(FuncId, Entry); } XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, XRayLogInitStatus::XRAY_LOG_FINALIZING, memory_order_release)) { if (Verbosity()) Report("Cannot finalize profile, the profiling is not initialized.\n"); return static_cast(CurrentStatus); } // Mark then finalize the current generation of buffers. This allows us to let // the threads currently holding onto new buffers still use them, but let the // last reference do the memory cleanup. DCHECK_NE(BQ, nullptr); BQ->finalize(); // Wait a grace period to allow threads to see that we're finalizing. SleepForMillis(profilingFlags()->grace_period_ms); // If we for some reason are entering this function from an instrumented // handler, we bail out. RecursionGuard G(ReentranceGuard); if (!G) return static_cast(CurrentStatus); // Post the current thread's data if we have any. postCurrentThreadFCT(TLD); // Then we force serialize the log data. profileCollectorService::serialize(); atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, memory_order_release); return XRayLogInitStatus::XRAY_LOG_FINALIZED; } XRayLogInitStatus profilingLoggingInit(size_t, size_t, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { RecursionGuard G(ReentranceGuard); if (!G) return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, memory_order_acq_rel)) { if (Verbosity()) Report("Cannot initialize already initialised profiling " "implementation.\n"); return static_cast(CurrentStatus); } { SpinMutexLock Lock(&ProfilerOptionsMutex); FlagParser ConfigParser; ProfilerFlags Flags; Flags.setDefaults(); registerProfilerFlags(&ConfigParser, &Flags); ConfigParser.ParseString(profilingCompilerDefinedFlags()); const char *Env = GetEnv("XRAY_PROFILING_OPTIONS"); if (Env == nullptr) Env = ""; ConfigParser.ParseString(Env); // Then parse the configuration string provided. ConfigParser.ParseString(static_cast(Options)); if (Verbosity()) ReportUnrecognizedFlags(); *profilingFlags() = Flags; } // We need to reset the profile data collection implementation now. profileCollectorService::reset(); // Then also reset the buffer queue implementation. if (BQ == nullptr) { bool Success = false; new (&BufferQueueStorage) BufferQueue(profilingFlags()->per_thread_allocator_max, profilingFlags()->buffers_max, Success); if (!Success) { if (Verbosity()) Report("Failed to initialize preallocated memory buffers!"); atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, memory_order_release); return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } // If we've succeeded, set the global pointer to the initialised storage. BQ = reinterpret_cast(&BufferQueueStorage); } else { BQ->finalize(); auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max, profilingFlags()->buffers_max); if (InitStatus != BufferQueue::ErrorCode::Ok) { if (Verbosity()) Report("Failed to initialize preallocated memory buffers; error: %s", BufferQueue::getErrorString(InitStatus)); atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, memory_order_release); return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } DCHECK(!BQ->finalizing()); } // We need to set up the exit handlers. static pthread_once_t Once = PTHREAD_ONCE_INIT; pthread_once( &Once, +[] { pthread_key_create( &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT { if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) return; if (P == nullptr) return; auto T = reinterpret_cast(P); if (atomic_load_relaxed(&T->Allocators) == 0) return; { // If we're somehow executing this while inside a // non-reentrant-friendly context, we skip attempting to post // the current thread's data. RecursionGuard G(ReentranceGuard); if (!G) return; postCurrentThreadFCT(*T); } }); // We also need to set up an exit handler, so that we can get the // profile information at exit time. We use the C API to do this, to not // rely on C++ ABI functions for registering exit handlers. Atexit(+[]() XRAY_NEVER_INSTRUMENT { if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) return; auto Cleanup = at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); }); // Finalize and flush. if (profilingFinalize() != XRAY_LOG_FINALIZED || profilingFlush() != XRAY_LOG_FLUSHED) return; if (Verbosity()) Report("XRay Profile flushed at exit."); }); }); __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); __xray_set_handler(profilingHandleArg0); __xray_set_handler_arg1(profilingHandleArg1); atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, memory_order_release); if (Verbosity()) Report("XRay Profiling init successful.\n"); return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT { // Set up the flag defaults from the static defaults and the // compiler-provided defaults. { SpinMutexLock Lock(&ProfilerOptionsMutex); auto *F = profilingFlags(); F->setDefaults(); FlagParser ProfilingParser; registerProfilerFlags(&ProfilingParser, F); ProfilingParser.ParseString(profilingCompilerDefinedFlags()); } XRayLogImpl Impl{ profilingLoggingInit, profilingFinalize, profilingHandleArg0, profilingFlush, }; auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl); if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { if (Verbosity()) Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = " "%d\n", RegistrationResult); return false; } if (!internal_strcmp(flags()->xray_mode, "xray-profiling")) __xray_log_select_mode("xray_profiling"); return true; } } // namespace __xray static auto UNUSED Unused = __xray::profilingDynamicInitializer();