//===- GlobalSplit.cpp - global variable splitter -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass uses inrange annotations on GEP indices to split globals where // beneficial. Clang currently attaches these annotations to references to // virtual table globals under the Itanium ABI for the benefit of the // whole-program virtual call optimization and control flow integrity passes. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/IPO.h" #include #include using namespace llvm; static bool splitGlobal(GlobalVariable &GV) { // If the address of the global is taken outside of the module, we cannot // apply this transformation. if (!GV.hasLocalLinkage()) return false; // We currently only know how to split ConstantStructs. auto *Init = dyn_cast_or_null(GV.getInitializer()); if (!Init) return false; const DataLayout &DL = GV.getDataLayout(); const StructLayout *SL = DL.getStructLayout(Init->getType()); ArrayRef MemberOffsets = SL->getMemberOffsets(); unsigned IndexWidth = DL.getIndexTypeSizeInBits(GV.getType()); // Verify that each user of the global is an inrange getelementptr constant, // and collect information on how it relates to the global. struct GEPInfo { GEPOperator *GEP; unsigned MemberIndex; APInt MemberRelativeOffset; GEPInfo(GEPOperator *GEP, unsigned MemberIndex, APInt MemberRelativeOffset) : GEP(GEP), MemberIndex(MemberIndex), MemberRelativeOffset(std::move(MemberRelativeOffset)) {} }; SmallVector Infos; for (User *U : GV.users()) { auto *GEP = dyn_cast(U); if (!GEP) return false; std::optional InRange = GEP->getInRange(); if (!InRange) return false; APInt Offset(IndexWidth, 0); if (!GEP->accumulateConstantOffset(DL, Offset)) return false; // Determine source-relative inrange. ConstantRange SrcInRange = InRange->sextOrTrunc(IndexWidth).add(Offset); // Check that the GEP offset is in the range (treating upper bound as // inclusive here). if (!SrcInRange.contains(Offset) && SrcInRange.getUpper() != Offset) return false; // Find which struct member the range corresponds to. if (SrcInRange.getLower().uge(SL->getSizeInBytes())) return false; unsigned MemberIndex = SL->getElementContainingOffset(SrcInRange.getLower().getZExtValue()); TypeSize MemberStart = MemberOffsets[MemberIndex]; TypeSize MemberEnd = MemberIndex == MemberOffsets.size() - 1 ? SL->getSizeInBytes() : MemberOffsets[MemberIndex + 1]; // Verify that the range matches that struct member. if (SrcInRange.getLower() != MemberStart || SrcInRange.getUpper() != MemberEnd) return false; Infos.emplace_back(GEP, MemberIndex, Offset - MemberStart); } SmallVector Types; GV.getMetadata(LLVMContext::MD_type, Types); IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); std::vector SplitGlobals(Init->getNumOperands()); for (unsigned I = 0; I != Init->getNumOperands(); ++I) { // Build a global representing this split piece. auto *SplitGV = new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), GV.isConstant(), GlobalValue::PrivateLinkage, Init->getOperand(I), GV.getName() + "." + utostr(I)); SplitGlobals[I] = SplitGV; unsigned SplitBegin = SL->getElementOffset(I); unsigned SplitEnd = (I == Init->getNumOperands() - 1) ? SL->getSizeInBytes() : SL->getElementOffset(I + 1); // Rebuild type metadata, adjusting by the split offset. // FIXME: See if we can use DW_OP_piece to preserve debug metadata here. for (MDNode *Type : Types) { uint64_t ByteOffset = cast( cast(Type->getOperand(0))->getValue()) ->getZExtValue(); // Type metadata may be attached one byte after the end of the vtable, for // classes without virtual methods in Itanium ABI. AFAIK, it is never // attached to the first byte of a vtable. Subtract one to get the right // slice. // This is making an assumption that vtable groups are the only kinds of // global variables that !type metadata can be attached to, and that they // are either Itanium ABI vtable groups or contain a single vtable (i.e. // Microsoft ABI vtables). uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1; if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd) continue; SplitGV->addMetadata( LLVMContext::MD_type, *MDNode::get(GV.getContext(), {ConstantAsMetadata::get( ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), Type->getOperand(1)})); } if (GV.hasMetadata(LLVMContext::MD_vcall_visibility)) SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility()); } for (const GEPInfo &Info : Infos) { assert(Info.MemberIndex < SplitGlobals.size() && "Invalid member"); auto *NewGEP = ConstantExpr::getGetElementPtr( Type::getInt8Ty(GV.getContext()), SplitGlobals[Info.MemberIndex], ConstantInt::get(GV.getContext(), Info.MemberRelativeOffset), Info.GEP->isInBounds()); Info.GEP->replaceAllUsesWith(NewGEP); } // Finally, remove the original global. Any remaining uses refer to invalid // elements of the global, so replace with poison. if (!GV.use_empty()) GV.replaceAllUsesWith(PoisonValue::get(GV.getType())); GV.eraseFromParent(); return true; } static bool splitGlobals(Module &M) { // First, see if the module uses either of the llvm.type.test or // llvm.type.checked.load intrinsics, which indicates that splitting globals // may be beneficial. Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *TypeCheckedLoadFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); Function *TypeCheckedLoadRelativeFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative)); if ((!TypeTestFunc || TypeTestFunc->use_empty()) && (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && (!TypeCheckedLoadRelativeFunc || TypeCheckedLoadRelativeFunc->use_empty())) return false; bool Changed = false; for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) Changed |= splitGlobal(GV); return Changed; } PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) { if (!splitGlobals(M)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); }