//===-- HTMLLogger.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the HTML logger. Given a directory dir/, we write // dir/0.html for the first analysis, etc. // These files contain a visualization that allows inspecting the CFG and the // state of the analysis at each point. // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded // so each output file is self-contained. // // VIEWS // // The timeline and function view are always shown. These allow selecting basic // blocks, statements within them, and processing iterations (BBs are visited // multiple times when e.g. loops are involved). // These are written directly into the HTML body. // // There are also listings of particular basic blocks, and dumps of the state // at particular analysis points (i.e. BB2 iteration 3 statement 2). // These are only shown when the relevant BB/analysis point is *selected*. // // DATA AND TEMPLATES // // The HTML proper is mostly static. // The analysis data is in a JSON object HTMLLoggerData which is embedded as // a \n"; writeCode(); writeCFG(); *OS << "\n"; *OS << llvm::StringRef(HTMLLogger_html).split("").second; } void enterBlock(const CFGBlock &B, bool PostVisit) override { llvm::SmallVector &BIter = BlockIters[&B]; unsigned IterNum = BIter.size() + 1; BIter.push_back(Iters.size()); Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); ElementIndex = 0; } void enterElement(const CFGElement &E) override { ++ElementIndex; } static std::string blockID(unsigned Block) { return llvm::formatv("B{0}", Block); } static std::string eltID(unsigned Block, unsigned Element) { return llvm::formatv("B{0}.{1}", Block, Element); } static std::string iterID(unsigned Block, unsigned Iter) { return llvm::formatv("B{0}:{1}", Block, Iter); } static std::string elementIterID(unsigned Block, unsigned Iter, unsigned Element) { return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); } // Write the analysis state associated with a particular analysis point. // FIXME: this dump is fairly opaque. We should show: // - values associated with the current Stmt // - values associated with its children // - meaningful names for values // - which boolean values are implied true/false by the flow condition void recordState(TypeErasedDataflowAnalysisState &State) override { unsigned Block = Iters.back().Block->getBlockID(); unsigned Iter = Iters.back().Iter; bool PostVisit = Iters.back().PostVisit; JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { JOS->attribute("block", blockID(Block)); JOS->attribute("iter", Iter); JOS->attribute("post_visit", PostVisit); JOS->attribute("element", ElementIndex); // If this state immediately follows an Expr, show its built-in model. if (ElementIndex > 0) { auto S = Iters.back().Block->Elements[ElementIndex - 1].getAs(); if (const Expr *E = S ? llvm::dyn_cast(S->getStmt()) : nullptr) { if (E->isPRValue()) { if (auto *V = State.Env.getValue(*E)) JOS->attributeObject( "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); }); } else { if (auto *Loc = State.Env.getStorageLocation(*E)) JOS->attributeObject( "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); }); } } } if (!ContextLogs.empty()) { JOS->attribute("logs", ContextLogs); ContextLogs.clear(); } { std::string BuiltinLattice; llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); State.Env.dump(BuiltinLatticeS); JOS->attribute("builtinLattice", BuiltinLattice); } }); } void blockConverged() override { Iters.back().Converged = true; } void logText(llvm::StringRef S) override { ContextLogs.append(S.begin(), S.end()); ContextLogs.push_back('\n'); } private: // Write the CFG block details. // Currently this is just the list of elements in execution order. // FIXME: an AST dump would be a useful view, too. void writeBlock(const CFGBlock &B, llvm::ArrayRef ItersForB) { JOS->attributeObject(blockID(B.getBlockID()), [&] { JOS->attributeArray("iters", [&] { for (size_t IterIdx : ItersForB) { const Iteration &Iter = Iters[IterIdx]; JOS->object([&] { JOS->attribute("iter", Iter.Iter); JOS->attribute("post_visit", Iter.PostVisit); JOS->attribute("converged", Iter.Converged); }); } }); JOS->attributeArray("elements", [&] { for (const auto &Elt : B.Elements) { std::string Dump; llvm::raw_string_ostream DumpS(Dump); Elt.dumpToStream(DumpS); JOS->value(Dump); } }); }); } // Write the code of function being examined. // We want to overlay the code with s that mark which BB particular // tokens are associated with, and even which BB element (so that clicking // can select the right element). void writeCode() { const auto &AST = CFG->getDecl().getASTContext(); bool Invalid = false; // Extract the source code from the original file. // Pretty-printing from the AST would probably be nicer (no macros or // indentation to worry about), but we need the boundaries of particular // AST nodes and the printer doesn't provide this. auto Range = clang::Lexer::makeFileCharRange( CharSourceRange::getTokenRange(CFG->getDecl().getSourceRange()), AST.getSourceManager(), AST.getLangOpts()); if (Range.isInvalid()) return; llvm::StringRef Code = clang::Lexer::getSourceText( Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); if (Invalid) return; // TokenInfo stores the BB and set of elements that a token is part of. struct TokenInfo { enum : unsigned { Missing = static_cast(-1) }; // The basic block this is part of. // This is the BB of the stmt with the smallest containing range. unsigned BB = Missing; unsigned BBPriority = 0; // The most specific stmt this is part of (smallest range). unsigned Elt = Missing; unsigned EltPriority = 0; // All stmts this is part of. SmallVector Elts; // Mark this token as being part of BB.Elt. // RangeLen is the character length of the element's range, used to // distinguish inner vs outer statements. // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". // However "a" has a smaller range, so is more specific. Clicking on the // token "a" should select the stmt "a". void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { // A worse BB (larger range) => ignore. if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) return; if (BB != this->BB) { this->BB = BB; Elts.clear(); BBPriority = RangeLen; } BBPriority = std::min(BBPriority, RangeLen); Elts.push_back(Elt); if (this->Elt == Missing || EltPriority > RangeLen) this->Elt = Elt; } bool operator==(const TokenInfo &Other) const { return std::tie(BB, Elt, Elts) == std::tie(Other.BB, Other.Elt, Other.Elts); } // Write the attributes for the on this token. void write(llvm::raw_ostream &OS) const { OS << "class='c"; if (BB != Missing) OS << " " << blockID(BB); for (unsigned Elt : Elts) OS << " " << eltID(BB, Elt); OS << "'"; if (Elt != Missing) OS << " data-elt='" << eltID(BB, Elt) << "'"; if (BB != Missing) OS << " data-bb='" << blockID(BB) << "'"; } }; // Construct one TokenInfo per character in a flat array. // This is inefficient (chars in a token all have the same info) but simple. std::vector State(Code.size()); for (const auto *Block : CFG->getCFG()) { unsigned EltIndex = 0; for (const auto& Elt : *Block) { ++EltIndex; if (const auto S = Elt.getAs()) { auto EltRange = clang::Lexer::makeFileCharRange( CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), AST.getSourceManager(), AST.getLangOpts()); if (EltRange.isInvalid()) continue; if (EltRange.getBegin() < Range.getBegin() || EltRange.getEnd() >= Range.getEnd() || EltRange.getEnd() < Range.getBegin() || EltRange.getEnd() >= Range.getEnd()) continue; unsigned Off = EltRange.getBegin().getRawEncoding() - Range.getBegin().getRawEncoding(); unsigned Len = EltRange.getEnd().getRawEncoding() - EltRange.getBegin().getRawEncoding(); for (unsigned I = 0; I < Len; ++I) State[Off + I].assign(Block->getBlockID(), EltIndex, Len); } } } // Finally, write the code with the correct s. unsigned Line = AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); *OS << ""; } // Write the CFG diagram, a graph of basic blocks. // Laying out graphs is hard, so we construct a graphviz description and shell // out to `dot` to turn it into an SVG. void writeCFG() { *OS << "\n"; } // Produce a graphviz description of a CFG. static std::string buildCFGDot(const clang::CFG &CFG) { std::string Graph; llvm::raw_string_ostream GraphS(Graph); // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. GraphS << R"(digraph { tooltip=" " node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] edge[tooltip = " "] )"; for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; for (const auto *Block : CFG) { for (const auto &Succ : Block->succs()) { if (Succ.getReachableBlock()) GraphS << " " << blockID(Block->getBlockID()) << " -> " << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; } } GraphS << "}\n"; return Graph; } }; // Nothing interesting here, just subprocess/temp-file plumbing. llvm::Expected renderSVG(llvm::StringRef DotGraph) { std::string DotPath; if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) DotPath = FromEnv; else { auto FromPath = llvm::sys::findProgramByName("dot"); if (!FromPath) return llvm::createStringError(FromPath.getError(), "'dot' not found on PATH"); DotPath = FromPath.get(); } // Create input and output files for `dot` subprocess. // (We create the output file as empty, to reserve the temp filename). llvm::SmallString<256> Input, Output; int InputFD; if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, Input)) return llvm::createStringError(EC, "failed to create `dot` temp input"); llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; auto DeleteInput = llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) return llvm::createStringError(EC, "failed to create `dot` temp output"); auto DeleteOutput = llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); std::vector> Redirects = { Input, Output, /*stderr=*/std::nullopt}; std::string ErrMsg; int Code = llvm::sys::ExecuteAndWait( DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); if (!ErrMsg.empty()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "'dot' failed: " + ErrMsg); if (Code != 0) return llvm::createStringError(llvm::inconvertibleErrorCode(), "'dot' failed (" + llvm::Twine(Code) + ")"); auto Buf = llvm::MemoryBuffer::getFile(Output); if (!Buf) return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); // Output has prefix we don't want. Skip to tag. llvm::StringRef Result = Buf.get()->getBuffer(); auto Pos = Result.find(" tag in `dot` output"); return Result.substr(Pos).str(); } } // namespace std::unique_ptr Logger::html(std::function()> Streams) { return std::make_unique(std::move(Streams)); } } // namespace clang::dataflow