//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the Machinelegalizer class for ARM. /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" #include "ARMCallLowering.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" using namespace llvm; using namespace LegalizeActions; static bool AEABI(const ARMSubtarget &ST) { return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); } ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 32); const LLT s1 = LLT::scalar(1); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); auto &LegacyInfo = getLegacyLegalizerInfo(); if (ST.isThumb1Only()) { // Thumb1 is not supported yet. LegacyInfo.computeTables(); verify(*ST.getInstrInfo()); return; } getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16}); getActionDefinitionsBuilder(G_SEXT_INREG).lower(); getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32}) .clampScalar(0, s32, s32); if (ST.hasNEON()) getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({s32, s64}) .minScalar(0, s32); else getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({s32}) .minScalar(0, s32); getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}) .legalFor({{s32, s32}}) .minScalar(0, s32) .clampScalar(1, s32, s32); bool HasHWDivide = (!ST.isThumb() && ST.hasDivideInARMMode()) || (ST.isThumb() && ST.hasDivideInThumbMode()); if (HasHWDivide) getActionDefinitionsBuilder({G_SDIV, G_UDIV}) .legalFor({s32}) .clampScalar(0, s32, s32); else getActionDefinitionsBuilder({G_SDIV, G_UDIV}) .libcallFor({s32}) .clampScalar(0, s32, s32); auto &REMBuilder = getActionDefinitionsBuilder({G_SREM, G_UREM}).minScalar(0, s32); if (HasHWDivide) REMBuilder.lowerFor({s32}); else if (AEABI(ST)) REMBuilder.customFor({s32}); else REMBuilder.libcallFor({s32}); getActionDefinitionsBuilder(G_INTTOPTR) .legalFor({{p0, s32}}) .minScalar(1, s32); getActionDefinitionsBuilder(G_PTRTOINT) .legalFor({{s32, p0}}) .minScalar(0, s32); getActionDefinitionsBuilder(G_CONSTANT) .legalFor({s32, p0}) .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_ICMP) .legalForCartesianProduct({s1}, {s32, p0}) .minScalar(1, s32); getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({s32, p0}, {s1}) .minScalar(0, s32); // We're keeping these builders around because we'll want to add support for // floating point to them. auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE}) .legalForTypesWithMemDesc({{s8, p0, s8, 8}, {s16, p0, s16, 8}, {s32, p0, s32, 8}, {p0, p0, p0, 8}}) .unsupportedIfMemSizeNotPow2(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); auto &PhiBuilder = getActionDefinitionsBuilder(G_PHI) .legalFor({s32, p0}) .minScalar(0, s32); getActionDefinitionsBuilder(G_PTR_ADD) .legalFor({{p0, s32}}) .minScalar(1, s32); getActionDefinitionsBuilder(G_BRCOND).legalFor({s1}); if (!ST.useSoftFloat() && ST.hasVFP2Base()) { getActionDefinitionsBuilder( {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG}) .legalFor({s32, s64}); LoadStoreBuilder .legalForTypesWithMemDesc({{s64, p0, s64, 32}}) .maxScalar(0, s32); PhiBuilder.legalFor({s64}); getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1}, {s32, s64}); getActionDefinitionsBuilder(G_MERGE_VALUES).legalFor({{s64, s32}}); getActionDefinitionsBuilder(G_UNMERGE_VALUES).legalFor({{s32, s64}}); getActionDefinitionsBuilder(G_FPEXT).legalFor({{s64, s32}}); getActionDefinitionsBuilder(G_FPTRUNC).legalFor({{s32, s64}}); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) .legalForCartesianProduct({s32}, {s32, s64}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalForCartesianProduct({s32, s64}, {s32}); getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_GET_FPMODE}) .legalFor({s32}); getActionDefinitionsBuilder(G_RESET_FPENV).alwaysLegal(); getActionDefinitionsBuilder(G_SET_FPMODE).customFor({s32}); } else { getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV}) .libcallFor({s32, s64}); LoadStoreBuilder.maxScalar(0, s32); getActionDefinitionsBuilder(G_FNEG).lowerFor({s32, s64}); getActionDefinitionsBuilder(G_FCONSTANT).customFor({s32, s64}); getActionDefinitionsBuilder(G_FCMP).customForCartesianProduct({s1}, {s32, s64}); if (AEABI(ST)) setFCmpLibcallsAEABI(); else setFCmpLibcallsGNU(); getActionDefinitionsBuilder(G_FPEXT).libcallFor({{s64, s32}}); getActionDefinitionsBuilder(G_FPTRUNC).libcallFor({{s32, s64}}); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) .libcallForCartesianProduct({s32}, {s32, s64}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .libcallForCartesianProduct({s32, s64}, {s32}); getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV}) .libcall(); getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) .libcall(); } // Just expand whatever loads and stores are left. LoadStoreBuilder.lower(); if (!ST.useSoftFloat() && ST.hasVFP4Base()) getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64}); else getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64}); getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64}); if (ST.hasV5TOps()) { getActionDefinitionsBuilder(G_CTLZ) .legalFor({s32, s32}) .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) .lowerFor({s32, s32}) .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); } else { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) .libcallFor({s32, s32}) .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_CTLZ) .lowerFor({s32, s32}) .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); } LegacyInfo.computeTables(); verify(*ST.getInstrInfo()); } void ARMLegalizerInfo::setFCmpLibcallsAEABI() { // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be // default-initialized. FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); FCmp32Libcalls[CmpInst::FCMP_OEQ] = { {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_OGE] = { {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_OGT] = { {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_OLE] = { {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_OLT] = { {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_UNO] = { {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_ONE] = { {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp32Libcalls[CmpInst::FCMP_UEQ] = { {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); FCmp64Libcalls[CmpInst::FCMP_OEQ] = { {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_OGE] = { {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_OGT] = { {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_OLE] = { {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_OLT] = { {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_UNO] = { {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_ONE] = { {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; FCmp64Libcalls[CmpInst::FCMP_UEQ] = { {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; } void ARMLegalizerInfo::setFCmpLibcallsGNU() { // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be // default-initialized. FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}}; FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, {RTLIB::UO_F32, CmpInst::ICMP_NE}}; FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}}; FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, {RTLIB::UO_F64, CmpInst::ICMP_NE}}; } ARMLegalizerInfo::FCmpLibcallsList ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, unsigned Size) const { assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); if (Size == 32) return FCmp32Libcalls[Predicate]; if (Size == 64) return FCmp64Libcalls[Predicate]; llvm_unreachable("Unsupported size for FCmp predicate"); } bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (MI.getOpcode()) { default: return false; case G_SREM: case G_UREM: { Register OriginalResult = MI.getOperand(0).getReg(); auto Size = MRI.getType(OriginalResult).getSizeInBits(); if (Size != 32) return false; auto Libcall = MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; // Our divmod libcalls return a struct containing the quotient and the // remainder. Create a new, unused register for the quotient and use the // destination of the original instruction for the remainder. Type *ArgTy = Type::getInt32Ty(Ctx); StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); Register RetRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}; auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0}, {{MI.getOperand(1).getReg(), ArgTy, 0}, {MI.getOperand(2).getReg(), ArgTy, 0}}, LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; break; } case G_FCMP: { assert(MRI.getType(MI.getOperand(2).getReg()) == MRI.getType(MI.getOperand(3).getReg()) && "Mismatched operands for G_FCMP"); auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); auto OriginalResult = MI.getOperand(0).getReg(); auto Predicate = static_cast(MI.getOperand(1).getPredicate()); auto Libcalls = getFCmpLibcalls(Predicate, OpSize); if (Libcalls.empty()) { assert((Predicate == CmpInst::FCMP_TRUE || Predicate == CmpInst::FCMP_FALSE) && "Predicate needs libcalls, but none specified"); MIRBuilder.buildConstant(OriginalResult, Predicate == CmpInst::FCMP_TRUE ? 1 : 0); MI.eraseFromParent(); return true; } assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); auto *RetTy = Type::getInt32Ty(Ctx); SmallVector Results; for (auto Libcall : Libcalls) { auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); auto Status = createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy, 0}, {{MI.getOperand(2).getReg(), ArgTy, 0}, {MI.getOperand(3).getReg(), ArgTy, 0}}, LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; auto ProcessedResult = Libcalls.size() == 1 ? OriginalResult : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); // We have a result, but we need to transform it into a proper 1-bit 0 or // 1, taking into account the different peculiarities of the values // returned by the comparison functions. CmpInst::Predicate ResultPred = Libcall.Predicate; if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { // We have a nice 0 or 1, and we just need to truncate it back to 1 bit // to keep the types consistent. MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); } else { // We need to compare against 0. assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); auto Zero = MIRBuilder.buildConstant(LLT::scalar(32), 0); MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); } Results.push_back(ProcessedResult); } if (Results.size() != 1) { assert(Results.size() == 2 && "Unexpected number of results"); MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); } break; } case G_FCONSTANT: { // Convert to integer constants, while preserving the binary representation. auto AsInteger = MI.getOperand(1).getFPImm()->getValueAPF().bitcastToAPInt(); MIRBuilder.buildConstant(MI.getOperand(0), *ConstantInt::get(Ctx, AsInteger)); break; } case G_SET_FPMODE: { // New FPSCR = (FPSCR & FPStatusBits) | (Modes & ~FPStatusBits) LLT FPEnvTy = LLT::scalar(32); auto FPEnv = MRI.createGenericVirtualRegister(FPEnvTy); Register Modes = MI.getOperand(0).getReg(); MIRBuilder.buildGetFPEnv(FPEnv); auto StatusBitMask = MIRBuilder.buildConstant(FPEnvTy, ARM::FPStatusBits); auto StatusBits = MIRBuilder.buildAnd(FPEnvTy, FPEnv, StatusBitMask); auto NotStatusBitMask = MIRBuilder.buildConstant(FPEnvTy, ~ARM::FPStatusBits); auto FPModeBits = MIRBuilder.buildAnd(FPEnvTy, Modes, NotStatusBitMask); auto NewFPSCR = MIRBuilder.buildOr(FPEnvTy, StatusBits, FPModeBits); MIRBuilder.buildSetFPEnv(NewFPSCR); break; } } MI.eraseFromParent(); return true; }