From 2a36406e3d729fe446754a728dd8feb8f7fcecbe Mon Sep 17 00:00:00 2001 From: ridethepig Date: Sat, 20 May 2023 10:46:46 +0800 Subject: [PATCH] buggy mem2reg --- .vscode/launch.json | 2 +- CMakeLists.txt | 5 +- docs/LLVM-promoteAlloca.cpp | 1201 +++++++++++++++++++++++++++++++++++ include/algos.h | 11 + include/common.h | 4 + include/llir_instruction.h | 37 ++ include/llir_type.h | 17 +- include/llir_value.h | 90 ++- include/pass.h | 2 +- include/visitor.h | 2 + src/algo_dominance.cpp | 137 ++++ src/dominance_algo.cpp | 106 ---- src/main.cpp | 17 +- src/pass_mem2reg.cpp | 293 ++++++++- src/visitor_factory.cpp | 10 + src/visitor_llir_gen.cpp | 57 +- 16 files changed, 1831 insertions(+), 160 deletions(-) create mode 100644 docs/LLVM-promoteAlloca.cpp create mode 100644 include/algos.h create mode 100644 src/algo_dominance.cpp delete mode 100644 src/dominance_algo.cpp diff --git a/.vscode/launch.json b/.vscode/launch.json index 944d877..28ce5eb 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,7 @@ "request": "launch", "name": "Debug", "program": "${workspaceFolder}/build/sysy", - "args": ["testcases/functional/64_calculator.sy", "-S", "-o", "build/manual-test/21_my.ll"], + "args": ["../sysytests/functional_2022/21_if_test2.sy", "-S", "-o", "build/21_my.ll", "-emit-llvm"], "cwd": "${workspaceFolder}" } ] diff --git a/CMakeLists.txt b/CMakeLists.txt index a6ec762..32313b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,8 @@ file(GLOB MY_HEADERS "include/*.h") include(formatting.cmake) clang_format(format ${MY_SOURCES} ${MY_HEADERS}) # clang_format_check(format_check ${MY_SOURCES} ${MY_HEADERS}) - +# add_compile_options(-fsanitize=address) +# add_link_options(-fsanitize=address) add_executable(sysy ${SOURCES}) # message(STATUS "${SOURCES}") -target_link_libraries(sysy antlr4_static) \ No newline at end of file +target_link_libraries(sysy antlr4_static) diff --git a/docs/LLVM-promoteAlloca.cpp b/docs/LLVM-promoteAlloca.cpp new file mode 
100644 index 0000000..73f1322 --- /dev/null +++ b/docs/LLVM-promoteAlloca.cpp @@ -0,0 +1,1201 @@ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "mem2reg" + +STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); +STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); +STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // Only allow direct and non-volatile loads and stores... + for (const User *U : AI->users()) { + if (const LoadInst *LI = dyn_cast(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. 
+ if (LI->isVolatile() || LI->getType() != AI->getAllocatedType()) + return false; + } else if (const StoreInst *SI = dyn_cast(U)) { + if (SI->getValueOperand() == AI || + SI->getValueOperand()->getType() != AI->getAllocatedType()) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + return false; + } else if (const AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByLifetimeMarkers(ASCI)) + return false; + } else { + return false; + } + } + + return true; +} + +namespace { + +/// Helper for updating assignment tracking debug info when promoting allocas. +class AssignmentTrackingInfo { + /// DbgAssignIntrinsics linked to the alloca with at most one per variable + /// fragment. (i.e. not be a comprehensive set if there are multiple + /// dbg.assigns for one variable fragment). + SmallVector DbgAssigns; + +public: + void init(AllocaInst *AI) { + SmallSet Vars; + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(AI)) { + if (Vars.insert(DebugVariable(DAI)).second) + DbgAssigns.push_back(DAI); + } + } + + /// Update assignment tracking debug info given for the to-be-deleted store + /// \p ToDelete that stores to this alloca. + void updateForDeletedStore( + StoreInst *ToDelete, DIBuilder &DIB, + SmallSet *DbgAssignsToDelete) const { + // There's nothing to do if the alloca doesn't have any variables using + // assignment tracking. 
+ if (DbgAssigns.empty()) + return; + + // Insert a dbg.value where the linked dbg.assign is and remember to delete + // the dbg.assign later. Demoting to dbg.value isn't necessary for + // correctness but does reduce compile time and memory usage by reducing + // unnecessary function-local metadata. Remember that we've seen a + // dbg.assign for each variable fragment for the untracked store handling + // (after this loop). + SmallSet VarHasDbgAssignForStore; + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) { + VarHasDbgAssignForStore.insert(DebugVariableAggregate(DAI)); + DbgAssignsToDelete->insert(DAI); + DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(), + DAI->getExpression(), DAI->getDebugLoc(), + DAI); + } + + // It's possible for variables using assignment tracking to have no + // dbg.assign linked to this store. These are variables in DbgAssigns that + // are missing from VarHasDbgAssignForStore. Since there isn't a dbg.assign + // to mark the assignment - and the store is going to be deleted - insert a + // dbg.value to do that now. An untracked store may be either one that + // cannot be represented using assignment tracking (non-const offset or + // size) or one that is trackable but has had its DIAssignID attachment + // dropped accidentally. + for (auto *DAI : DbgAssigns) { + if (VarHasDbgAssignForStore.contains(DebugVariableAggregate(DAI))) + continue; + ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB); + } + } + + /// Update assignment tracking debug info given for the newly inserted PHI \p + /// NewPhi. + void updateForNewPhi(PHINode *NewPhi, DIBuilder &DIB) const { + // Regardless of the position of dbg.assigns relative to stores, the + // incoming values into a new PHI should be the same for the (imaginary) + // debug-phi. 
+ for (auto *DAI : DbgAssigns) + ConvertDebugDeclareToDebugValue(DAI, NewPhi, DIB); + } + + void clear() { DbgAssigns.clear(); } + bool empty() { return DbgAssigns.empty(); } +}; + +struct AllocaInfo { + using DbgUserVec = SmallVector; + + SmallVector DefiningBlocks; + SmallVector UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + /// Debug users of the alloca - does not include dbg.assign intrinsics. + DbgUserVec DbgUsers; + /// Helper to update assignment tracking debug info. + AssignmentTrackingInfo AssignmentTracking; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = nullptr; + OnlyBlock = nullptr; + OnlyUsedInOneBlock = true; + DbgUsers.clear(); + AssignmentTracking.clear(); + } + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (User *U : AI->users()) { + Instruction *User = cast(U); + + if (StoreInst *SI = dyn_cast(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + OnlyStore = SI; + } else { + LoadInst *LI = cast(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + } + + if (OnlyUsedInOneBlock) { + if (!OnlyBlock) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + DbgUserVec AllDbgUsers; + findDbgUsers(AllDbgUsers, AI); + std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(), + std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) { + return !isa(DII); + }); + AssignmentTracking.init(AI); + } +}; + +/// Data package used by RenamePass(). 
+struct RenamePassData { + using ValVector = std::vector; + using LocationVector = std::vector; + + RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) + : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {} + + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + LocationVector Locations; +}; + +/// This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. +/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// For each instruction that we track, keep the index of the + /// instruction. + /// + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa(I) && isa(I->getOperand(0))) || + (isa(I) && isa(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) + return It->second; + + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. 
+ const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (const Instruction &BBI : *BB) + if (isInterestingInstruction(&BBI)) + InstNumbers[&BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } + + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } + + void clear() { InstNumbers.clear(); } +}; + +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector Allocas; + + DominatorTree &DT; + DIBuilder DIB; + + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. + AssumptionCache *AC; + + const SimplifyQuery SQ; + + /// Reverse mapping of Allocas. + DenseMap AllocaLookup; + + /// The PhiNodes we're adding. + /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap PhiToAllocaMap; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector AllocaDbgUsers; + + /// For each alloca, keep an instance of a helper class that gives us an easy + /// way to update assignment tracking debug info if the alloca is promoted. + SmallVector AllocaATInfo; + /// A set of dbg.assigns to delete because they've been demoted to + /// dbg.values. Call cleanUpDbgAssigns to delete them. + SmallSet DbgAssignsToDelete; + + /// The set of basic blocks the renamer has already visited. + SmallPtrSet Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. 
+ DenseMap BBNumbers; + + /// Lazily compute the number of predecessors a block has. + DenseMap BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, + AssumptionCache *AC) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = pred_size(BB) + 1; + return NP - 1; + } + + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl &DefBlocks, + SmallPtrSetImpl &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + RenamePassData::LocationVector &IncLocs, + std::vector &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); + + /// Delete dbg.assigns that have been demoted to dbg.values. + void cleanUpDbgAssigns() { + for (auto *DAI : DbgAssignsToDelete) + DAI->eraseFromParent(); + DbgAssignsToDelete.clear(); + } +}; + +} // end anonymous namespace + +/// Given a LoadInst LI this adds assume(LI != null) after it. 
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { + Function *AssumeIntrinsic = + Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume); + ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI, + Constant::getNullValue(LI->getType())); + LoadNotNull->insertAfter(LI); + CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); + CI->insertAfter(LoadNotNull); + AC->registerAssumption(cast(CI)); +} + +static void convertMetadataToAssumes(LoadInst *LI, Value *Val, + const DataLayout &DL, AssumptionCache *AC, + const DominatorTree *DT) { + // If the load was marked as nonnull we don't want to lose that information + // when we erase this Load. So we preserve it with an assume. As !nonnull + // returns poison while assume violations are immediate undefined behavior, + // we can only do this if the value is known non-poison. + if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && + LI->getMetadata(LLVMContext::MD_noundef) && + !isKnownNonZero(Val, DL, 0, AC, LI, DT)) + addAssumeNonNull(AC, LI); +} + +static void removeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Use &U : llvm::make_early_inc_range(AI->uses())) { + Instruction *I = cast(U.getUser()); + if (isa(I) || isa(I)) + continue; + + // Drop the use of AI in droppable instructions. + if (I->isDroppable()) { + I->dropDroppableUse(U); + continue; + } + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Use &UU : llvm::make_early_inc_range(I->uses())) { + Instruction *Inst = cast(UU.getUser()); + + // Drop the use of I in droppable instructions. 
+ if (Inst->isDroppable()) { + Inst->dropDroppableUse(UU); + continue; + } + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +/// Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca( + AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC, + SmallSet *DbgAssignsToDelete) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (User *U : make_early_inc_range(AI->users())) { + Instruction *UserInst = cast(U); + if (UserInst == OnlyStore) + continue; + LoadInst *LI = cast(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. 
+ if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + } else if (!DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = PoisonValue::get(LI->getType()); + + convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT); + LI->replaceAllUsesWith(ReplVal); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. + + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + // Update assignment tracking info for the store we're going to delete. + Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB, + DbgAssignsToDelete); + + // Record debuginfo for the store and remove the declaration's + // debuginfo. + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + } else if (DII->getExpression()->startsWithDeref()) { + DII->eraseFromParent(); + } + } + + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(AI); + + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + AI->eraseFromParent(); + return true; +} + +/// Many allocas are only used within a single basic block. 
If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return false. This is necessary in cases where, due to control flow, the +/// alloca is undefined only on some control flow paths. e.g. code like +/// this is correct in LLVM IR: +/// // A is an alloca with no stores so far +/// for (...) { +/// int t = *A; +/// if (!first_iteration) +/// use(t); +/// *A = 42; +/// } +static bool promoteSingleBlockAlloca( + AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, + const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC, + SmallSet *DbgAssignsToDelete) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + using StoresByIndexTy = SmallVector, 64>; + StoresByIndexTy StoresByIndex; + + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast(U)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + llvm::sort(StoresByIndex, less_first()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (User *U : make_early_inc_range(AI->users())) { + LoadInst *LI = dyn_cast(U); + if (!LI) + continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. 
+ StoresByIndexTy::iterator I = llvm::lower_bound( + StoresByIndex, + std::make_pair(LoadIdx, static_cast(nullptr)), + less_first()); + Value *ReplVal; + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + ReplVal = UndefValue::get(LI->getType()); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } else { + // Otherwise, there was a store before this load, the load takes its + // value. + ReplVal = std::prev(I)->second->getOperand(0); + } + + convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT); + + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = PoisonValue::get(LI->getType()); + + LI->replaceAllUsesWith(ReplVal); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + while (!AI->use_empty()) { + StoreInst *SI = cast(AI->user_back()); + // Update assignment tracking info for the store we're going to delete. + Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete); + // Record debuginfo for the store before removing it. + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + } + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(AI); + AI->eraseFromParent(); + + // The alloca's debuginfo can be removed as well. 
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + + ++NumLocalPromoted; + return true; +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); + + AllocaDbgUsers.resize(Allocas.size()); + AllocaATInfo.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + ForwardIDFCalculator IDF(DT); + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + removeIntrinsicUsers(AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC, + &DbgAssignsToDelete)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC, + &DbgAssignsToDelete)) { + // The alloca has been processed, move on. 
+ RemoveFromAllocasList(AllocaNum); + continue; + } + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (auto &BB : F) + BBNumbers[&BB] = ID++; + } + + // Remember the dbg.declare intrinsic describing this alloca, if any. + /* Catfood is not going to use this + if (!Info.DbgUsers.empty()) + AllocaDbgUsers[AllocaNum] = Info.DbgUsers; + if (!Info.AssignmentTracking.empty()) + AllocaATInfo[AllocaNum] = Info.AssignmentTracking; + */ + // Keep the reverse mapping of the 'Allocas' array for the rename pass. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet DefBlocks(Info.DefiningBlocks.begin(), + Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need phi + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector PHIBlocks; + IDF.calculate(PHIBlocks); + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }); + + unsigned CurrentVersion = 0; + for (BasicBlock *BB : PHIBlocks) + QueuePhiNode(BB, AllocaNum, CurrentVersion); + } + + if (Allocas.empty()) { + cleanUpDbgAssigns(); + return; // All of the allocas must have been trivial! + } + LBI.clear(); + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. 
+ RenamePassData::ValVector Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // When handling debug info, treat all incoming values as if they have unknown + // locations until proven otherwise. + RenamePassData::LocationVector Locations(Allocas.size()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + std::vector RenamePassWorkList; + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), + std::move(Locations)); + do { + RenamePassData RPD = std::move(RenamePassWorkList.back()); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. + RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList); + } while (!RenamePassWorkList.empty()); + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. + for (Instruction *A : Allocas) { + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(A); + // If there are any uses of the alloca instructions left, they must be in + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. + if (!A->use_empty()) + A->replaceAllUsesWith(PoisonValue::get(A->getType())); + A->eraseFromParent(); + } + + // Remove alloca's dbg.declare intrinsics from the function. + /* CATFOOD is not going to use this + for (auto &DbgUsers : AllocaDbgUsers) { + for (auto *DII : DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + } + */ + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. 
This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. + for (DenseMap, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = simplifyInstruction(PN, SQ)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + for (DenseMap, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) + continue; + + // Get the preds for BB. 
+ SmallVector Preds(predecessors(BB)); + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }; + llvm::sort(Preds, CompareBBNumbers); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. + SmallVectorImpl::iterator EntIt = llvm::lower_bound( + Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). + unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (BasicBlock *Pred : Preds) + SomePHI->addIncoming(UndefVal, Pred); + } + } + + NewPhiNodes.clear(); + cleanUpDbgAssigns(); +} + +template +void IDFCalculatorBase::calculate( + SmallVectorImpl &IDFBlocks) { + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. We also augment + // the level with a DFS number to ensure that the blocks are ordered in a + // deterministic way. 
+ using DomTreeNodePair = + std::pair *, std::pair>; + using IDFPriorityQueue = + std::priority_queue, + less_second>; + + IDFPriorityQueue PQ; + + DT.updateDFSNumbers(); + + SmallVector *, 32> Worklist; + SmallPtrSet *, 32> VisitedPQ; + SmallPtrSet *, 32> VisitedWorklist; + + for (NodeTy *BB : *DefBlocks) + if (DomTreeNodeBase *Node = DT.getNode(BB)) { + PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())}); + VisitedWorklist.insert(Node); + } + + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNodeBase *Root = RootPair.first; + unsigned RootLevel = RootPair.second.first; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + assert(Worklist.empty()); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNodeBase *Node = Worklist.pop_back_val(); + NodeTy *BB = Node->getBlock(); + // Succ is the successor in the direction we are calculating IDF, so it is + // successor for IDF, and predecessor for Reverse IDF. + auto DoWork = [&](NodeTy *Succ) { + DomTreeNodeBase *SuccNode = DT.getNode(Succ); + + const unsigned SuccLevel = SuccNode->getLevel(); + if (SuccLevel > RootLevel) + return; + + if (!VisitedPQ.insert(SuccNode).second) + return; + + NodeTy *SuccBB = SuccNode->getBlock(); + if (useLiveIn && !LiveInBlocks->count(SuccBB)) + return; + + IDFBlocks.emplace_back(SuccBB); + if (!DefBlocks->count(SuccBB)) + PQ.push(std::make_pair( + SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn()))); + }; + + for (auto *Succ : ChildrenGetter.get(BB)) + DoWork(Succ); + + for (auto DomChild : *Node) { + if (VisitedWorklist.insert(DomChild).second) + Worklist.push_back(DomChild); + } + } + } +} + +/// Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. 
Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl &DefBlocks, + SmallPtrSetImpl &LiveInBlocks) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) + continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin();; ++I) { + if (StoreInst *SI = dyn_cast(I)) { + if (SI->getOperand(1) != AI) + continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i; + --e; + break; + } + + if (LoadInst *LI = dyn_cast(I)) + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + if (LI->getOperand(0) == AI) + break; + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. 
If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (BasicBlock *P : predecessors(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// Queue a phi-node to be added to a basic-block for a specific Alloca. +/// +/// Returns true if there wasn't already a phi-node for that variable +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) + return false; + + // Create a PhiNode using the dereferenced type... and add the phi-node to the + // BasicBlock. + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + &BB->front()); + ++NumPHIInsert; + PhiToAllocaMap[PN] = AllocaNo; + return true; +} + +/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to +/// create a merged location incorporating \p DL, or to set \p DL directly. +static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, + bool ApplyMergedLoc) { + if (ApplyMergedLoc) + PN->applyMergedLocation(PN->getDebugLoc(), DL); + else + PN->setDebugLoc(DL); +} + +/// Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. 
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncomingVals, + RenamePassData::LocationVector &IncomingLocs, + std::vector &Worklist) { +NextIteration: + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast(BB->begin())) { + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + + unsigned NumEdges = llvm::count(successors(Pred), BB); + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Update the location of the phi node. + updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo], + APN->getNumIncomingValues() > 0); + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); + + // Get the next phi node. + ++PNI; + APN = dyn_cast(PNI); + if (!APN) + break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. 
+ } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. + if (!Visited.insert(BB).second) + return; + + for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) { + Instruction *I = &*II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast(I)) { + AllocaInst *Src = dyn_cast(LI->getPointerOperand()); + if (!Src) + continue; + + DenseMap::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; + + Value *V = IncomingVals[AI->second]; + convertMetadataToAssumes(LI, V, SQ.DL, AC, &DT); + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } else if (StoreInst *SI = dyn_cast(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast(SI->getPointerOperand()); + if (!Dest) + continue; + + DenseMap::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + unsigned AllocaNo = ai->second; + IncomingVals[AllocaNo] = SI->getOperand(0); + + // Record debuginfo for the store before removing it. + IncomingLocs[AllocaNo] = SI->getDebugLoc(); + AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, + &DbgAssignsToDelete); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + SI->eraseFromParent(); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) + return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet VisitedSuccs; + + // Handle the first successor without using the worklist. 
+ VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I).second) + Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs); + + goto NextIteration; +} + +void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, + AssumptionCache *AC) { + // If there is nothing to do, bail out... + if (Allocas.empty()) + return; + + PromoteMem2Reg(Allocas, DT, AC).run(); +} \ No newline at end of file diff --git a/include/algos.h b/include/algos.h new file mode 100644 index 0000000..b4eaf0f --- /dev/null +++ b/include/algos.h @@ -0,0 +1,11 @@ +#pragma once + +#include "llir.h" +#include "passes.h" + +namespace antlrSysY { +void gen_dominance(FunctionPtr_t func); +void gen_dominance_frontier(FunctionPtr_t func); +void update_dfs_numbers(BasicBlockPtr_t bb, bool rst); + +} // namespace antlrSysY diff --git a/include/common.h b/include/common.h index a8c9156..17b43ff 100644 --- a/include/common.h +++ b/include/common.h @@ -16,6 +16,10 @@ if (!(cond)) throw GrammarException(__FILE__, __LINE__, #cond); \ } while (0) +#define DEF_PTR_T(type) \ + class type; \ + typedef std::shared_ptr type##Ptr_t + namespace antlrSysY { template diff --git a/include/llir_instruction.h b/include/llir_instruction.h index 8cfdba3..a2bb0c6 100644 --- a/include/llir_instruction.h +++ b/include/llir_instruction.h @@ -7,6 +7,18 @@ #include namespace antlrSysY { + +DEF_PTR_T(InstAlloca); +DEF_PTR_T(InstStore); +DEF_PTR_T(InstLoad); +DEF_PTR_T(InstBinary); +DEF_PTR_T(InstZext); +DEF_PTR_T(InstBranch); +DEF_PTR_T(InstReturn); +DEF_PTR_T(InstCall); +DEF_PTR_T(InstGEP); +DEF_PTR_T(InstPhi); + enum class InstTag { Add, Sub, @@ -41,6 +53,7 @@ public: int ir_seqno = -1; InstTag tag; BasicBlockPtr_t parent_bb; + // decltype(parent_bb->inst_list.begin()) inst_itr_in_parent; Instruction(InstTag inst_tag, TypePtr_t type, BasicBlockPtr_t parent_bb) : User("", type), tag(inst_tag), parent_bb(parent_bb) {} virtual std::string to_string() override { @@ -235,4 
+248,28 @@ public: } }; +class InstPhi : public Instruction { +public: + InstPhi(TypePtr_t type, const decltype(Function::bb_list) &incoming_vals, BasicBlockPtr_t parent_bb) + : Instruction(InstTag::Phi, type, parent_bb) { + for (auto incoming : incoming_vals) { + Add_Operand(incoming); + } + } + void set_incoming_val(unsigned index, ValuePtr_t val) { + auto old_op = operand_list[index]; + operand_list[index] = val; + if (val) val->use_list.push_back({val, this, index}); + if (old_op) { + if (std::find(operand_list.begin(), operand_list.end(), old_op) == operand_list.end()) { + old_op->u_remove_use(this); + } + } + } + virtual std::string to_IR_string() override { + std::string str = type->to_IR_string() + " %" + std::to_string(ir_seqno); + return str; + } +}; + } // namespace antlrSysY \ No newline at end of file diff --git a/include/llir_type.h b/include/llir_type.h index cf62da8..9cf76df 100644 --- a/include/llir_type.h +++ b/include/llir_type.h @@ -4,10 +4,17 @@ #include #include #include +#include + namespace antlrSysY { -class Type; -typedef std::shared_ptr TypePtr_t; +DEF_PTR_T(Type); +DEF_PTR_T(IntegerType); +DEF_PTR_T(VoidType); +DEF_PTR_T(LabelType); +DEF_PTR_T(ArrayType); +DEF_PTR_T(PointerType); +DEF_PTR_T(FunctionType); class Type { public: @@ -139,6 +146,12 @@ public: virtual std::string to_IR_string() override { return pointed_type->to_IR_string() + "*"; } + + static TypePtr_t pointedType(TypePtr_t ptr) { + assert(Type::isType(ptr)); + auto pointer_type = Type::asType(ptr); + return pointer_type->pointed_type; + } }; class FunctionType : public Type { diff --git a/include/llir_value.h b/include/llir_value.h index d308de2..d3e1fc1 100644 --- a/include/llir_value.h +++ b/include/llir_value.h @@ -8,29 +8,33 @@ #include #include #include -#include #include +#include namespace antlrSysY { -class Value; -class BasicBlock; -class User; -class Function; -class Instruction; -typedef std::shared_ptr ValuePtr_t; -typedef std::shared_ptr BasicBlockPtr_t; 
-typedef std::shared_ptr FunctionPtr_t; -typedef std::shared_ptr UserPtr_t; -typedef std::shared_ptr InstructionPtr_t; -typedef std::tuple UseEdge_t; +DEF_PTR_T(Value); +DEF_PTR_T(BasicBlock); +DEF_PTR_T(User); +DEF_PTR_T(Function); +DEF_PTR_T(Instruction); +DEF_PTR_T(Constant); +DEF_PTR_T(ConstantInt); + +// typedef std::tuple UseEdge_t; +struct Use { + ValuePtr_t value; + User* user; + unsigned op_index; +}; // Define, User, operand-index +// for Instruction, inst `uses` op, so inst is the user class Value { public: std::string name; TypePtr_t type; - std::list use_list; + std::list use_list; // a list of use-edge from this value Value(const std::string &name, TypePtr_t type) : name(name), type(type) {} virtual ~Value() = default; @@ -60,8 +64,24 @@ public: virtual std::string to_IR_string() { panic("No applicable for IR gen"); } -}; + void u_remove_use(User* user) { + // use_list.erase( + // std::remove_if( + // use_list.begin(), use_list.end(), [user](const UseEdge_t &use) { return std::get<1>(use) == user; } + // ), + // use_list.end() + // ); + for (auto itr = use_list.begin(); itr != use_list.end();) { + if (itr->user == user) { + itr = use_list.erase(itr); + } + else { + ++ itr; + } + } + } +}; class User : public Value { public: @@ -69,14 +89,34 @@ public: User(const std::string &name, TypePtr_t type) : Value(name, type) {} void Add_Operand(ValuePtr_t op) { + op->use_list.push_back({op, this, (unsigned)operand_list.size()}); operand_list.push_back(op); - use_list.push_back({op, std::make_shared(this), operand_list.size()}); + } + // make anything that use this value use the new value + void u_replace_users(ValuePtr_t value) { + if (value == nullptr) { + assert(!use_list.size() && "No one should use this"); + return; + } + for (auto use : use_list) { + auto user = use.user; + auto index = use.op_index; + user->operand_list[index] = value; + assert(value); + value->use_list.push_back({value, user, index}); + } + // all original uses are gone + 
use_list.clear(); + } + // remove this user from its operands + void u_remove_from_usees() { + for (auto op : operand_list) { + assert(op); + op->u_remove_use(this); + } } }; -class BasicBlock; -class Instruction; - class FParam : public Value { public: int ir_seqno = -1; @@ -115,18 +155,20 @@ public: class BasicBlock : public Value { public: int ir_seqno = -1; - std::vector> inst_list; + std::list inst_list; std::shared_ptr parent; BasicBlockListNode_t itr; - std::list> successors; - std::list> predecessors; + std::list successors; + std::list predecessors; - BasicBlockPtr_t idomer; - std::list idomee_list; - std::list domer_list; + BasicBlockPtr_t idomer; // dominating node + std::list idom_list; // immediate dominated nodes + std::list dom_list; // dominated nodes std::list dom_frontier; int dom_level; int _dom_helper_index; + int dom_dfs_in; + int dom_dfs_out; BasicBlock(const std::string &name, std::shared_ptr parent) : Value(name, TypeHelper::TYPE_LABEL) { this->parent = parent; diff --git a/include/pass.h b/include/pass.h index c066dd9..826ff3d 100644 --- a/include/pass.h +++ b/include/pass.h @@ -12,7 +12,7 @@ public: virtual void run(const Module &module) = 0; }; -class PassMem2Reg : public Pass { +class PassMem2Reg : public Pass { public: PassMem2Reg() : Pass("mem2reg") {} virtual void run(const Module &module) override; diff --git a/include/visitor.h b/include/visitor.h index 157898d..11e0a2c 100644 --- a/include/visitor.h +++ b/include/visitor.h @@ -81,6 +81,8 @@ std::shared_ptr build_InstGEP( BasicBlockPtr_t parent_bb ); +InstPhiPtr_t build_InstPhi(TypePtr_t type, const decltype(Function::bb_list) &incoming_vals, BasicBlockPtr_t parent_bb); + #pragma endregion class Visitor : public antlrSysY::SysyBaseVisitor { diff --git a/src/algo_dominance.cpp b/src/algo_dominance.cpp new file mode 100644 index 0000000..85d3be1 --- /dev/null +++ b/src/algo_dominance.cpp @@ -0,0 +1,137 @@ +#include "llir.h" +#include "visitor.h" + +namespace antlrSysY { + +static void 
_bitwise_and(std::vector &op1, const std::vector &op2) { + for (int i = 0; i < op1.size(); ++i) { + op1[i] = op1[i] & op2[i]; + } +} + +static void _bitwise_set(std::vector &op1, int l, int r, bool val) { + for (int i = l; i < r; ++i) { + op1[i] = val; + } +} + +static void _gen_dom_level(BasicBlockPtr_t bb, int level) { + bb->dom_level = level; + for (auto succ : bb->idom_list) { + _gen_dom_level(succ, level + 1); + } +} + +void gen_dominance(FunctionPtr_t func) { + // Engineering A Compiler 2E, P479 (Chinese edition: p.352) + // Note: n \in Dom(n) + // Basic iterative idea: Dom(n) = {n} union (intersect Dom(pred(n))) + std::vector bb_list; + const int N = func->bb_list.size(); + if (N == 0) return; // no blocks: dom[0] below would be out of range + for (auto basicblock : func->bb_list) { + basicblock->idom_list.clear(); + basicblock->dom_list.clear(); + basicblock->_dom_helper_index = bb_list.size(); + bb_list.push_back(basicblock); + } + std::vector> dom(N); + dom[0].resize(N); + dom[0][0] = 1; // Dom(0) = {0} + // Dom(i) <- N (= {0, 1, 2,...}) + for (int i = 1; i < N; ++i) { + dom[i].resize(N, 1); + } + + bool changed = true; + while (changed) { + changed = false; + // block 0 keeps Dom(0) = {0}; only blocks 1..N-1 are refined + for (int i = 1; i < N; ++i) { + auto cur_bb = bb_list[i]; + std::vector temp(N, true); // (count, value): N entries, all set, before intersecting + // temp = {i} union (intersect Dom(j)), j in pred(i) + for (auto pred : cur_bb->predecessors) { + _bitwise_and(temp, dom[pred->_dom_helper_index]); + } + temp[i] = true; + // if temp != Dom(i) + if (temp != dom[i]) { + dom[i] = temp; // Dom(i) <- temp + changed = true; // changed <- true + } + } + } + // set each basicblock's domer + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + if (dom[i][j]) { + bb_list[i]->dom_list.push_back(bb_list[j]); + } + } + } + // get domees and immediate domer + for (int i = 0; i < N; ++i) { + for (auto domer1 : bb_list[i]->dom_list) { + if (domer1 == bb_list[i]) continue; + bool flag = true; + // if dom(i)[j] dom dom(i)[k], it cannot be the immediate domer of i + for (auto domer2 :
bb_list[i]->dom_list) { + if (domer2 == bb_list[i] || domer2 == domer1) continue; + if (std::find(domer2->dom_list.begin(), domer2->dom_list.end(), domer1) != domer2->dom_list.end()) { + flag = false; + break; + } + } + if (flag) { + bb_list[i]->idomer = domer1; + domer1->idom_list.push_back(bb_list[i]); + break; + } + } + } + _gen_dom_level(bb_list[0], 0); +} + +// DF is defined as: +// DF(n) = {m | (Exists q where n Dom q && q Pred m) && !(n SDom m)} +void gen_dominance_frontier(FunctionPtr_t func) { + // Engineering A Compiler 2E (Chinese edition: p.368) + /* + for all node in CFG: + DF(n) <- Empty + for all node in CFG: + if n has multiple pred: + foreach pred of n: + runner <- p + while runner != idom(n): + DF(runner) <- DF(runner) union n + runner <- IDOM(runner) + */ + for (auto bb : func->bb_list) { + bb->dom_frontier.clear(); + } + for (auto n : func->bb_list) { + if (n->predecessors.size() >= 2) { + for (auto pred : n->predecessors) { + auto runner = pred; + while (runner && runner != n->idomer) { // stop if the idomer chain runs out before reaching idom(n) + runner->dom_frontier.push_back(n); + runner = runner->idomer; + } + } + } + } +} + +void update_dfs_numbers(BasicBlockPtr_t bb, bool rst) { + static int dfs_num; + if (rst) dfs_num = 0; + bb->dom_dfs_in = dfs_num++; + for (auto child : bb->idom_list) { // dominator-tree children; dom_list holds bb's own dominators (bb included) and would recurse forever + update_dfs_numbers(child, false); + } + bb->dom_dfs_out = dfs_num++; +} + +} // namespace antlrSysY \ No newline at end of file diff --git a/src/dominance_algo.cpp b/src/dominance_algo.cpp deleted file mode 100644 index 7b6c88c..0000000 --- a/src/dominance_algo.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "llir.h" -#include "visitor.h" -#include - -namespace antlrSysY { - -static void _bitwise_and(std::vector& op1, const std::vector& op2) { - for (int i = 0; i < op1.size(); ++ i) { - op1[i] = op1[i] & op2[i]; - } -} - -static void _bitwise_set(std::vector& op1, int l, int r,bool val) { - for (int i = l; i < r; ++ i) { - op1[i] = val; - } -} - -static void _gen_dom_level(BasicBlockPtr_t bb, int level) { - bb->dom_level = level; - for (auto succ :
bb->idomee_list) { - _gen_dom_level(succ, level + 1); - } -} - -void gen_dominance(FunctionPtr_t func) { - // 编译器设计 2E 352 | Engineering A Compiler P479 - // Note: n \in Dom(n) - // Basic iterative idea: Dom(n) = {n} union (intersect Dom(pred(n))) - std::vector> domers; - std::vector bb_list; - const int N = func->bb_list.size(); - auto itr = func->bb_list.begin(); - for (auto basicblock : func->bb_list) { - basicblock->idomee_list.clear(); - basicblock->domer_list.clear(); - domers.push_back({}); - domers.back().resize(N, true); // Dom(i) <- N - basicblock->_dom_helper_index = bb_list.size(); - bb_list.push_back(basicblock); - } - _bitwise_set(domers[0], 1, N, false); // Dom(0) <- {0} - - bool changed = true; - while (changed) { - changed = false; - int i = 0; - for (int i = 1; i < N; ++ i) { - auto cur_bb = bb_list[i]; - std::vector temp(true, N); - // temp = {i} union (intersect Dom(j)), j in pred(i) - for (auto pred : cur_bb->predecessors) { - _bitwise_and(temp, domers[pred->_dom_helper_index]); - } - temp[i] = true; - // if temp != Dom(i) - if (!(temp == domers[i])) { - domers[i] = temp; // Dom(i) <- temp - changed = true; // changed <- true - } - } - } - // set each basicblock's domer - for (int i = 0; i < N; ++ i) { - for (int j = 0; j < N; ++ j) { - if (domers[i][j]) { - bb_list[i]->domer_list.push_back(bb_list[j]); - } - } - } - // get domees and immediate domer - for (int i = 0; i < N; ++ i) { - for (auto domer1 : bb_list[i]->domer_list) { - if (domer1 == bb_list[i]) - continue; - bool flag = true; - // if dom(i)[j] dom dom(i)[k], it cannot be the immediate domer of i - for (auto domer2 : bb_list[i]->domer_list) { - if (domer2 == bb_list[i] || domer2 == domer1) - continue; - if (std::find(domer2->domer_list.begin(), domer2->domer_list.end(), domer1) != domer2->domer_list.end()) { - flag = false; - break; - } - } - if (flag) { - bb_list[i]->idomer = domer1; - domer1->idomee_list.push_back(bb_list[i]); - break; - } - } - } - _gen_dom_level(bb_list[0], 0); 
-} - -void gen_dominance_frontier(FunctionPtr_t func) { - // 编译器设计 2E 368 - // for all node in CFG: DF(n) <- Empty - for (auto bb : func->bb_list) { - bb->dom_frontier.clear(); - } - // for all node in CFG: - -} - -} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index b57ed3d..75b5361 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -66,6 +66,7 @@ int main(int argc, const char **argv) { defaultConf.set(el::Level::Warning, el::ConfigurationType::Format, "%level %loc %msg"); defaultConf.set(el::Level::Error, el::ConfigurationType::Format, "%level %loc %msg"); defaultConf.set(el::Level::Info, el::ConfigurationType::Format, "%level %loc %msg"); + defaultConf.set(el::Level::Verbose, el::ConfigurationType::Format, "%level %loc %msg"); el::Loggers::addFlag(el::LoggingFlag::ColoredTerminalOutput); el::Loggers::reconfigureLogger("default", defaultConf); #pragma endregion @@ -90,6 +91,16 @@ int main(int argc, const char **argv) { auto tree = parser.program(); Visitor visitor(lexer); visitor.visitProgram(tree); + + std::vector> passes = { + std::make_shared(), + std::make_shared(), + }; + + for (auto pass : passes) { + pass->run(visitor.module); + } + if (emit_llvm) { auto llir_file = output_file.substr(0, output_file.rfind(".")) + ".ll"; std::ofstream ofs_llir_file(llir_file); @@ -100,12 +111,6 @@ int main(int argc, const char **argv) { visitor.llir_gen(ofs_llir_file); } - std::vector> passes = {std::make_shared()}; - - for (auto pass : passes) { - pass->run(visitor.module); - } - // std::cout << tree->toStringTree(&parser) << std::endl << std::endl; return 0; diff --git a/src/pass_mem2reg.cpp b/src/pass_mem2reg.cpp index aab4496..c22a533 100644 --- a/src/pass_mem2reg.cpp +++ b/src/pass_mem2reg.cpp @@ -1,14 +1,293 @@ -#include "passes.h" -#include "llir.h" +#include "3rdparty/easylogging++.h" +#include "algos.h" #include "common.h" +#include "llir.h" +#include "passes.h" +#include "visitor.h" +#include +#include +#include +#include namespace antlrSysY { 
-void PassMem2Reg::run(const Module& module) { - for (auto func : module.function_list) { - if (func->is_libfunc()) continue; - +static bool is_alloca_promotable(InstAllocaPtr_t inst) { + for (const auto use : inst->use_list) { + auto user = use.user; + if (dynamic_cast(user)) { + const auto li = dynamic_cast(user); + if (li->type != inst->type) { + return false; + } + } + else if (dynamic_cast(user)) { + const auto si = dynamic_cast(user); + if (si->operand_list[1] == inst || si->type != inst->type) { + return false; + } + } + else if (dynamic_cast(user)) { + const auto gep = dynamic_cast(user); + for (int i = 1; i < gep->operand_list.size(); ++i) { + if (!Value::is(gep->operand_list[i]) || !Value::as(gep->operand_list[i])->value) { + return false; + } + } + } + else { + return false; + } + } + return true; +} + +struct AllocaInfo { + std::vector def_blocks = {}; + std::vector use_blocks = {}; + bool only_in_1_block = true; + BasicBlockPtr_t only_block = nullptr; + InstStore* only_store = nullptr; +}; + +static void analyze_alloca(InstAllocaPtr_t ai, AllocaInfo &info) { + for (auto use : ai->use_list) { + auto user = dynamic_cast(use.user); + if (dynamic_cast(user)) { + const auto si = dynamic_cast(user); + info.def_blocks.push_back(si->parent_bb); + info.only_store = si; + } + else { + assert(dynamic_cast(user)); + const auto li = dynamic_cast(user); + info.use_blocks.push_back(li->parent_bb); + } + if (info.only_in_1_block) { + if (!info.only_block) + info.only_block = user->parent_bb; + else if (info.only_block != user->parent_bb) + info.only_in_1_block = false; + } } } -} \ No newline at end of file +// static bool rewrite_single_store(InstAllocaPtr_t ai, AllocaInfo& alloca_info) { + +// } + +// live in analysis +static void live_in_blocks( + InstAllocaPtr_t ai, + AllocaInfo &alloca_info, + const std::unordered_set &def_blocks, + std::unordered_set &livein_blocks +) { + std::vector worklist(alloca_info.use_blocks.begin(), alloca_info.use_blocks.end()); + // 
for each block, compute whether alloca live in the block + // Check each use block that also defines the value: a store before any load means it is dead on entry; a load before any store means it is live-in, because the incoming value is read + for (int i = 0; i < worklist.size(); ++i) { + auto bb = worklist[i]; + if (def_blocks.count(bb) == 0) continue; + for (auto itr : bb->inst_list) { + // a store to this alloca(variable) before a load, it is dead in this block + if (Value::is(itr) && Value::as(itr)->operand_list[1] == ai) { + worklist[i] = worklist.back(); + worklist.pop_back(); + --i; + break; + } + else if (Value::is(itr)) { + // a load before store, it live in this block (a load's pointer is operand 0, its only operand) + if (Value::as(itr)->operand_list[0] == ai) break; + } + } + } + // add predecessors to get the full region where the var is live + // (liveness is propagated backwards, from uses towards definitions) + while (!worklist.empty()) { + auto bb = worklist.back(); + worklist.pop_back(); + if (!livein_blocks.insert(bb).second) continue; // already done + // a predecessor that does not define the alloca must itself receive the value live-in, + // so push it onto the worklist as well + for (auto pred : bb->predecessors) { + if (def_blocks.count(pred)) continue; + worklist.push_back(pred); + } + } +} + +struct RenameInfo { + BasicBlockPtr_t bb; + BasicBlockPtr_t pred; + std::vector value_list; +}; + +// llvm:PromoteMemoryToRegister.cpp +// https://roife.github.io/2022/02/07/mem2reg/ +// https://github.com/Enna1/LLVM-Study-Notes/blob/master/source/ssa/SSA-Construction.rst +static void _mem_2_reg(FunctionPtr_t func) { + VLOG(4) << " Gen Dominance Tree & Frontier"; + gen_dominance(func); + gen_dominance_frontier(func); + // actually, all variable alloca is placed at block head, so we collect them first + std::vector alloca_list; + std::unordered_map alloca_to_id; + std::unordered_map bb_to_id; + std::unordered_map phi_to_allocaid; + + std::queue worklist; + + for (auto bb : func->bb_list) { + for (auto inst : bb->inst_list) { + if (Value::is(inst)) { + assert(inst->parent_bb == bb); + assert(bb == func->bb_list.front() && "Alloca should be at front of a func"); + auto ai = Value::as(inst); + //
assert(is_alloca_promotable(ai) && "Invalid alloca"); + if (!Type::isType(PointerType::pointedType(ai->type))) + continue; + alloca_list.push_back(ai); + } + } + } + VLOG(4) << " alloca pruning & phi insertion"; + for (unsigned i = 0; i != alloca_list.size(); ++i) { + auto ai = alloca_list[i]; + // remove empty use + if (ai->use_list.empty()) { + ai->parent_bb->inst_list.remove(ai); + alloca_list[i] = alloca_list.back(); + alloca_list.pop_back(); + --i; + continue; + } + AllocaInfo alloca_info; + analyze_alloca(ai, alloca_info); + if (alloca_info.def_blocks.size() == 1) { + LOG(WARNING) << "To rewrite single store"; + } + if (alloca_info.only_in_1_block) { + LOG(WARNING) << "To promote single block alloca"; + } + // numbering bb + if (bb_to_id.empty()) { + int id = 0; + for (auto bb : func->bb_list) { + bb_to_id[bb] = id++; + } + } + alloca_to_id[alloca_list[i]] = i; + std::unordered_set def_blocks(alloca_info.def_blocks.begin(), alloca_info.def_blocks.end()); + std::unordered_set livein_blocks; + live_in_blocks(ai, alloca_info, def_blocks, livein_blocks); + // llvm use IDF to calculate phi blocks. 
+ // But that is too complicated + // SSA book Algo 3.1 + std::vector visited(func->bb_list.size(), false); + for (auto bb : def_blocks) { + worklist.push(bb); + } + while (!worklist.empty()) { + auto bb = worklist.front(); + worklist.pop(); + for (auto frontier : bb->dom_frontier) { + auto frontier_index = bb_to_id.at(frontier); + if (!visited[frontier_index]) { + visited[frontier_index] = true; + if (livein_blocks.count(frontier)) { + auto inst_phi = build_InstPhi(TypeHelper::TYPE_I32, frontier->predecessors, frontier); // phi goes in the frontier block, one slot per frontier predecessor + phi_to_allocaid.insert({inst_phi, i}); + } + if (!def_blocks.count(frontier)) { + worklist.push(frontier); + } + } + } + } + } + if (alloca_list.empty()) return; + // renaming + VLOG(4) << " variable renaming"; + std::vector _init_values; + for (int i = 0; i < alloca_list.size(); ++ i) + _init_values.push_back(ConstantInt::make_shared(0)); + std::vector rename_list = {{func->bb_list.front(), nullptr, _init_values}}; + std::vector visited(bb_to_id.size(), 0); + while (!rename_list.empty()) { + auto rename_info = rename_list.back(); + rename_list.pop_back(); + // fill the phi slot for the edge pred -> bb with the value each alloca holds on exit from pred + for (auto inst : rename_info.bb->inst_list) { + // phi only appear at block head + if (!Value::is(inst)) break; + auto phi = Value::as(inst); + auto alloca_index = phi_to_allocaid.at(phi); + int pred_index = -1; + for (auto pred : rename_info.bb->predecessors) { + pred_index++; + if (pred == rename_info.pred) break; + } + phi->set_incoming_val(pred_index, rename_info.value_list[alloca_index]); + } + // already processed, skip + if (visited[bb_to_id.at(rename_info.bb)]) continue; + visited[bb_to_id.at(rename_info.bb)] = true; + // process instruction + for (auto itr = rename_info.bb->inst_list.begin(); itr != rename_info.bb->inst_list.end();) { + auto inst = *itr++; + if (Value::is(inst)) { + assert(alloca_to_id.count(Value::as(inst))); + rename_info.bb->inst_list.remove(inst); + } + else if (Value::is(inst)) { + auto li = Value::as(inst); + if
(!(Value::is(li->operand_list[0]))) { + continue; + } + auto ai = Value::as(li->operand_list[0]); + if (!Type::isType(PointerType::pointedType(ai->type))) { + continue; + } + int alloca_index = alloca_to_id.at(ai); + rename_info.bb->inst_list.remove(inst); + li->u_replace_users(rename_info.value_list[alloca_index]); + inst->u_remove_from_usees(); + } + else if (Value::is(inst)) { + auto si = Value::as(inst); + if (!(Value::is(si->operand_list[1]))) { + continue; + } + auto ai = Value::as(si->operand_list[1]); + if (!Type::isType(PointerType::pointedType(ai->type))) { + continue; + } + int alloca_index = alloca_to_id.at(ai); + rename_info.value_list[alloca_index] = si->operand_list[0]; + inst->u_remove_from_usees(); + // I dont think anyone will use a store? + si->u_replace_users(nullptr); + rename_info.bb->inst_list.remove(inst); + } + else if (Value::is(inst)) { + auto phi = Value::as(inst); + int alloca_index = phi_to_allocaid.at(phi); + rename_info.value_list[alloca_index] = phi; + } + } + for (auto succ : rename_info.bb->successors) { + rename_list.push_back({succ, rename_info.bb, rename_info.value_list}); + } + } +} + +void PassMem2Reg::run(const Module &module) { + LOG(INFO) << "Run pass " << pass_name; + for (auto func : module.function_list) { + if (func->is_libfunc()) continue; + _mem_2_reg(func); + } +} + +} // namespace antlrSysY \ No newline at end of file diff --git a/src/visitor_factory.cpp b/src/visitor_factory.cpp index 06c0b94..001995b 100644 --- a/src/visitor_factory.cpp +++ b/src/visitor_factory.cpp @@ -139,4 +139,14 @@ std::shared_ptr build_InstGEP( return inst; } +InstPhiPtr_t build_InstPhi( + TypePtr_t type, + const decltype(Function::bb_list) &incoming_vals, + BasicBlockPtr_t parent_bb +) { + auto inst = std::make_shared(type, incoming_vals, parent_bb); + parent_bb->inst_list.insert(parent_bb->inst_list.begin(), inst); + return inst; +} + } // namespace antlrSysY \ No newline at end of file diff --git a/src/visitor_llir_gen.cpp 
b/src/visitor_llir_gen.cpp index 3806e1b..2649a95 100644 --- a/src/visitor_llir_gen.cpp +++ b/src/visitor_llir_gen.cpp @@ -107,8 +107,7 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl auto block = *block_itr; sysy_assert(block->ir_seqno == -1); // multi-alloc is error block->ir_seqno = reg_count++; - for (int j = 0; j < block->inst_list.size(); ++j) { - auto inst = block->inst_list[j]; + for (auto inst : block->inst_list) { sysy_assert(inst->ir_seqno == -1); // multi-alloc is error switch (inst->tag) { // These are not to get a seqno @@ -138,6 +137,7 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl case InstTag::Load: case InstTag::GEP: case InstTag::Alloca: + case InstTag::Phi: case InstTag::Zext: inst->ir_seqno = reg_count++; break; // These should not be generated in frontend stage default: panic("Unexpected instruction"); @@ -150,9 +150,8 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl if (block_itr != block_list.begin()) { ostr << block->ir_seqno << ":" << std::endl; } - for (int j = 0; j < block->inst_list.size(); ++j) { + for (auto _inst : block->inst_list) { ostr << " "; - auto _inst = block->inst_list[j]; VLOG(5) << "Build inst" << _inst->ir_seqno << ": " << _inst->to_string(); switch (_inst->tag) { case InstTag::Br: { @@ -351,13 +350,13 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl assert(Type::isType(inst->operand_list[0]->type)); ostr << "%" << inst->ir_seqno << " = load " << inst->type->to_IR_string() << ", "; if (Value::is(inst->operand_list[0])) { - auto op1 = Value::as(inst->operand_list[0]); - ostr << op1->type->to_IR_string() << " @" << op1->name; + auto op = Value::as(inst->operand_list[0]); + ostr << op->type->to_IR_string() << " @" << op->name; } else if (Value::is(inst->operand_list[0])) { - auto op1 = Value::as(inst->operand_list[0]); - assert(op1->ir_seqno >= 0); - ostr << op1->type->to_IR_string() << " %" << op1->ir_seqno; + auto op = 
Value::as(inst->operand_list[0]); + assert(op->ir_seqno >= 0); + ostr << op->type->to_IR_string() << " %" << op->ir_seqno; } else if (Value::is(inst->operand_list[0])) { auto op0 = Value::as(inst->operand_list[0]); @@ -428,6 +427,42 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl ostr << "i32"; break; } + case InstTag::Phi: { + auto inst = Value::as(_inst); + assert(inst->ir_seqno >= 0); + ostr << "%" << inst->ir_seqno << " = phi " << inst->type->to_IR_string() << " "; + for (int i = 0; i < inst->operand_list.size(); ++i) { + auto op = inst->operand_list[i]; + ostr << "["; + if (Value::is(op)) { + auto op0 = Value::as(op); + ostr << "@" << op0->name; + } + else if (Value::is(op)) { + auto op0 = Value::as(op); + ostr << "%" << op0->ir_seqno; + } + else if (Value::is(op)) { + auto op0 = Value::as(op); + ostr << "%" << op0->ir_seqno; + } + else if (Value::is(op)) { + auto op0 = Value::as(op); + ostr << op0->value; + } + else { + LOG(WARNING) << "Unexpected type of op: " << op->to_string(); + assert(0); + } + auto pred = inst->parent_bb->predecessors.begin(); + std::advance(pred, i); + ostr << ", %" << (*pred)->ir_seqno << "]"; + if (i < inst->operand_list.size() - 1) { + ostr << ", "; + } + } + break; + } // These should not be generated in frontend stage default: panic("Unexpected instruction"); } @@ -439,7 +474,7 @@ static void _gen_blocks(std::ostream &ostr, const std::list &bl void Visitor::llir_gen(std::ostream &ostr) { #pragma region GenLibFuncDecl for (auto &lib_func_name : libfunc_list) { - LOG(DEBUG) << "Gen LibFunc " << lib_func_name; + VLOG(6) << "Gen LibFunc " << lib_func_name; auto lib_func = _func_tab.get_name(lib_func_name).value(); auto lib_func_type = std::dynamic_pointer_cast(lib_func->type); ostr << "declare" @@ -458,7 +493,7 @@ void Visitor::llir_gen(std::ostream &ostr) { #pragma region GenGlobDecl for (auto &global_var : module.global_var_list) { // both int and arr have the same leading part - LOG(DEBUG) << "Gen Global " << 
global_var->name; + VLOG(5) << "Gen Global " << global_var->name; ostr << "@" << global_var->name << " = " << "dso_local" << " " << (global_var->is_const ? "constant" : "global") << " ";