#include "3rdparty/easylogging++.h"
#include "algos.h"
#include "common.h"
#include "llir.h"
#include "pass.h"
#include "visitor.h"
#include <cassert>
#include <cstddef>
#include <iostream>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// NOTE(review): the reviewed text of this file had lost its line breaks and
// everything written between angle brackets (system header names, template
// arguments, cast targets). They are reconstructed here from how each value is
// used and from the naming convention of the visible typedefs
// (BasicBlockPtr_t, InstAllocaPtr_t, FunctionPtr_t). The spellings ValuePtr_t,
// InstPhiPtr_t, InstAlloca, InstLoad, InstStore, IntegerType and the `int`
// mapped types are assumptions -- TODO confirm against llir.h before merging.

namespace CompSysY {

/// Blocks that store to (def) and load from (use) a single alloca.
/// A block appears once per store/load, so duplicates are possible.
struct AllocaInfo {
  std::vector<BasicBlockPtr_t> def_blocks = {};
  std::vector<BasicBlockPtr_t> use_blocks = {};
};

/// Classify every user of `ai` as a store (definition) or a load (use) and
/// record the user's parent block in `info`.
///
/// Any user that is not a store is expected to be a load; this is asserted.
static void analyze_alloca_defuse(InstAllocaPtr_t ai, AllocaInfo &info) {
  for (const auto &use : ai->use_list) {
    if (const auto si = dynamic_cast<InstStore *>(use.user)) {
      info.def_blocks.push_back(si->parent_bb);
    } else {
      // Cast once and assert on the result so the pointer that is checked is
      // the pointer that is dereferenced.
      const auto li = dynamic_cast<InstLoad *>(use.user);
      assert(li);
      info.use_blocks.push_back(li->parent_bb);
    }
  }
}

/// Debug helper: print the def/use blocks collected for `ai` to stdout.
static void print_alloca_defuse(InstAllocaPtr_t ai, const AllocaInfo &info) {
  std::cout << "Alloca " << ai->name;
  std::cout << " Def: [";
  for (const auto &blk : info.def_blocks) {
    std::cout << blk->name << ", ";
  }
  std::cout << "]\n";
  std::cout << " Use: [";
  for (const auto &blk : info.use_blocks) {
    std::cout << blk->name << ", ";
  }
  std::cout << "]\n";
  std::cout.flush();
}

/// Live-in analysis: compute the set of blocks on whose entry the value held
/// by `ai` is live.
///
/// @param ai            the alloca being analysed
/// @param alloca_info   def/use blocks of `ai` (only `use_blocks` is read)
/// @param def_blocks    set of blocks containing a store to `ai`
/// @param livein_blocks output; blocks are inserted into this set
static void analyze_alloca_livein(
  InstAllocaPtr_t ai,
  AllocaInfo &alloca_info,
  const std::unordered_set<BasicBlockPtr_t> &def_blocks,
  std::unordered_set<BasicBlockPtr_t> &livein_blocks
) {
  std::vector<BasicBlockPtr_t> worklist(alloca_info.use_blocks.begin(), alloca_info.use_blocks.end());

  // For each using block decide whether the alloca is live on entry.
  // A store before any load means the incoming value is dead in that block;
  // a load before any store means it is live, because the incoming value is read.
  for (std::size_t i = 0; i < worklist.size();) {
    const auto bb = worklist[i]; // copy: worklist[i] may be overwritten below
    bool store_comes_first = false;
    // Blocks without a store cannot kill the incoming value; keep them as-is.
    if (def_blocks.count(bb) != 0) {
      for (const auto &inst : bb->inst_list) {
        if (const auto st = shared_cast<InstStore>(inst)) {
          // A store to this alloca before any load: dead on entry.
          if (st->operand_list[1] == ai) {
            store_comes_first = true;
            break;
          }
        } else if (const auto ld = shared_cast<InstLoad>(inst)) {
          // A load of this alloca before any store: live on entry.
          if (ld->operand_list[0] == ai) break;
        }
      }
    }
    if (store_comes_first) {
      // Swap-remove; do not advance so the element moved into slot i is examined next.
      worklist[i] = worklist.back();
      worklist.pop_back();
    } else {
      ++i;
    }
  }

  // Walk predecessors to obtain the full region where the variable is live.
  // (Liveness is propagated backwards, from uses towards definitions.)
  while (!worklist.empty()) {
    const auto bb = worklist.back();
    worklist.pop_back();
    if (!livein_blocks.insert(bb).second) continue; // already done
    // If a predecessor does not define the alloca, the value must be live into
    // that predecessor as well, so queue it for processing.
    for (const auto &pred : bb->pred_list) {
      if (def_blocks.count(pred)) continue;
      worklist.push_back(pred);
    }
  }
}

/// Debug helper: print the live-in blocks computed for `ai` to stdout.
static void print_alloca_livein(InstAllocaPtr_t ai, const std::unordered_set<BasicBlockPtr_t> &livein_blocks) {
  std::cout << "Live-in blocks of " << ai->name << ": [";
  for (const auto &livein : livein_blocks) {
    std::cout << livein->name << ", ";
  }
  std::cout << "]\n";
  std::cout.flush();
}

/// One pending step of the renaming walk: visit `bb`, arriving over the edge
/// from `pred`, with `value_list[k]` being the current value of alloca k.
struct RenameInfo {
  BasicBlockPtr_t bb;
  BasicBlockPtr_t pred;
  std::vector<ValuePtr_t> value_list;
};

/// Debug helper: print the CFG neighbours and dominance data stored on `bb`.
static void print_dom_info(BasicBlockPtr_t bb) {
  std::cout << bb->name << ":\n";
  std::cout << " level: " << bb->dom_level << "\n";
  std::cout << " pred: [";
  for (const auto &pred : bb->pred_list) {
    std::cout << pred->name << ", ";
  }
  std::cout << "]\n";
  std::cout << " succ: [";
  for (const auto &succ : bb->succ_list) {
    std::cout << succ->name << ", ";
  }
  std::cout << "]\n";
  std::cout << " idomer: ";
  if (bb->IDOM)
    std::cout << bb->IDOM->name;
  else
    std::cout << "null";
  std::cout << "\n";
  std::cout << " dom: [";
  for (const auto &dom : bb->DOM_set) {
    std::cout << dom->name << ", ";
  }
  std::cout << "]\n";
  std::cout << " idom: [";
  for (const auto &idom : bb->idom_set) {
    std::cout << idom->name << ", ";
  }
  std::cout << "]\n";
  std::cout << " DF: [";
  for (const auto &df : bb->DF_set) {
    std::cout << df->name << ", ";
  }
  std::cout << "]\n";
  std::cout.flush();
}

/// Debug helper: print `rt` and then, recursively, every block in its `idom_set`.
static void print_dom_tree(BasicBlockPtr_t rt) {
  print_dom_info(rt);
  for (const auto &idom : rt->idom_set) {
    print_dom_tree(idom);
  }
}

// llvm:PromoteMemoryToRegister.cpp
// https://roife.github.io/2022/02/07/mem2reg/
// https://github.com/Enna1/LLVM-Study-Notes/blob/master/source/ssa/SSA-Construction.rst
/// Promote integer allocas of `func` to SSA values.
///
/// Steps: (1) build dominance information, (2) collect integer allocas from
/// the entry block, (3) drop unused allocas and insert phi nodes on the
/// dominance frontier of each defining block (restricted to live-in blocks),
/// (4) walk the CFG replacing loads with the current value and deleting the
/// promoted loads, stores and allocas.
static void _mem_2_reg(FunctionPtr_t func) {
  LOG(TRACE) << "Run mem2reg for " << func->name;
  VLOG(6) << "[mem2reg] Gen Dominance Tree & Frontier";
  gen_dominance(func);
  gen_dominance_frontier(func);
  if (VLOG_IS_ON(6)) print_dom_tree(func->bb_list.front());

  // All variable allocas are placed at the head of the function, so collect them first.
  std::vector<InstAllocaPtr_t> alloca_list;
  std::unordered_map<InstAllocaPtr_t, int> alloca_to_id;
  std::unordered_map<BasicBlockPtr_t, int> bb_to_id;
  std::unordered_map<InstPhiPtr_t, int> phi_to_allocaid;
  std::queue<BasicBlockPtr_t> worklist;

  for (const auto &bb : func->bb_list) {
    for (const auto &inst : bb->inst_list) {
      const auto ai = shared_cast<InstAlloca>(inst);
      if (!ai) continue;
      // assert(inst->parent_bb == bb);
      // Not asserted: parent_bb is the block that was current when the alloca
      // was created, whereas the alloca itself is inserted into the entry block.
      assert(bb == func->bb_list.front() && "Alloca should be at front of a func");
      // For now only int allocas are promoted; arrays and pointers are left alone.
      if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
      alloca_list.push_back(ai);
    }
  }

  int id = 0;
  for (const auto &bb : func->bb_list) {
    bb_to_id[bb] = id++;
  }

  VLOG(6) << "[mem2reg] Alloca pruning & phi insertion";
  for (std::size_t i = 0; i < alloca_list.size();) {
    const auto ai = alloca_list[i]; // copy: slot i may be overwritten below

    // Remove allocas that are never used.
    if (ai->use_list.empty()) {
      // Erase from the entry block, which is where the alloca was found (see
      // the assertion in the collection loop). ai->parent_bb is not used here
      // because it may name a different block than the one holding the alloca.
      func->bb_list.front()->inst_list.remove(ai);
      // Swap-remove; do not advance so the element moved into slot i is handled next.
      alloca_list[i] = alloca_list.back();
      alloca_list.pop_back();
      continue;
    }

    // Analyse the alloca's defs and uses.
    AllocaInfo alloca_info;
    analyze_alloca_defuse(ai, alloca_info);
    if (VLOG_IS_ON(6)) {
      print_alloca_defuse(ai, alloca_info);
    }
    alloca_to_id[ai] = static_cast<int>(i);

    // Compute the alloca's live-in blocks.
    std::unordered_set<BasicBlockPtr_t> def_blocks(BEGINEND(alloca_info.def_blocks));
    std::unordered_set<BasicBlockPtr_t> livein_blocks;
    analyze_alloca_livein(ai, alloca_info, def_blocks, livein_blocks);
    if (VLOG_IS_ON(6)) {
      print_alloca_livein(ai, livein_blocks);
    }

    // LLVM uses the iterated dominance frontier to find phi blocks, but that is
    // too complicated here. This is SSA book Algo 3.1 with non-live-in blocks
    // skipped: for every defining block of the alloca, place a phi in each block
    // of its dominance frontier (at most once per block), skipping frontier
    // blocks where the alloca is not live-in.
    std::vector<bool> visited(func->bb_list.size(), false);
    for (const auto &bb : def_blocks) {
      worklist.push(bb);
    }
    while (!worklist.empty()) {
      const auto bb = worklist.front();
      worklist.pop();
      for (const auto &frontier : bb->DF_set) {
        const auto frontier_index = bb_to_id.at(frontier);
        if (visited[frontier_index]) continue;
        visited[frontier_index] = true;
        if (livein_blocks.count(frontier)) {
          auto inst_phi = InstPhi::New(TypeHelper::TYPE_I32, frontier->pred_list, frontier, ai->name);
          phi_to_allocaid.insert({inst_phi, static_cast<int>(i)});
        }
        // A phi acts as a new definition, so the frontier's own frontier must be
        // processed too (unless it is already queued as a defining block).
        if (!def_blocks.count(frontier)) {
          worklist.push(frontier);
        }
      }
    }

    ++i;
  }

  if (alloca_list.empty()) return;

  VLOG(6) << "[mem2reg] Variable renaming";
  /*
    Taken from LLVM; the textbook algorithm is rather abstract, so a different
    one was borrowed. It is essentially a pre-order DFS that starts at the entry
    block and follows successor edges, tracking the current value of every
    alloca along the way and substituting it for loads.
  */
  std::vector<ValuePtr_t> _init_values;
  _init_values.reserve(alloca_list.size());
  // One ConstantInt::New(0) call per alloca, as in the original loop.
  for (std::size_t i = 0; i < alloca_list.size(); ++i) _init_values.push_back(ConstantInt::New(0));

  std::vector<RenameInfo> rename_list;
  rename_list.push_back({func->bb_list.front(), nullptr, _init_values});
  std::vector<bool> visited(bb_to_id.size(), false);

  while (!rename_list.empty()) {
    auto rename_info = std::move(rename_list.back());
    rename_list.pop_back();

    // Fill in the phi operand for the edge coming from pred: its value was
    // (re)defined while pred was being processed. This runs on every arrival,
    // including arrivals at an already-visited block.
    for (const auto &inst : rename_info.bb->inst_list) {
      // Phis only appear at the block head.
      const auto phi = shared_cast<InstPhi>(inst);
      if (!phi) break;
      int pred_index = GETINDEX(rename_info.bb->pred_list, rename_info.pred);
      phi->set_incoming_val(pred_index, rename_info.value_list[phi_to_allocaid.at(phi)]);
    }

    // Already processed, skip.
    const auto bb_index = bb_to_id.at(rename_info.bb);
    if (visited[bb_index]) continue;
    visited[bb_index] = true;

    // Process instructions:
    //  - a load from a promoted alloca is deleted and its users are redirected
    //    to the alloca's current value (not the alloca's address);
    //  - a store to a promoted alloca is deleted and becomes the current value;
    //  - a phi becomes the current value of its alloca;
    //  - a promoted alloca itself is deleted (only the int ones handled above).
    auto &inst_list = rename_info.bb->inst_list;
    for (auto itr = inst_list.begin(); itr != inst_list.end();) {
      auto inst = *itr++; // increase itr first, it will get invalidated by remove
      if (const auto inst_alloca = shared_cast<InstAlloca>(inst)) {
        // Non-integer allocas are skipped; they are not in alloca_to_id.
        if (alloca_to_id.count(inst_alloca)) inst_list.remove(inst);
      } else if (auto inst_ld = shared_cast<InstLoad>(inst)) {
        const auto ai = shared_cast<InstAlloca>(inst_ld->operand_list[0]);
        if (!ai) continue;
        if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
        inst_list.remove(inst_ld);
        inst_ld->u_replace_users(rename_info.value_list[alloca_to_id.at(ai)]);
        inst_ld->u_remove_from_usees();
      } else if (auto inst_st = shared_cast<InstStore>(inst)) {
        const auto ai = shared_cast<InstAlloca>(inst_st->operand_list[1]);
        if (!ai) continue;
        if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
        rename_info.value_list[alloca_to_id.at(ai)] = inst_st->operand_list[0];
        inst_st->u_remove_from_usees();
        inst_list.remove(inst_st);
      } else if (auto phi = shared_cast<InstPhi>(inst)) {
        rename_info.value_list[phi_to_allocaid.at(phi)] = phi;
      }
    }

    // Each successor gets its own snapshot of the current values.
    for (const auto &succ : rename_info.bb->succ_list) {
      rename_list.push_back({succ, rename_info.bb, rename_info.value_list});
    }
  }
}

/// Run mem2reg on every function of `module` for which is_libfunc() is false.
void PassMem2Reg::run(const Module &module) {
  LOG(INFO) << "Run pass " << pass_name;
  for (const auto &func : module.function_list) {
    if (func->is_libfunc()) continue;
    _mem_2_reg(func);
  }
}

} // namespace CompSysY