357 lines
12 KiB
C++
357 lines
12 KiB
C++
#include "3rdparty/easylogging++.h"
|
||
#include "algos.h"
|
||
#include "common.h"
|
||
#include "llir.h"
|
||
#include "pass.h"
|
||
#include "visitor.h"
|
||
#include <map>
|
||
#include <queue>
|
||
#include <unordered_set>
|
||
#include <vector>
|
||
|
||
namespace CompSysY
|
||
{
|
||
|
||
struct AllocaInfo
|
||
{
|
||
std::vector<BasicBlockPtr_t> def_blocks = {};
|
||
std::vector<BasicBlockPtr_t> use_blocks = {};
|
||
};
|
||
|
||
/// Classifies every user of `ai` and records the block that user lives in.
///
/// A store user appends its parent block to `info.def_blocks`. Any other user
/// is asserted to be a load and appends its parent block to `info.use_blocks`.
/// One entry is appended per user; nothing is de-duplicated here.
///
/// @param ai   the alloca whose use list is walked
/// @param info output record; entries are appended, existing ones are kept
static void analyze_alloca_defuse(InstAllocaPtr_t ai, AllocaInfo &info)
{
    for (const auto &use : ai->use_list)
    {
        auto *user_inst = dynamic_cast<Instruction *>(use.user);

        // Stores count as definitions.
        if (auto *store = dynamic_cast<InstStore *>(user_inst))
        {
            info.def_blocks.push_back(store->parent_bb);
            continue;
        }

        // Everything that is not a store is expected to be a load.
        auto *load = dynamic_cast<InstLoad *>(user_inst);
        assert(load != nullptr);
        info.use_blocks.push_back(load->parent_bb);
    }
}
|
||
|
||
/// Debug helper: writes the def and use block names recorded in `info` for
/// alloca `ai` to std::cout and flushes the stream.
///
/// Each name is followed by ", " (including the last one), matching the other
/// print_* helpers in this file.
static void print_alloca_defuse(InstAllocaPtr_t ai, const AllocaInfo &info)
{
    std::cout << "Alloca " << ai->name << " Def: [";
    for (const auto &def_bb : info.def_blocks) std::cout << def_bb->name << ", ";
    std::cout << "]\n";

    std::cout << " Use: [";
    for (const auto &use_bb : info.use_blocks) std::cout << use_bb->name << ", ";
    std::cout << "]\n";

    std::cout.flush();
}
|
||
|
||
// live in analysis
|
||
static void analyze_alloca_livein(
|
||
InstAllocaPtr_t ai,
|
||
AllocaInfo &alloca_info,
|
||
const std::unordered_set<BasicBlockPtr_t> &def_blocks,
|
||
std::unordered_set<BasicBlockPtr_t> &livein_blocks
|
||
)
|
||
{
|
||
std::vector<BasicBlockPtr_t> worklist(alloca_info.use_blocks.begin(), alloca_info.use_blocks.end());
|
||
// for each block, compute whether alloca live in the block
|
||
// 检查每个block,如果store before load,就说明dead;如果load before store,说明live,因为被用到了
|
||
for (int i = 0; i < worklist.size(); ++i)
|
||
{
|
||
auto bb = worklist[i];
|
||
if (def_blocks.count(bb) == 0) continue;
|
||
for (auto itr : bb->inst_list)
|
||
{
|
||
// a store to this alloca(variable) before a load, it is dead in this block
|
||
if (shared_cast<InstStore>(itr) && shared_cast<InstStore>(itr)->operand_list[1] == ai)
|
||
{
|
||
worklist[i] = worklist.back();
|
||
worklist.pop_back();
|
||
--i;
|
||
break;
|
||
}
|
||
else if (shared_cast<InstLoad>(itr))
|
||
{
|
||
// a load before store, it live in this block
|
||
if (shared_cast<InstLoad>(itr)->operand_list[0] == ai) break;
|
||
}
|
||
}
|
||
}
|
||
// add predecessors to get the full region where the var is live
|
||
// 好像cs143里面就有讲过这个东西,liveness分析好像是从后往前推的
|
||
while (!worklist.empty())
|
||
{
|
||
auto bb = worklist.back();
|
||
worklist.pop_back();
|
||
if (!livein_blocks.insert(bb).second) continue; // already done
|
||
// 如果alloca没有在某个块中被定义,那么它一定是从前面的某个块里面live in过来的
|
||
// 所以把它也塞进队列进行处理
|
||
for (auto pred : bb->pred_list)
|
||
{
|
||
if (def_blocks.count(pred)) continue;
|
||
worklist.push_back(pred);
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Debug helper: writes the names of the live-in blocks computed for alloca
/// `ai` to std::cout (each followed by ", ") and flushes the stream.
/// The iteration order is that of the unordered set.
static void print_alloca_livein(InstAllocaPtr_t ai, const std::unordered_set<BasicBlockPtr_t> &livein_blocks)
{
    std::cout << "Live-in blocks of " << ai->name << ": [";
    for (const auto &live_bb : livein_blocks) std::cout << live_bb->name << ", ";
    std::cout << "]\n";
    std::cout.flush();
}
|
||
|
||
struct RenameInfo
|
||
{
|
||
BasicBlockPtr_t bb;
|
||
BasicBlockPtr_t pred;
|
||
std::vector<ValuePtr_t> value_list;
|
||
};
|
||
|
||
/// Debug helper: dumps the CFG and dominance fields of `bb` to std::cout.
///
/// Printed in order: name, dom_level, pred_list, succ_list, IDOM (or "null"
/// when unset), DOM_set, idom_set and DF_set. Every list element is followed
/// by ", ". The stream is flushed at the end.
static void print_dom_info(BasicBlockPtr_t bb)
{
    std::cout << bb->name << ":\n";
    std::cout << " level: " << bb->dom_level << "\n";

    std::cout << " pred: [";
    for (const auto &p : bb->pred_list) std::cout << p->name << ", ";
    std::cout << "]\n";

    std::cout << " succ: [";
    for (const auto &s : bb->succ_list) std::cout << s->name << ", ";
    std::cout << "]\n";

    std::cout << " idomer: ";
    if (!bb->IDOM)
    {
        std::cout << "null";
    }
    else
    {
        std::cout << bb->IDOM->name;
    }
    std::cout << "\n";

    std::cout << " dom: [";
    for (const auto &d : bb->DOM_set) std::cout << d->name << ", ";
    std::cout << "]\n";

    std::cout << " idom: [";
    for (const auto &child : bb->idom_set) std::cout << child->name << ", ";
    std::cout << "]\n";

    std::cout << " DF: [";
    for (const auto &frontier_bb : bb->DF_set) std::cout << frontier_bb->name << ", ";
    std::cout << "]\n";

    std::cout.flush();
}
|
||
|
||
/// Debug helper: prints `rt` with print_dom_info(), then recurses into every
/// block in `rt->idom_set` (pre-order).
/// NOTE(review): idom_set presumably holds the blocks immediately dominated
/// by `rt`, i.e. its dominator-tree children — confirm against gen_dominance().
static void print_dom_tree(BasicBlockPtr_t rt)
{
    print_dom_info(rt);
    for (const auto &child : rt->idom_set) print_dom_tree(child);
}
|
||
|
||
// llvm:PromoteMemoryToRegister.cpp
|
||
// https://roife.github.io/2022/02/07/mem2reg/
|
||
// https://github.com/Enna1/LLVM-Study-Notes/blob/master/source/ssa/SSA-Construction.rst
|
||
static void _mem_2_reg(FunctionPtr_t func)
|
||
{
|
||
LOG(TRACE) << "Run mem2reg for " << func->name;
|
||
VLOG(6) << "[mem2reg] Gen Dominance Tree & Frontier";
|
||
gen_dominance(func);
|
||
gen_dominance_frontier(func);
|
||
if (VLOG_IS_ON(6)) print_dom_tree(func->bb_list.front());
|
||
// actually, all variable alloca is placed at block head, so we collect them first
|
||
std::vector<InstAllocaPtr_t> alloca_list;
|
||
std::unordered_map<InstAllocaPtr_t, int> alloca_to_id;
|
||
std::unordered_map<BasicBlockPtr_t, int> bb_to_id;
|
||
std::unordered_map<InstPhiPtr_t, int> phi_to_allocaid;
|
||
|
||
std::queue<BasicBlockPtr_t> worklist;
|
||
|
||
for (auto bb : func->bb_list)
|
||
{
|
||
for (auto inst : bb->inst_list)
|
||
{
|
||
if (shared_cast<InstAlloca>(inst))
|
||
{
|
||
// assert(inst->parent_bb == bb);
|
||
// 貌似没问题,parent_bb是插入alloca的时候的那个block,而alloca实际上是被插入在entry_block的
|
||
assert(bb == func->bb_list.front() && "Alloca should be at front of a func");
|
||
auto ai = shared_cast<InstAlloca>(inst);
|
||
// 暂时只考虑int类型的alloca, 数组和指针都先不管
|
||
if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
|
||
alloca_list.push_back(ai);
|
||
}
|
||
}
|
||
}
|
||
int id = 0;
|
||
for (auto bb : func->bb_list)
|
||
{
|
||
bb_to_id[bb] = id++;
|
||
}
|
||
VLOG(6) << "[mem2reg] Alloca pruning & phi insertion";
|
||
for (unsigned i = 0; i != alloca_list.size(); ++i)
|
||
{
|
||
auto ai = alloca_list[i];
|
||
// remove empty use
|
||
if (ai->use_list.empty())
|
||
{
|
||
ai->parent_bb->inst_list.remove(ai);
|
||
alloca_list[i] = alloca_list.back();
|
||
alloca_list.pop_back();
|
||
--i;
|
||
continue;
|
||
}
|
||
// analyze alloca's def&use
|
||
AllocaInfo alloca_info;
|
||
analyze_alloca_defuse(ai, alloca_info);
|
||
if (VLOG_IS_ON(6))
|
||
{
|
||
print_alloca_defuse(ai, alloca_info);
|
||
}
|
||
alloca_to_id[alloca_list[i]] = i;
|
||
// compute alloca's livein blocks
|
||
std::unordered_set<BasicBlockPtr_t> def_blocks(BEGINEND(alloca_info.def_blocks));
|
||
std::unordered_set<BasicBlockPtr_t> livein_blocks;
|
||
analyze_alloca_livein(ai, alloca_info, def_blocks, livein_blocks);
|
||
if (VLOG_IS_ON(6))
|
||
{
|
||
print_alloca_livein(ai, livein_blocks);
|
||
}
|
||
// llvm use IDF to calculate phi blocks. But that is too complicated
|
||
// SSA book Algo 3.1, with non-livein blocks skipped
|
||
// 算法本质上是对于该alloca的每个def块,在对应的支配边界上放置phi(如果放过就不放了),同时如果alloca没有在DF中用到,也会跳过
|
||
std::vector<bool> visited(func->bb_list.size(), false);
|
||
for (auto bb : def_blocks)
|
||
{
|
||
worklist.push(bb);
|
||
}
|
||
while (!worklist.empty())
|
||
{
|
||
auto bb = worklist.front();
|
||
worklist.pop();
|
||
for (auto frontier : bb->DF_set)
|
||
{
|
||
auto frontier_index = bb_to_id.at(frontier);
|
||
if (!visited[frontier_index])
|
||
{
|
||
visited[frontier_index] = true;
|
||
if (livein_blocks.count(frontier))
|
||
{
|
||
auto inst_phi = InstPhi::New(TypeHelper::TYPE_I32, frontier->pred_list, frontier, ai->name);
|
||
phi_to_allocaid.insert({inst_phi, i});
|
||
}
|
||
if (!def_blocks.count(frontier))
|
||
{
|
||
worklist.push(frontier);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if (alloca_list.empty()) return;
|
||
|
||
VLOG(6) << "[mem2reg] Variable renaming";
|
||
/*
|
||
From LLVM, 编译器设计书上写的算法比较抽象, 所以抄了个别的(乐)
|
||
本质上是一个先序dfs,从entry开始沿着succ往后走,在遍历中更新每个alloca的值,然后替换
|
||
*/
|
||
std::vector<ValuePtr_t> _init_values;
|
||
for (int i = 0; i < alloca_list.size(); ++i) _init_values.push_back(ConstantInt::New(0));
|
||
std::vector<RenameInfo> rename_list;
|
||
rename_list.push_back({func->bb_list.front(), nullptr, _init_values});
|
||
std::vector<bool> visited(bb_to_id.size(), 0);
|
||
while (!rename_list.empty())
|
||
{
|
||
auto rename_info = rename_list.back();
|
||
rename_list.pop_back();
|
||
// 将phi指令中对应pred的值更新,因为它在处理pred的时候已经被重新定值
|
||
for (auto inst : rename_info.bb->inst_list)
|
||
{
|
||
// phi only appear at block head
|
||
if (!shared_cast<InstPhi>(inst)) break;
|
||
auto phi = shared_cast<InstPhi>(inst);
|
||
int pred_index = GETINDEX(rename_info.bb->pred_list, rename_info.pred);
|
||
phi->set_incoming_val(pred_index, rename_info.value_list[phi_to_allocaid.at(phi)]);
|
||
}
|
||
// already processed, skip
|
||
if (visited[bb_to_id.at(rename_info.bb)]) continue;
|
||
visited[bb_to_id.at(rename_info.bb)] = true;
|
||
// process instruction
|
||
// 这里其实十分的清楚,就是把所有load alloca的load指令删掉,把这条load的use换成alloca的值(而不是alloca地址)
|
||
// 然后把store alloca的store也删掉,同时更新当前的alloca的值
|
||
// 遇到phi指令要更新alloca的值为phi
|
||
// 如果是alloca,记得直接删掉(当然只删掉前面处理过的int类型的)
|
||
for (auto itr = rename_info.bb->inst_list.begin(); itr != rename_info.bb->inst_list.end();)
|
||
{
|
||
auto inst = *itr++; // increase itr first, it will get invalidated by remove
|
||
// we skip non-integer alloca, they are not in our alloca_list
|
||
if (shared_cast<InstAlloca>(inst))
|
||
{
|
||
if (alloca_to_id.count(shared_cast<InstAlloca>(inst))) rename_info.bb->inst_list.remove(inst);
|
||
}
|
||
else if (auto inst_ld = shared_cast<InstLoad>(inst))
|
||
{
|
||
if (!(shared_cast<InstAlloca>(inst_ld->operand_list[0]))) continue;
|
||
auto ai = shared_cast<InstAlloca>(inst_ld->operand_list[0]);
|
||
if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
|
||
rename_info.bb->inst_list.remove(inst_ld);
|
||
inst_ld->u_replace_users(rename_info.value_list[alloca_to_id.at(ai)]);
|
||
inst_ld->u_remove_from_usees();
|
||
}
|
||
else if (auto inst_st = shared_cast<InstStore>(inst))
|
||
{
|
||
if (!(shared_cast<InstAlloca>(inst_st->operand_list[1]))) continue;
|
||
auto ai = shared_cast<InstAlloca>(inst_st->operand_list[1]);
|
||
if (!Type::isType<IntegerType>(get_pointed_type(ai->type))) continue;
|
||
rename_info.value_list[alloca_to_id.at(ai)] = inst_st->operand_list[0];
|
||
inst_st->u_remove_from_usees();
|
||
rename_info.bb->inst_list.remove(inst_st);
|
||
}
|
||
else if (auto phi = shared_cast<InstPhi>(inst))
|
||
{
|
||
rename_info.value_list[phi_to_allocaid.at(phi)] = phi;
|
||
}
|
||
}
|
||
for (auto succ : rename_info.bb->succ_list)
|
||
{
|
||
rename_list.push_back({succ, rename_info.bb, rename_info.value_list});
|
||
}
|
||
}
|
||
}
|
||
|
||
void PassMem2Reg::run(const Module &module)
|
||
{
|
||
LOG(INFO) << "Run pass " << pass_name;
|
||
for (auto func : module.function_list)
|
||
{
|
||
if (func->is_libfunc()) continue;
|
||
_mem_2_reg(func);
|
||
}
|
||
}
|
||
|
||
} // namespace CompSysY
|