1 change: 1 addition & 0 deletions include/core/allocator.h
@@ -27,6 +27,7 @@ namespace infini {
// TODO: a data structure may be needed to store the free blocks, so they are easy to manage and merge
// HINT: a map can store the free blocks, with the block's start/end address as the key and the block size as the value
// =================================== Homework ===================================
std::map<size_t, size_t> free_blocks;

public:
Allocator(Runtime runtime);
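A minimal standalone sketch (not part of the patch) of the shape this field implies: an ordered map from block start offset to block size, so that address-ordered iteration gives a first-fit scan and std::next/std::prev reach the neighbors needed for merging.

#include <cstddef>
#include <iterator>
#include <map>

int main() {
    std::map<std::size_t, std::size_t> free_blocks; // start offset -> block size
    free_blocks[0] = 64;   // a 64-byte hole at offset 0
    free_blocks[128] = 32; // a 32-byte hole at offset 128
    // Entries stay sorted by address, so a linear scan is first-fit
    // and adjacent entries are the only candidates for coalescing.
    auto it = free_blocks.find(128);
    auto prev = std::prev(it);
    bool adjacent = (prev->first + prev->second == it->first); // 64 != 128 -> false
    return adjacent ? 1 : 0;
}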
3 changes: 2 additions & 1 deletion include/core/graph.h
@@ -13,7 +13,8 @@ namespace infini
protected:
Runtime runtime;
TensorVec tensors;
OpVec ops; // OpVec = vector<Operator>
Allocator allocator;

public:
65 changes: 56 additions & 9 deletions src/core/allocator.cc
@@ -28,23 +28,70 @@ namespace infini
IT_ASSERT(this->ptr == nullptr);
// pad the size to the multiple of alignment
size = this->getAlignedSize(size);

// =================================== Homework ===================================
// TODO: design an algorithm that allocates memory and returns the start offset
// =================================== Homework ===================================

// Algorithm 1: first fit. Drawback: heavy fragmentation.
// Free blocks live in a map keyed by start address, value = block size.
// Scan the map and take the first block whose size is >= the request,
// splitting off any remainder; a too-small block that ends exactly at
// peak can still be used by pushing peak higher.
// If nothing fits, take fresh space at peak. Either way, used grows.

for(auto it = free_blocks.begin();it!=free_blocks.end();it++){
size_t block_addr = it->first;
size_t block_size = it->second;
if(block_size>=size){
this->used += size;
free_blocks.erase(it);
if(block_size>size){
free_blocks[block_addr+size] = block_size-size;
}
return block_addr;
}else if (block_addr + block_size == this->peak){
this->used += size;
size_t needed_extra = size - block_size; // how much more do we still need beyond this block?
this->peak += needed_extra; // push peak (the high-water mark) up
free_blocks.erase(it); // consume this tail block
return block_addr;
}
}
// No suitable block found: take fresh space at the current peak.
size_t block_addr = this->peak;
this->peak += size;
this->used += size;
return block_addr;
}
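A short trace of the first-fit policy above, assuming a 1-byte alignment for readability:

// alloc(32): map is empty -> fresh space; returns 0, peak = 32, used = 32
// free(0, 32): free_blocks = {0: 32}, used = 0
// alloc(16): first fit hits {0: 32} and splits it; returns 0, free_blocks = {16: 16}
// alloc(32): {16: 16} is too small but ends at peak (32), so the block is
//            extended: peak = 48, returns 16, free_blocks = {}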

void Allocator::free(size_t addr, size_t size)
{
IT_ASSERT(this->ptr == nullptr);
size = getAlignedSize(size);
// =================================== Homework ===================================
// TODO: design an algorithm to reclaim memory
// =================================== Homework ===================================
this->used -= size;
free_blocks[addr] = size;
auto it = free_blocks.find(addr);
// merge with the following block when the two are adjacent
auto next_it = std::next(it);
if(next_it!=free_blocks.end()){
if(it->first + it->second == next_it->first){
it->second += next_it->second;
free_blocks.erase(next_it);
}
}
// merge with the preceding block when the two are adjacent
if(it!=free_blocks.begin()){
auto prev_it = std::prev(it);
if(prev_it->first + prev_it->second == it->first){
prev_it->second += it->second;
free_blocks.erase(it);
}
}
}
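A matching trace of the two-sided merge in free, again with 1-byte alignment:

// start: free_blocks = {0: 16, 32: 16}
// free(16, 16): insert {16: 16}, then
//   the next block starts at 32 == 16 + 16 -> merge forward  -> {0: 16, 16: 32}
//   the prev block ends at 0 + 16 == 16    -> merge backward -> {0: 48}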

void *Allocator::getPtr()
{
159 changes: 157 additions & 2 deletions src/core/graph.cc
@@ -2,6 +2,8 @@
#include <algorithm>
#include <numeric>
#include <queue>
#include "operators/transpose.h"
#include "operators/matmul.h"

namespace infini
{
@@ -42,7 +44,7 @@ namespace infini
oss << "Graph Tensors:\n";
for (const auto &tensor : tensors)
oss << tensor << "\n";

printf("完成tensor访问\n");
oss << "Graph operators:\n";
for (const auto &op : ops)
{
@@ -56,6 +58,8 @@
oss << ", succ " << vecToString(succs);
oss << ", " << op << "\n";
}
printf("完成op访问\n");

return oss.str();
}

@@ -106,6 +110,105 @@ namespace infini
// 1. Remove redundant operators (e.g., two adjacent transpose ops performing opposite permutations can both be deleted)
// 2. Fuse operators (e.g., matmul carries transA/transB attributes; if an input comes from a transpose that swaps the last two dimensions, that transpose can be folded into the matmul's attributes)
// =================================== Homework ===================================
bool changed = true;
while(changed){
changed = false;
auto ops = this->ops;
for(auto op:ops){
// Case 1: a pair of consecutive transpose ops

if(op->getOpType()==OpType::Transpose){
auto trans1 = as<TransposeObj>(op);
auto tensorA = trans1->getInputs()[0];// the transpose's input; must be preserved
auto tensorB = trans1->getOutput();// intermediate tensor; can be discarded
auto prevop = tensorA->getSource();// the op producing A, if any
// only proceed when B flows to exactly one consumer
if (tensorB->getTargets().size() == 1){
auto nextOp = tensorB->getTargets()[0];
if (nextOp->getOpType() == OpType::Transpose){// and that consumer is itself a transpose
auto trans2 = as<TransposeObj>(nextOp);
auto tensorC = trans2->getOutput();// final output; can be discarded
if (trans1->getPermute() == trans2->getPermute()){// cancels only for involutive permutes; see the note after this pass
auto targets = tensorC->getTargets();
// rewire every consumer of C to read A instead, with prevop as predecessor
for (auto target : targets) {
target->removePredecessors(trans2);
if (prevop) {
prevop->addSuccessors(target);
target->addPredecessors(prevop); // link only when A has a producer
}
target->replaceInput(tensorC, tensorA);
tensorA->addTarget(target);
}
if (prevop) {
prevop->removeSuccessors(trans1);
}
tensorA->removeTarget(trans1);
this->removeOperator(trans1);
this->removeOperator(trans2);
this->removeTensor(tensorB);
this->removeTensor(tensorC);
//printf("优化1,成功移除\n");
changed = true;
break;
}
}
}
}
if (op->getOpType() == OpType::MatMul){
auto matmul = as<MatmulObj>(op);
auto inputs = matmul->getInputs();
for (size_t i = 0; i < 2; ++i){
auto tensorB = inputs[i];
auto prevOp = tensorB->getSource();
if (prevOp && prevOp->getOpType() == OpType::Transpose){
auto trans = as<TransposeObj>(prevOp);
auto tensorA = trans->getInputs()[0];
auto perm = trans->getPermute();

int rank = perm.size();
bool isSwapLastTwo = true;
if (rank < 2) isSwapLastTwo = false;
else {
if (perm[rank-1] != rank-2 || perm[rank-2] != rank-1) isSwapLastTwo = false;
for (int k = 0; k < rank - 2; ++k) {
if (perm[k] != k) { isSwapLastTwo = false; break; }
}
}
if(isSwapLastTwo){
if (i == 0) matmul->setTransA(!matmul->getTransA());
else matmul->setTransB(!matmul->getTransB());

matmul->replaceInput(tensorB, tensorA);
auto sourceA = tensorA->getSource();
if(sourceA){
sourceA->addSuccessors(matmul);
matmul->addPredecessors(sourceA);

}
matmul->removePredecessors(trans);
trans->removeSuccessors(matmul);
tensorA->addTarget(matmul);
tensorB->removeTarget(matmul);
if(tensorB->getTargets().empty()){
if(sourceA){
trans->removePredecessors(sourceA);
sourceA->removeSuccessors(trans);
}
tensorA->removeTarget(trans);
this->removeOperator(trans);
this->removeTensor(tensorB);
}
//printf("优化2,成功\n");
changed = true;
}
}
}
if(changed) break;
}
}
}
//printf("结束循环\n");
}
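One caveat on pass 1 above: trans1->getPermute() == trans2->getPermute() only cancels the pair when the shared permutation is its own inverse (an involution, e.g. swapping the last two axes). The general condition is that the second permutation inverts the first; a hedged sketch over plain std::vector<int> permutations (the helper name is invented for illustration):

#include <cstddef>
#include <vector>

// Hypothetical helper: applying `second` after `first` restores the
// original axis order iff first[second[i]] == i for every i.
bool permutesCancel(const std::vector<int> &first, const std::vector<int> &second) {
    if (first.size() != second.size()) return false;
    for (std::size_t i = 0; i < first.size(); ++i)
        if (first[second[i]] != static_cast<int>(i)) return false;
    return true;
}
// e.g. {0,2,1} followed by {0,2,1} cancels (involution), but
//      {2,0,1} followed by {2,0,1} does not; its inverse is {1,2,0}.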

Tensor GraphObj::getTensor(int fuid) const
@@ -147,12 +250,64 @@
{
// topological sorting first
IT_ASSERT(topo_sort() == true);

// =================================== Homework ===================================
// TODO: use the allocator to assign memory to the computation graph
// HINT: after obtaining the allocated base pointer, call each tensor's setDataBlob to bind its memory
// =================================== Homework ===================================
std::unordered_map<int, size_t> ref_counts;// remaining readers per tensor; a tensor can be freed once its count reaches 0
for(auto &tensor: this->tensors){
ref_counts[tensor->getFuid()] = tensor->getTargets().size();
}
// give every graph output an extra reference so it is never reclaimed
for (auto &tensor : this->getOutputs()) {
ref_counts[tensor->getFuid()]++; // the extra count keeps it alive
}
std::unordered_map<int, size_t> offsets;
for (auto &tensor : this->tensors) {
if (!tensor->getSource()) { // no producing op, so this is a graph input
size_t size = tensor->getBytes();
size_t offset = allocator.alloc(size);
offsets[tensor->getFuid()] = offset;
}
}

// Walk the ops in topological order, simulating allocation:
for(auto &op:this->ops){
// allocate space for each output of this op
for(auto& tensor:op->getOutputs()){
size_t size = tensor->getBytes();// size in bytes
size_t offset = allocator.alloc(size);// reserve memory
offsets[tensor->getFuid()] = offset;// record the assigned offset
}
// then check whether any input can now be released
for (auto &tensor : op->getInputs()){
int fuid = tensor->getFuid();
ref_counts[fuid]--;
if(ref_counts[fuid]==0){
// only free tensors that actually received an offset in this pass;
// anything without one is skipped
if (offsets.find(fuid) != offsets.end()) {
// release the block back to the allocator
allocator.free(offsets[fuid], tensor->getBytes());
}
}
}
}
// Every live tensor now has its offset; materialize the arena and bind blobs.
void *basePtr = allocator.getPtr();
// bind a Blob to each tensor that received an offset
for(auto&tensor:this->tensors){
int fuid = tensor->getFuid();
// skip tensors that never got an offset (e.g., unused tensors left in the graph)
if(offsets.find(fuid)!=offsets.end()){
size_t offset = offsets[fuid];
// pointer arithmetic is undefined on void*, so cast to char* first
void *ptr = static_cast<char *>(basePtr) + offset;
// wrap the address in a blob: BlobObj(Runtime runtime, void *ptr)
auto blob = make_ref<BlobObj>(this->runtime, ptr);
tensor->setDataBlob(blob);
}
}
allocator.info();
}
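A small trace of the liveness scheme above, for a chain X -> op1 -> T -> op2 -> Y where T is the only intermediate tensor:

// ref_counts: X = 1 (read by op1), T = 1 (read by op2), Y = 0 + 1 (graph output)
// input pass: X has no source -> alloc(X)
// op1: alloc(T); X's count drops to 0 -> free(X's block)
// op2: alloc(Y) happens first, then T's count drops to 0 -> free(T's block)
// Y keeps its extra reference, so its storage is never recycled.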

9 changes: 9 additions & 0 deletions src/operators/concat.cc
@@ -17,6 +17,15 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
// TODO: modify dims to return the correct post-concat shape
// REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
// =================================== Homework ===================================
// Concatenate along `dim`; every other dimension is unchanged.
int p_dim = this->dim;
if (p_dim < 0) {
p_dim += rank;
}

for (size_t i = 1; i < inputs.size(); ++i) {
dims[p_dim] += inputs[i]->getDims()[p_dim];
}

return {{dims}};
}
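A quick numeric check of the rule above:

// concat({2,3,4}, {2,5,4}) along dim = 1 (equivalently dim = -2):
//   dims starts as {2,3,4}; dims[1] += 5 -> output shape {2,8,4}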
37 changes: 35 additions & 2 deletions src/operators/matmul.cc
@@ -1,5 +1,5 @@
#include "operators/matmul.h"

#include "utils/operator_utils.h"
namespace infini
{

@@ -27,7 +27,40 @@ namespace infini
// TODO: return the output shape of the matmul
// REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
// =================================== Homework ===================================
// Batched matmul: the last two dimensions are the matrix dimensions;
// the leading (batch) dimensions broadcast.
const auto A = inputs[0];
const auto B = inputs[1];
auto shapeA = A->getDims();
auto shapeB = B->getDims();

int rankA = shapeA.size();
int rankB = shapeB.size();
// apply transA/transB by swapping the last two dimensions
if (this->transA && rankA >= 2) {
std::swap(shapeA[rankA - 1], shapeA[rankA - 2]);
}
if (this->transB && rankB >= 2) {
std::swap(shapeB[rankB - 1], shapeB[rankB - 2]);
}
// (..., M, K) x (..., K, N) -> (..., M, N)
int M = shapeA[rankA - 2];
int K_A = shapeA[rankA - 1];
int K_B = shapeB[rankB - 2];
int N = shapeB[rankB - 1];
// the inner dimensions must agree
IT_ASSERT(K_A == K_B);
// the remaining (batch) dimensions are broadcast
Shape batchA(shapeA.begin(), shapeA.end() - 2);
Shape batchB(shapeB.begin(), shapeB.end() - 2);

Shape batchOut = infer_broadcast(batchA, batchB);

Shape outputShape = batchOut;
outputShape.push_back(M);
outputShape.push_back(N);

return vector<Shape>{outputShape};
}
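A quick shape check of the rule above, assuming infer_broadcast performs the usual right-aligned broadcasting:

// A: {2,1,3,4} -> M = 3, K = 4;  B: {5,4,6} -> K = 4, N = 6
// batchA = {2,1}, batchB = {5} -> infer_broadcast -> {2,5}
// output shape: {2,5,3,6}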

} // namespace infini
15 changes: 14 additions & 1 deletion src/operators/transpose.cc
@@ -34,7 +34,20 @@ namespace infini
// REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
// =================================== Homework ===================================

auto &perm = this->transposePermute;

if (perm.empty()) {// an empty perm defaults to reversing the axes (the ONNX default)
for (int i = 0; i < rank; ++i) {
output_dim[i] = input_dim[rank - 1 - i];
}
} else {
for (int i = 0; i < rank; ++i) {
output_dim[i] = input_dim[perm[i]];
}
}

// return the result
return vector<Shape>{output_dim};
}
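Two quick examples of the rule above:

// input {2,3,5}, perm {}      -> output {5,3,2} (axes reversed, the ONNX default)
// input {2,3,5}, perm {0,2,1} -> output {2,5,3}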

std::string TransposeObj::toString() const