Skip to content
Open

pass #10

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace infini {
// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
// =================================== 作业 ===================================
std::map<size_t, size_t> free_blocks;

public:
Allocator(Runtime runtime);
Expand Down
7 changes: 5 additions & 2 deletions include/core/ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ std::vector<WRef<T>> refs_to_wrefs(const std::vector<Ref<T>> &refs) {
template <typename T>
std::vector<Ref<T>> wrefs_to_refs(const std::vector<WRef<T>> &wrefs) {
std::vector<Ref<T>> refs;
for (const auto &wref : wrefs)
refs.emplace_back(wref);
for (const auto &wref : wrefs) {
if (auto ref = wref.lock()) {
refs.emplace_back(ref);
}
}
return refs;
}

Expand Down
49 changes: 46 additions & 3 deletions src/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,33 @@ namespace infini
size_t Allocator::alloc(size_t size)
{
IT_ASSERT(this->ptr == nullptr);
// pad the size to the multiple of alignment
size = this->getAlignedSize(size);

// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
// =================================== 作业 ===================================

return 0;

for (auto it = free_blocks.begin(); it != free_blocks.end(); ++it) {
if (it->second >= size) {
size_t addr = it->first;
if (it->second == size) {
free_blocks.erase(it);
} else {
free_blocks[it->first + size] = it->second - size;
free_blocks.erase(it);
}
used += size;
return addr;
}
}

size_t addr = used;
used += size;
if (used > peak) {
peak = used;
}

return addr;
}

void Allocator::free(size_t addr, size_t size)
Expand All @@ -44,6 +63,30 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来回收内存
// =================================== 作业 ===================================

size_t end = addr + size;

auto next_it = free_blocks.lower_bound(addr);

auto prev_it = (next_it != free_blocks.begin()) ? std::prev(next_it) : free_blocks.end();

bool merge_prev = (prev_it != free_blocks.end()) && (prev_it->first + prev_it->second == addr);
bool merge_next = (next_it != free_blocks.end()) && (end == next_it->first);

if (merge_prev && merge_next) {
prev_it->second = prev_it->second + size + next_it->second;
free_blocks.erase(next_it);
} else if (merge_prev) {
prev_it->second += size;
} else if (merge_next) {
size_t new_size = size + next_it->second;
free_blocks.erase(next_it);
free_blocks[addr] = new_size;
} else {
free_blocks[addr] = size;
}

used -= size;
}

void *Allocator::getPtr()
Expand Down
148 changes: 148 additions & 0 deletions src/core/graph.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#include "core/graph.h"
#include "core/blob.h"
#include "operators/transpose.h"
#include "operators/matmul.h"
#include <algorithm>
#include <numeric>
#include <queue>
Expand Down Expand Up @@ -106,6 +109,132 @@ namespace infini
// 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
// 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
// =================================== 作业 ===================================

// 第一轮:消除相反的transpose
for (size_t i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->getOpType() != OpType::Transpose) continue;

auto transposeOp = as<TransposeObj>(op);
auto input = transposeOp->getInputs()[0];
auto output = transposeOp->getOutput();
auto permute = transposeOp->getPermute();

auto sourceOp = input->getSource();
if (!sourceOp) continue;
if (sourceOp->getOpType() != OpType::Transpose) continue;

auto prevTranspose = as<TransposeObj>(sourceOp);
auto prevPermute = prevTranspose->getPermute();

bool isInverse = true;
for (size_t j = 0; j < permute.size() && j < prevPermute.size(); ++j) {
if (permute[prevPermute[j]] != j) {
isInverse = false;
break;
}
}

if (isInverse) {
auto prevInput = prevTranspose->getInputs()[0];
auto prevOutput = prevTranspose->getOutput();

for (auto target : output->getTargets()) {
target->replaceInput(output, prevInput);
}

// 清理前驱/后继关系
for (auto suc : prevTranspose->getSuccessors()) {
auto it = std::find(suc->getPredecessors().begin(), suc->getPredecessors().end(), prevTranspose);
if (it != suc->getPredecessors().end()) {
suc->getPredecessors().erase(it);
}
}
for (auto suc : transposeOp->getSuccessors()) {
auto it = std::find(suc->getPredecessors().begin(), suc->getPredecessors().end(), transposeOp);
if (it != suc->getPredecessors().end()) {
suc->getPredecessors().erase(it);
}
}

prevTranspose->getSuccessors().clear();
transposeOp->getSuccessors().clear();
prevTranspose->getPredecessors().clear();
transposeOp->getPredecessors().clear();

removeOperator(prevTranspose);
removeOperator(transposeOp);
removeTensor(prevOutput);
removeTensor(output);

// 重新开始
i = 0;
}
}

// 第二轮:融合transpose到matmul
for (size_t i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->getOpType() != OpType::MatMul) continue;

auto matmulOp = as<MatmulObj>(op);

for (int j = 0; j < 2; ++j) {
auto input = matmulOp->getInputs(j);
if (!input) continue;

auto sourceOp = input->getSource();
if (!sourceOp) continue;
if (sourceOp->getOpType() != OpType::Transpose) continue;

auto transposeOp = as<TransposeObj>(sourceOp);
auto permute = transposeOp->getPermute();
auto inputShape = input->getDims();
int rank = inputShape.size();

if (rank < 2) continue;

bool swapsLastTwo = (permute[rank - 2] == rank - 1) &&
(permute[rank - 1] == rank - 2);
bool keepsOthers = true;
for (int k = 0; k < rank - 2; ++k) {
if (permute[k] != k) {
keepsOthers = false;
break;
}
}

if (swapsLastTwo && keepsOthers) {
auto transposeInput = transposeOp->getInputs()[0];
if (!transposeInput) continue;

matmulOp->replaceInput(input, transposeInput);

if (j == 0) {
matmulOp->setTransA(!matmulOp->getTransA());
} else {
matmulOp->setTransB(!matmulOp->getTransB());
}

// 清理前驱/后继关系
for (auto suc : transposeOp->getSuccessors()) {
auto it = std::find(suc->getPredecessors().begin(), suc->getPredecessors().end(), transposeOp);
if (it != suc->getPredecessors().end()) {
suc->getPredecessors().erase(it);
}
}
transposeOp->getSuccessors().clear();
transposeOp->getPredecessors().clear();

removeOperator(transposeOp);
removeTensor(input);

// 重新开始
i = 0;
break;
}
}
}
}

Tensor GraphObj::getTensor(int fuid) const
Expand All @@ -131,6 +260,7 @@ namespace infini
// replace the old outputshape and size with new one
for (int i = 0; i < (int)ans.value().size(); ++i)
{
if (!oldOutputs[i]) continue;
auto newShape = ans.value()[i];
auto oldShape = oldOutputs[i]->getDims();
auto fuid = oldOutputs[i]->getFuid();
Expand All @@ -152,6 +282,24 @@ namespace infini
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// =================================== 作业 ===================================

shape_infer();

std::vector<size_t> offsets;
offsets.reserve(tensors.size());

for (auto &tensor : tensors) {
size_t size = tensor->getBytes();
offsets.push_back(allocator.alloc(size));
}

void *base_ptr = allocator.getPtr();

for (size_t i = 0; i < tensors.size(); ++i) {
void *tensor_ptr = static_cast<char *>(base_ptr) + offsets[i];
Blob blob = make_ref<BlobObj>(runtime, tensor_ptr);
tensors[i]->setDataBlob(blob);
}

allocator.info();
}
Expand Down
6 changes: 4 additions & 2 deletions src/core/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,16 @@ namespace infini
auto dataTypes = inferDataType();
for (size_t i = 0; i < outputs.size(); i++)
{
IT_ASSERT(!outputs[i], "Find empty output while operator creation");
outputs[i] = graph->addTensor(shapes[i], dataTypes[i]);
if (outputs[i] == nullptr) {
outputs[i] = graph->addTensor(shapes[i], dataTypes[i]);
}
}
}
else
{ // if outputs have been created, check their shapes
for (size_t i = 0; i < shapes.size(); ++i)
{
if (!outputs[i]) return false;
if (shapes[i] != outputs[i]->getDims())
return false;
}
Expand Down
7 changes: 5 additions & 2 deletions src/core/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ namespace infini {
", dtype " + dtype.toString() + ", " + runtime->toString() +
", " + ss.str() + "\n";
vector<UidBaseType> targetGuids;
for (const auto &op : targets)
targetGuids.emplace_back(op.lock()->getGuid());
for (const auto &op : targets) {
if (auto lockedOp = op.lock()) {
targetGuids.emplace_back(lockedOp->getGuid());
}
}
if (auto o = source.lock())
ret += ", source " + std::to_string(o->getGuid());
else
Expand Down
11 changes: 11 additions & 0 deletions src/operators/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
// TODO:修改 dims,返回正确的 concat 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
// =================================== 作业 ===================================

for (size_t i = 1; i < inputs.size(); ++i) {
auto input_dims = inputs[i]->getDims();
for (int j = 0; j < rank; ++j) {
if (j == dim) {
dims[j] += input_dims[j];
} else {
IT_ASSERT(dims[j] == input_dims[j]);
}
}
}

return {{dims}};
}
Expand Down
37 changes: 36 additions & 1 deletion src/operators/matmul.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "operators/matmul.h"
#include "utils/operator_utils.h"

namespace infini
{
Expand Down Expand Up @@ -27,7 +28,41 @@ namespace infini
// TODO:返回经过 matmul 操作后的 shape
// REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
// =================================== 作业 ===================================
return std::nullopt;

auto shapeA = inputs[0]->getDims();
auto shapeB = inputs[1]->getDims();

int rankA = shapeA.size();
int rankB = shapeB.size();

IT_ASSERT(rankA >= 2 && rankB >= 2);

int m = transA ? shapeA[rankA - 1] : shapeA[rankA - 2];
int kA = transA ? shapeA[rankA - 2] : shapeA[rankA - 1];
int kB = transB ? shapeB[rankB - 1] : shapeB[rankB - 2];
int n = transB ? shapeB[rankB - 2] : shapeB[rankB - 1];

IT_ASSERT(kA == kB);

Shape batchDimsA, batchDimsB;
for (int i = 0; i < rankA - 2; ++i) {
batchDimsA.push_back(shapeA[i]);
}
for (int i = 0; i < rankB - 2; ++i) {
batchDimsB.push_back(shapeB[i]);
}

Shape batchDims = infer_broadcast(batchDimsA, batchDimsB);

Shape result = batchDims;
result.push_back(m);
result.push_back(n);

this->m = m;
this->n = n;
this->k = kA;

return {{result}};
}

} // namespace infini
6 changes: 5 additions & 1 deletion src/operators/transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ namespace infini
// TODO:修改 output_dim,返回正确的 transpose 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
// =================================== 作业 ===================================

for (int i = 0; i < rank; ++i) {
output_dim[i] = input_dim[transposePermute[i]];
}

return std::nullopt;
return {{output_dim}};
}

std::string TransposeObj::toString() const
Expand Down
Loading