From d39cde34c66153677e836af20cf4b6822546f0b0 Mon Sep 17 00:00:00 2001
From: onenewcode
Date: Mon, 12 Jan 2026 17:43:34 +0800
Subject: [PATCH 1/3] lab 1

---
Review notes (free-form text between "---" and the first "diff --git" is
not part of the commit):

- This series had lost its line breaks, indentation and every <...> span
  (template arguments, #include <...>). They are reconstructed below and
  blank context lines are re-inserted to match the hunk counts.
  `static_cast<char *>` and `make_ref<BlobObj>` in src/core/graph.cc are
  reconstructions - confirm against the tree.
- Allocator::alloc() used to grow the heap from `used`. `used` drops on
  every free(), so "alloc A, alloc B, free A, alloc 2*A" returned an
  offset inside the still-live block B. It now grows from `peak`.
- GraphObj::dataMalloc() as added here calls allocator.getPtr() before
  allocator.alloc(), while alloc() asserts ptr == nullptr. It is left as
  authored because the "-" lines of PATCH 3/3 have to match it.
- The "index" hashes still name the blobs of the original commit.

 include/core/allocator.h |  2 ++
 src/core/allocator.cc    | 66 ++++++++++++++++++++++++++++++++++++++--
 src/core/graph.cc        | 11 +++++++++
 3 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/include/core/allocator.h b/include/core/allocator.h
index 002601d..7df475c 100644
--- a/include/core/allocator.h
+++ b/include/core/allocator.h
@@ -27,6 +27,8 @@ namespace infini {
         // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
         // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
         // =================================== 作业 ===================================
+        // Free list: start offset -> size of the free block, ordered by offset.
+        std::map<size_t, size_t> free_blocks;
 
     public:
         Allocator(Runtime runtime);
diff --git a/src/core/allocator.cc b/src/core/allocator.cc
index ff593ae..cf245f1 100644
--- a/src/core/allocator.cc
+++ b/src/core/allocator.cc
@@ -26,14 +26,47 @@ namespace infini
     size_t Allocator::alloc(size_t size)
     {
         IT_ASSERT(this->ptr == nullptr);
         // pad the size to the multiple of alignment
         size = this->getAlignedSize(size);
 
         // =================================== 作业 ===================================
         // TODO: 设计一个算法来分配内存,返回起始地址偏移量
         // =================================== 作业 ===================================
-
-        return 0;
+
+        // First fit: reuse the lowest-addressed free block that is big enough.
+        for (auto it = free_blocks.begin(); it != free_blocks.end(); ++it) {
+            if (it->second < size) {
+                continue;
+            }
+            const size_t addr = it->first;
+            const size_t remaining = it->second - size;
+            free_blocks.erase(it);
+            if (remaining > 0) {
+                // Keep the unused tail of the block on the free list.
+                free_blocks[addr + size] = remaining;
+            }
+            used += size;
+            return addr;
+        }
+
+        // Nothing fits, so the heap must grow. Growth starts at `peak`, the
+        // highest offset handed out so far, and not at `used`: `used` drops
+        // on every free(), so bumping from it can return bytes that still
+        // belong to a live block (alloc A, alloc B, free A, alloc 2*A).
+        size_t addr = peak;
+        if (!free_blocks.empty()) {
+            auto last = std::prev(free_blocks.end());
+            if (last->first + last->second == peak) {
+                // The last free block touches the end of the heap: grow it
+                // in place instead of leaving a hole below the new block.
+                addr = last->first;
+                free_blocks.erase(last);
+            }
+        }
+        peak = addr + size;
+        used += size;
+
+        return addr;
     }
 
     void Allocator::free(size_t addr, size_t size)
@@ -44,6 +77,35 @@ namespace infini
         // =================================== 作业 ===================================
         // TODO: 设计一个算法来回收内存
         // =================================== 作业 ===================================
+
+        // Look up the free blocks on either side of [addr, addr + size).
+        const size_t end = addr + size;
+        auto next_it = free_blocks.lower_bound(addr);
+        auto prev_it = (next_it != free_blocks.begin())
+                           ? std::prev(next_it)
+                           : free_blocks.end();
+
+        const bool merge_prev = (prev_it != free_blocks.end()) &&
+                                (prev_it->first + prev_it->second == addr);
+        const bool merge_next =
+            (next_it != free_blocks.end()) && (end == next_it->first);
+
+        // Coalesce with whichever neighbours are adjacent so the free list
+        // never holds two touching blocks.
+        if (merge_prev && merge_next) {
+            prev_it->second += size + next_it->second;
+            free_blocks.erase(next_it);
+        } else if (merge_prev) {
+            prev_it->second += size;
+        } else if (merge_next) {
+            const size_t merged = size + next_it->second;
+            free_blocks.erase(next_it);
+            free_blocks[addr] = merged;
+        } else {
+            free_blocks[addr] = size;
+        }
+
+        used -= size;
     }
 
     void *Allocator::getPtr()
diff --git a/src/core/graph.cc b/src/core/graph.cc
index 3a90637..c5f00f9 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -1,4 +1,5 @@
 #include "core/graph.h"
+#include "core/blob.h"
 #include <algorithm>
 #include <numeric>
 #include <queue>
@@ -152,6 +153,16 @@ namespace infini
         // TODO:利用 allocator 给计算图分配内存
         // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
         // =================================== 作业 ===================================
+
+        void *base_ptr = allocator.getPtr();
+
+        for (auto &tensor : tensors) {
+            size_t size = tensor->getBytes();
+            size_t offset = allocator.alloc(size);
+            void *tensor_ptr = static_cast<char *>(base_ptr) + offset;
+            Blob blob = make_ref<BlobObj>(runtime, tensor_ptr);
+            tensor->setDataBlob(blob);
+        }
 
         allocator.info();
     }
From 9a9b0cf6a471fe09d9dc92f7868a67242e719d21 Mon Sep 17 00:00:00 2001
From: onenewcode
Date: Mon, 12 Jan 2026 18:14:56 +0800
Subject: [PATCH 2/3] lab 11 fail

---
 src/operators/concat.cc     | 11 +++++++++++
 src/operators/matmul.cc     | 37 ++++++++++++++++++++++++++++++++++++-
 src/operators/transpose.cc  |  6 +++++-
 src/operators/unary.cc      |  6 +++---
 src/utils/operator_utils.cc | 23 ++++++++++++++++++++++-
 5 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/src/operators/concat.cc
b/src/operators/concat.cc
index d196330..7feeded 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -17,6 +17,17 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     // TODO:修改 dims,返回正确的 concat 后的 shape
     // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
     // =================================== 作业 ===================================
+    for (size_t i = 1; i < inputs.size(); ++i) {
+        const auto input_dims = inputs[i]->getDims();
+        IT_ASSERT(input_dims.size() == dims.size()); // else OOB read below
+        for (int j = 0; j < static_cast<int>(dims.size()); ++j) {
+            if (j == dim) {
+                dims[j] += input_dims[j]; // extents add up along the axis
+            } else {
+                IT_ASSERT(dims[j] == input_dims[j]);
+            }
+        }
+    }
 
     return {{dims}};
 }
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 7a16ca2..472f4b5 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -1,4 +1,5 @@
 #include "operators/matmul.h"
+#include "utils/operator_utils.h"
 
 namespace infini
 {
@@ -27,7 +28,41 @@ namespace infini
         // TODO:返回经过 matmul 操作后的 shape
         // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
         // =================================== 作业 ===================================
-        return std::nullopt;
+
+        // Work on the trailing two axes; everything before them is batch.
+        const auto shapeA = inputs[0]->getDims();
+        const auto shapeB = inputs[1]->getDims();
+        const int rankA = static_cast<int>(shapeA.size());
+        const int rankB = static_cast<int>(shapeB.size());
+        IT_ASSERT(rankA >= 2 && rankB >= 2);
+
+        // A is (m, k), or (k, m) when transA is set.
+        int m = transA ? shapeA[rankA - 1] : shapeA[rankA - 2];
+        int kA = transA ? shapeA[rankA - 2] : shapeA[rankA - 1];
+        // B is (k, n), or (n, k) when transB is set.
+        int kB = transB ? shapeB[rankB - 1] : shapeB[rankB - 2];
+        int n = transB ? shapeB[rankB - 2] : shapeB[rankB - 1];
+        IT_ASSERT(kA == kB);
+
+        // The leading (batch) axes broadcast against each other.
+        Shape batchDimsA, batchDimsB;
+        for (int i = 0; i < rankA - 2; ++i) {
+            batchDimsA.push_back(shapeA[i]);
+        }
+        for (int i = 0; i < rankB - 2; ++i) {
+            batchDimsB.push_back(shapeB[i]);
+        }
+
+        Shape result = infer_broadcast(batchDimsA, batchDimsB);
+        result.push_back(m);
+        result.push_back(n);
+
+        // Record the problem size on the operator.
+        this->m = m;
+        this->n = n;
+        this->k = kA;
+
+        return {{result}};
     }
 
 } // namespace infini
\ No newline at end of file
diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc
index faab2b6..105908a 100644
--- a/src/operators/transpose.cc
+++ b/src/operators/transpose.cc
@@ -33,8 +33,12 @@ namespace infini
         // TODO:修改 output_dim,返回正确的 transpose 后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
         // =================================== 作业 ===================================
+        // Output axis i takes its extent from input axis transposePermute[i].
+        for (int i = 0; i < rank; ++i) {
+            output_dim[i] = input_dim[transposePermute[i]];
+        }
 
-        return std::nullopt;
+        return {{output_dim}};
     }
 
     std::string TransposeObj::toString() const
diff --git a/src/operators/unary.cc b/src/operators/unary.cc
index 3daad36..e4c18a4 100644
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@@ -39,7 +39,7 @@ namespace infini
         // TODO:返回经过 clip 操作后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13
         // =================================== 作业 ===================================
-        return std::nullopt;
+        return {{inputs[0]->getDims()}};
     }
 
     std::string ClipObj::toString() const
@@ -66,7 +66,7 @@ namespace infini
         // REF_FILE: src/core/operator.cc
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== 作业 ===================================
-        return {};
+        return {getOutputDataType()};
     }
 
     optional<vector<Shape>> CastObj::inferShape(const TensorVec &inputs)
@@ -75,7 +75,7 @@ namespace infini
         // TODO:返回经过 cast 操作后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== 作业 ===================================
-        return std::nullopt;
+        return {{inputs[0]->getDims()}};
     }
 
     std::string CastObj::toString() const
diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc
index edbd2c8..2e93047 100644
--- a/src/utils/operator_utils.cc
+++ b/src/utils/operator_utils.cc
@@ -10,7 +10,28 @@ Shape infer_broadcast(const Shape &A, const Shape &B) {
     // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md
     // =================================== 作业 ===================================
 
-    return {};
+    // Align the shapes at their trailing axis; missing leading axes act as 1.
+    const size_t rankA = A.size();
+    const size_t rankB = B.size();
+    const size_t rank = std::max(rankA, rankB);
+    Shape result(rank);
+
+    for (size_t i = 0; i < rank; ++i) {
+        // i counts axes from the back.
+        const int dimA = (i < rankA) ? A[rankA - 1 - i] : 1;
+        const int dimB = (i < rankB) ? B[rankB - 1 - i] : 1;
+
+        if (dimA == dimB || dimB == 1) {
+            result[rank - 1 - i] = dimA;
+        } else if (dimA == 1) {
+            result[rank - 1 - i] = dimB;
+        } else {
+            // Two different extents, neither of them 1.
+            IT_ASSERT(false, "Cannot broadcast shapes");
+        }
+    }
+
+    return result;
 }
 
 int get_real_axis(const int &axis, const int &rank) {
From 06580bc6ca897381404bf092ed45defd74456aa9 Mon Sep 17 00:00:00 2001
From: onenewcode
Date: Tue, 13 Jan 2026 13:25:11 +0800
Subject: [PATCH 3/3] finish

---
Review notes (free-form text between "---" and the first "diff --git" is
not part of the commit):

- The <...> template arguments of the include/core/ref.h context lines
  were lost in extraction and are reconstructed below - confirm against
  the tree.
- wrefs_to_refs() now drops expired references instead of failing on
  them, so the result can be shorter than its input. Callers that index
  the result by position should be checked.

 include/core/ref.h   |   7 ++-
 src/core/graph.cc    | 145 +++++++++++++++++++++++++++++++++++++++++--
 src/core/operator.cc |   6 +-
 src/core/tensor.cc   |   7 ++-
 4 files changed, 155 insertions(+), 10 deletions(-)

diff --git a/include/core/ref.h b/include/core/ref.h
index 3393f6e..8daa694 100644
--- a/include/core/ref.h
+++ b/include/core/ref.h
@@ -35,8 +35,11 @@ std::vector<WRef<T>> refs_to_wrefs(const std::vector<Ref<T>> &refs) {
 template <typename T>
 std::vector<Ref<T>> wrefs_to_refs(const std::vector<WRef<T>> &wrefs) {
     std::vector<Ref<T>> refs;
-    for (const auto &wref : wrefs)
-        refs.emplace_back(wref);
+    for (const auto &wref : wrefs) {
+        if (auto ref = wref.lock()) {
+            refs.emplace_back(ref);
+        }
+    }
     return refs;
 }
 
diff --git a/src/core/graph.cc
b/src/core/graph.cc
index c5f00f9..6d2e6d1 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -1,5 +1,7 @@
 #include "core/graph.h"
 #include "core/blob.h"
+#include "operators/transpose.h"
+#include "operators/matmul.h"
 #include <algorithm>
 #include <numeric>
 #include <queue>
@@ -107,6 +109,152 @@ namespace infini
         // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
         // 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
         // =================================== 作业 ===================================
+
+        // NOTE(review): the <...> template arguments of this hunk were lost
+        // when the patch was extracted; TransposeObj and MatmulObj are
+        // assumed from the headers included above - confirm.
+
+        // Unlinks `dead` from its neighbours before it is removed. The
+        // predecessor list is bound to one name because calling the getter
+        // once for begin() and once for end() compares iterators of two
+        // different vectors whenever it returns by value.
+        // NOTE(review): if these getters do return copies, nothing here
+        // reaches the stored links - verify against core/operator.h.
+        auto detach = [](const auto &dead) {
+            for (auto suc : dead->getSuccessors()) {
+                auto &&preds = suc->getPredecessors();
+                auto it = std::find(preds.begin(), preds.end(), dead);
+                if (it != preds.end()) {
+                    preds.erase(it);
+                }
+            }
+            dead->getSuccessors().clear();
+            dead->getPredecessors().clear();
+        };
+
+        // Pass 1: drop pairs of back-to-back transposes that cancel out.
+        bool removedPair = true;
+        while (removedPair) {
+            removedPair = false;
+            for (size_t i = 0; i < ops.size(); ++i) {
+                auto op = ops[i];
+                if (op->getOpType() != OpType::Transpose)
+                    continue;
+
+                auto second = as<TransposeObj>(op);
+                auto mid = second->getInputs()[0];
+                auto output = second->getOutput();
+                auto sourceOp = mid->getSource();
+                if (!sourceOp || sourceOp->getOpType() != OpType::Transpose)
+                    continue;
+                auto first = as<TransposeObj>(sourceOp);
+
+                // `first` and `mid` are deleted below, so `second` must be
+                // the only reader of `mid`; and without a consumer of
+                // `output` there is nothing to rewire the pair around.
+                if (mid->getTargets().size() != 1 ||
+                    output->getTargets().empty())
+                    continue;
+
+                // The pair is a no-op when applying one permutation after
+                // the other maps every axis back to itself.
+                const auto permute = second->getPermute();
+                const auto prevPermute = first->getPermute();
+                bool isInverse = permute.size() == prevPermute.size();
+                for (size_t j = 0; isInverse && j < permute.size(); ++j) {
+                    isInverse =
+                        static_cast<size_t>(permute[prevPermute[j]]) == j;
+                }
+                if (!isInverse)
+                    continue;
+
+                // Point every consumer of the pair at the original tensor.
+                auto origin = first->getInputs()[0];
+                for (auto target : output->getTargets()) {
+                    target->replaceInput(output, origin);
+                }
+                // NOTE(review): `origin` is not told about its new
+                // consumers here - check whether replaceInput() updates
+                // the tensor's target list.
+
+                detach(first);
+                detach(second);
+                removeOperator(first);
+                removeOperator(second);
+                removeTensor(mid);
+                removeTensor(output);
+
+                // `ops` just shrank, so `i` is stale: rescan from the
+                // start. (Setting i = 0 inside the for loop would resume
+                // at index 1 after the ++i.)
+                removedPair = true;
+                break;
+            }
+        }
+
+        // Pass 2: fold a transpose that only swaps the last two axes into
+        // the transA / transB flag of the matmul that consumes it.
+        bool fused = true;
+        while (fused) {
+            fused = false;
+            for (size_t i = 0; i < ops.size() && !fused; ++i) {
+                auto op = ops[i];
+                if (op->getOpType() != OpType::MatMul)
+                    continue;
+                auto matmulOp = as<MatmulObj>(op);
+                // With the same tensor on both sides one flag cannot
+                // describe the rewrite, so leave that matmul alone.
+                if (matmulOp->getInputs(0) == matmulOp->getInputs(1))
+                    continue;
+
+                for (int j = 0; j < 2; ++j) {
+                    auto input = matmulOp->getInputs(j);
+                    if (!input)
+                        continue;
+                    auto sourceOp = input->getSource();
+                    if (!sourceOp ||
+                        sourceOp->getOpType() != OpType::Transpose)
+                        continue;
+                    // The transpose and its output are deleted below, so
+                    // this matmul must be the only reader of that output.
+                    if (input->getTargets().size() != 1)
+                        continue;
+
+                    auto transposeOp = as<TransposeObj>(sourceOp);
+                    const auto permute = transposeOp->getPermute();
+                    const int rank = static_cast<int>(permute.size());
+                    if (rank < 2)
+                        continue;
+
+                    bool swapsLastTwo = permute[rank - 2] == rank - 1 &&
+                                        permute[rank - 1] == rank - 2;
+                    for (int k = 0; swapsLastTwo && k < rank - 2; ++k) {
+                        swapsLastTwo = permute[k] == k;
+                    }
+                    if (!swapsLastTwo)
+                        continue;
+
+                    auto transposeInput = transposeOp->getInputs()[0];
+                    if (!transposeInput)
+                        continue;
+
+                    matmulOp->replaceInput(input, transposeInput);
+                    if (j == 0) {
+                        matmulOp->setTransA(!matmulOp->getTransA());
+                    } else {
+                        matmulOp->setTransB(!matmulOp->getTransB());
+                    }
+
+                    detach(transposeOp);
+                    removeOperator(transposeOp);
+                    removeTensor(input);
+
+                    // `ops` changed: leave both loops and rescan.
+                    fused = true;
+                    break;
+                }
+            }
+        }
     }
 
     Tensor GraphObj::getTensor(int fuid) const
@@ -132,6 +280,7 @@ namespace infini
                 // replace the old outputshape and size with new one
                 for (int i = 0; i < (int)ans.value().size(); ++i)
                 {
+                    if (!oldOutputs[i]) continue;
                     auto newShape = ans.value()[i];
                     auto oldShape = oldOutputs[i]->getDims();
                     auto fuid = oldOutputs[i]->getFuid();
@@ -154,14 +303,22 @@ namespace infini
         // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
         // =================================== 作业 ===================================
 
-        void *base_ptr = allocator.getPtr();
+        shape_infer();
+
+        std::vector<size_t> offsets;
+        offsets.reserve(tensors.size());
 
         for (auto &tensor : tensors) {
             size_t size = tensor->getBytes();
-            size_t offset = allocator.alloc(size);
-            void *tensor_ptr = static_cast<char *>(base_ptr) + offset;
+            offsets.push_back(allocator.alloc(size));
+        }
+
+        void *base_ptr = allocator.getPtr();
+
+        for (size_t i = 0; i < tensors.size(); ++i) {
+            void *tensor_ptr = static_cast<char *>(base_ptr) + offsets[i];
             Blob blob = make_ref<BlobObj>(runtime, tensor_ptr);
-            tensor->setDataBlob(blob);
+            tensors[i]->setDataBlob(blob);
         }
 
         allocator.info();
diff --git a/src/core/operator.cc b/src/core/operator.cc
index a70ca48..44540db 100644
--- a/src/core/operator.cc
+++ b/src/core/operator.cc
@@ -54,14 +54,16 @@ namespace infini
             auto dataTypes = inferDataType();
             for (size_t i = 0; i < outputs.size(); i++)
             {
-                IT_ASSERT(!outputs[i], "Find empty output while operator creation");
-                outputs[i] = graph->addTensor(shapes[i], dataTypes[i]);
+                if (outputs[i] == nullptr) {
+                    outputs[i] = graph->addTensor(shapes[i], dataTypes[i]);
+                }
             }
         }
         else
         { // if outputs have been created, check their shapes
             for (size_t i = 0; i < shapes.size(); ++i)
             {
+                if (!outputs[i]) return false;
                 if (shapes[i] != outputs[i]->getDims())
                     return false;
             }
diff --git a/src/core/tensor.cc b/src/core/tensor.cc
index db54a2d..7fc8606 100644
--- a/src/core/tensor.cc
+++ b/src/core/tensor.cc
@@ -24,8 +24,11 @@ namespace infini {
                           ", dtype " + dtype.toString() + ", " + runtime->toString() +
                           ", " + ss.str() + "\n";
         vector<UidBaseType> targetGuids;
-        for (const auto &op : targets)
-            targetGuids.emplace_back(op.lock()->getGuid());
+        for (const auto &op : targets) {
+            if (auto lockedOp = op.lock()) {
+                targetGuids.emplace_back(lockedOp->getGuid());
+            }
+        }
         if (auto o = source.lock())
             ret += ", source " + std::to_string(o->getGuid());
         else