Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,9 @@ extern "C" {

void * extra; // extra things e.g. for ggml-cuda.cu

char padding[8];
char padding[16];
        // original source tensor (NULL if none): tracks the true data source across in-place operations
struct ggml_tensor * org_src;
};

static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
Expand Down
20 changes: 17 additions & 3 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,8 +1124,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];
int * cur_backend_id = &tensor_backend_id(node);
if (node->view_src != NULL && *cur_backend_id == -1) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
auto view_src_backend = tensor_backend_id(node->view_src);
if (view_src_backend != -1 && ggml_backend_supports_op(sched->backends[view_src_backend], node)) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
}
}
for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j];
Expand All @@ -1151,6 +1154,14 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
GGML_ASSERT(*cur_backend_id != -1);
}

// add the node id to the name for easier debugging
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
char new_name[128];
snprintf(new_name, sizeof(new_name), "%s#%d", node->name, i);
ggml_format_name(node, "%s", new_name);
}

// pass 5: split graph, find tensors that need to be copied
{
int i_split = 0;
Expand All @@ -1171,7 +1182,9 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];

if (ggml_is_view_op(node->op)) {
continue;
if ((tensor_backend_id(node) != cur_backend_id) && (ggml_backend_supports_op(sched->backends[cur_backend_id], node))) {
tensor_backend_id(node) = cur_backend_id;
}
}

const int node_backend_id = tensor_backend_id(node);
Expand Down Expand Up @@ -1269,6 +1282,7 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
tensor_copy->org_src = src;
tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy");
}
Expand Down
8 changes: 4 additions & 4 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,9 +1057,9 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
continue;
}
struct ggml_tensor *root_src = nullptr;
// if (src->org_src) {
// root_src = src->org_src;
// }
if (src->org_src) {
root_src = src->org_src;
}
if (root_src) {
if (is_inp_tok(root_src, node) || is_inp_pos(root_src, node) ||
is_output_idx(root_src, node)) {
Expand Down Expand Up @@ -1139,7 +1139,7 @@ void GgmlOvDecoder::compute_node_dynamic_dims() {
// identifies the dynamic dim even when two dims share the same size.
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
if (node->src[0]->op == GGML_OP_NONE) {
if (node->src[0]->op == GGML_OP_NONE && node->src[0]->org_src == nullptr) {
m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
break;
}
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-openvino/ggml-decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
void update_io(ggml_cgraph * cgraph);

inline static bool is_inp_tok(const ggml_tensor * tensor, const ggml_tensor * op) {
return op->op == GGML_OP_GET_ROWS && tensor == op->src[1] && op->src[0]->op == GGML_OP_NONE;
return op->op == GGML_OP_GET_ROWS && tensor == op->src[1] && op->src[0]->op == GGML_OP_NONE && op->src[0]->org_src == nullptr;
}

inline static bool is_inp_pos(const ggml_tensor * tensor, const ggml_tensor * op) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -1767,6 +1767,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
/*.name =*/ { 0 },
/*.extra =*/ NULL,
/*.padding =*/ { 0 },
/*.org_src =*/ NULL,
};

// TODO: this should not be needed as long as we don't rely on aligned SIMD loads
Expand Down