diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 11d3e8a8167..96cac71c68a 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -688,7 +688,9 @@ extern "C" { void * extra; // extra things e.g. for ggml-cuda.cu - char padding[8]; + char padding[16]; + // original source tensor (NULL by default); set on tensor copies so the scheduler can track the true source across in-place operations + struct ggml_tensor * org_src; }; static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 22c656996cc..7c01bf0b6bf 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1124,8 +1124,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra struct ggml_tensor * node = graph->nodes[i]; int * cur_backend_id = &tensor_backend_id(node); if (node->view_src != NULL && *cur_backend_id == -1) { - *cur_backend_id = tensor_backend_id(node->view_src); - SET_CAUSE(node, "4.vsrc"); + auto view_src_backend = tensor_backend_id(node->view_src); + if (view_src_backend != -1 && ggml_backend_supports_op(sched->backends[view_src_backend], node)) { + *cur_backend_id = tensor_backend_id(node->view_src); + SET_CAUSE(node, "4.vsrc"); + } } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; @@ -1151,6 +1154,14 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra GGML_ASSERT(*cur_backend_id != -1); } + // add the node id to the name for easier debugging + for (int i = 0; i < graph->n_nodes; i++) { + struct ggml_tensor * node = graph->nodes[i]; + char new_name[128]; + snprintf(new_name, sizeof(new_name), "%s#%d", node->name, i); + ggml_format_name(node, "%s", new_name); + } + // pass 5: split graph, find tensors that need to be copied { int i_split = 0; @@ -1171,7 +1182,9 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra struct ggml_tensor * node = 
graph->nodes[i]; if (ggml_is_view_op(node->op)) { - continue; + if ((tensor_backend_id(node) != cur_backend_id) && (ggml_backend_supports_op(sched->backends[cur_backend_id], node))) { + tensor_backend_id(node) = cur_backend_id; + } } const int node_backend_id = tensor_backend_id(node); @@ -1269,6 +1282,7 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra ggml_set_input(tensor_copy); ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor } + tensor_copy->org_src = src; tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy; SET_CAUSE(tensor_copy, "4.cpy"); } diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index d75915cd00d..9753272038c 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -1057,9 +1057,9 @@ void GgmlOvDecoder::compute_node_dynamic_dims() { continue; } struct ggml_tensor *root_src = nullptr; - // if (src->org_src) { - // root_src = src->org_src; - // } + if (src->org_src) { + root_src = src->org_src; + } if (root_src) { if (is_inp_tok(root_src, node) || is_inp_pos(root_src, node) || is_output_idx(root_src, node)) { @@ -1139,7 +1139,7 @@ void GgmlOvDecoder::compute_node_dynamic_dims() { // identifies the dynamic dim even when two dims share the same size. 
m_node_dynamic_dims[node] = -1; if (m_node_dynamic_dims[node->src[0]] != -1) { - if (node->src[0]->op == GGML_OP_NONE) { + if (node->src[0]->op == GGML_OP_NONE && node->src[0]->org_src == nullptr) { m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]]; break; } diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index ff8f81e8ae6..93e6973fb38 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -227,7 +227,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { void update_io(ggml_cgraph * cgraph); inline static bool is_inp_tok(const ggml_tensor * tensor, const ggml_tensor * op) { - return op->op == GGML_OP_GET_ROWS && tensor == op->src[1] && op->src[0]->op == GGML_OP_NONE; + return op->op == GGML_OP_GET_ROWS && tensor == op->src[1] && op->src[0]->op == GGML_OP_NONE && op->src[0]->org_src == nullptr; } inline static bool is_inp_pos(const ggml_tensor * tensor, const ggml_tensor * op) { diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 0142498d967..41f3541da65 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1767,6 +1767,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( /*.name =*/ { 0 }, /*.extra =*/ NULL, /*.padding =*/ { 0 }, + /*.org_src =*/ NULL, }; // TODO: this should not be needed as long as we don't rely on aligned SIMD loads