Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2b6d2da
Add interface is_model_splitted() to check whether the c-graph is split or not
zhaixuejun1993 Mar 6, 2026
813fe5f
Add member func named is_splited_model()
zhaixuejun1993 Mar 6, 2026
eb5dc53
Fix error in test ops
zhaixuejun1993 Mar 16, 2026
a528765
Add fun description
zhaixuejun1993 Mar 16, 2026
fbc3128
Infer and propagate dynamic-dimension indices for all tensors in the …
zhaixuejun1993 Mar 17, 2026
c397b1c
Thread safety per request only
cavusmustafa Mar 17, 2026
37f6bca
Merge branch 'dev_backend_openvino' into xuejun/ov-bk-add-func-is-spl…
zhaixuejun1993 Mar 18, 2026
be67f32
Merge pull request #71 from zhaixuejun1993/xuejun/ov-bk-add-func-is-s…
zhaixuejun1993 Mar 19, 2026
07029c1
Only do this for fallback sub graph
zhaixuejun1993 Mar 19, 2026
f4b663e
Merge pull request #76 from zhaixuejun1993/xuejun/fix_llama_cli-issue
zhaixuejun1993 Mar 19, 2026
b185b49
Use i4/i8 directly for symmetric quant
wine99 Mar 19, 2026
ee7c9f3
Use weightless caching
wine99 Mar 19, 2026
8930726
Add WeightlessCacheAttribute to reduce NPU memory usage
wine99 Mar 19, 2026
c13ca29
Move dynamic dims compute in graph mismatch
zhaixuejun1993 Mar 23, 2026
bb0028a
ggml-openvino: fix tensor data handling for PERMUTE/VIEW ops in split…
zhaixuejun1993 Mar 19, 2026
5c1ec64
ggml-openvino:add comments
zhaixuejun1993 Mar 19, 2026
ad8605e
ggml-openvino: override VIEW op_case to 0 for split model inputs
zhaixuejun1993 Mar 19, 2026
dc7ff7f
openvino backend: Handle unsupported VIEW shape-mismatch in OpenVINO …
zhaixuejun1993 Mar 19, 2026
b627d58
Fix sticky stateful config
wine99 Mar 19, 2026
2d032d8
Enable additional mul_mat tests and add tensor data saving function (…
zhaixuejun1993 Mar 23, 2026
6ce5e7a
Fix ROPE yarn case
wine99 Mar 24, 2026
ca1bd05
ggml-openvino: fix CONT/TRANSPOSE mapping and improve dynamic-dimensi…
zhaixuejun1993 Mar 26, 2026
6f0b803
OpenVINO: add NORM/TANH support and rework SOFT_MAX translation
zhaixuejun1993 Mar 28, 2026
0696172
ggml-openvino: extend VIEW handling
zhaixuejun1993 Mar 30, 2026
581a7d5
openvino backend: enable OpenVINO backend fallback to CPU backend
zhaixuejun1993 Mar 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,9 @@ extern "C" {

void * extra; // extra things e.g. for ggml-cuda.cu

char padding[8];
char padding[16];
// org_src: the original source tensor that this tensor was created from (e.g. when the backend scheduler makes a per-backend copy of a tensor); NULL by default when no original source is tracked
struct ggml_tensor * org_src;
};

static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
Expand Down
20 changes: 17 additions & 3 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,8 +1124,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];
int * cur_backend_id = &tensor_backend_id(node);
if (node->view_src != NULL && *cur_backend_id == -1) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
auto view_src_backend = tensor_backend_id(node->view_src);
if (view_src_backend != -1 && ggml_backend_supports_op(sched->backends[view_src_backend], node)) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
}
}
for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j];
Expand All @@ -1151,6 +1154,14 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
GGML_ASSERT(*cur_backend_id != -1);
}

// add the node id to the name for easier debugging
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
char new_name[128];
snprintf(new_name, sizeof(new_name), "%s#%d", node->name, i);
ggml_format_name(node, "%s", new_name);
}

// pass 5: split graph, find tensors that need to be copied
{
int i_split = 0;
Expand All @@ -1171,7 +1182,9 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];

if (ggml_is_view_op(node->op)) {
continue;
if ((tensor_backend_id(node) != cur_backend_id) && (ggml_backend_supports_op(sched->backends[cur_backend_id], node))) {
tensor_backend_id(node) = cur_backend_id;
}
}

const int node_backend_id = tensor_backend_id(node);
Expand Down Expand Up @@ -1269,6 +1282,7 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
tensor_copy->org_src = src;
tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy");
}
Expand Down
Loading