Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion ggml/src/ggml-openvino/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ ggml_add_backend_library(ggml-openvino
${GGML_HEADERS_OPENVINO}
)

# Link dependencies of the OpenVINO backend.
# OpenVINO runtime and TBB stay PRIVATE; OpenCL is PUBLIC so that its usage
# requirements propagate to targets that link against ggml-openvino.
# NOTE(review): presumably needed because a backend header exposes OpenCL
# types (e.g. cl_command_queue) - confirm against the consumers.
target_link_libraries(ggml-openvino
    PRIVATE openvino::runtime TBB::tbb
    PUBLIC OpenCL::OpenCL
)

if (GGML_OPENVINO)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
Expand Down
77 changes: 68 additions & 9 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,15 +843,30 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor * tensor) {
}

// Returns the shape of the input registered as `name` on node `node_idx`.
//
// When the node has no input under `name`, a diagnostic is written to
// std::cerr and a fully dynamic shape is returned instead of calling `at()`
// on a missing key.
ov::PartialShape GgmlOvDecoder::get_input_shape(int node_idx, const std::string & name) const {
    const auto & inputs = m_node_info_list[node_idx].node_inputs;

    // One lookup serves both the presence check and the access.
    const auto it = inputs.find(name);
    if (it == inputs.end()) {
        std::cerr << "\nCRASH CAUGHT: get_input_shape missing key: '" << name << "' on node " << node_idx << "\n";
        return ov::PartialShape::dynamic();
    }
    return ov::PartialShape(get_shape(it->second));
}

// Returns the stride vector of the input registered as `name` on node
// `node_idx`.
//
// When the node has no input under `name`, a diagnostic is written to
// std::cerr and an empty vector is returned instead of calling `at()` on a
// missing key.
std::vector<size_t> GgmlOvDecoder::get_input_stride(int node_idx, const std::string & name) const {
    const auto & inputs = m_node_info_list[node_idx].node_inputs;

    // One lookup serves both the presence check and the access.
    const auto it = inputs.find(name);
    if (it == inputs.end()) {
        std::cerr << "\nCRASH CAUGHT: get_input_stride missing key: '" << name << "' on node " << node_idx << "\n";
        return {};
    }
    return get_stride(it->second);
}

// Returns the OpenVINO element type of the input registered as `name` on node
// `node_idx`.
//
// When the node has no input under `name`, a diagnostic is written to
// std::cerr and `ov::element::dynamic` is returned instead of calling `at()`
// on a missing key.
ov::element::Type GgmlOvDecoder::get_input_type(int node_idx, const std::string & name) const {
    const auto & inputs = m_node_info_list[node_idx].node_inputs;

    // One lookup serves both the presence check and the access.
    const auto it = inputs.find(name);
    if (it == inputs.end()) {
        std::cerr << "\nCRASH CAUGHT: get_input_type missing key: '" << name << "' on node " << node_idx << "\n";
        return ov::element::dynamic;
    }
    return get_ov_type(it->second);
}

size_t GgmlOvDecoder::get_input_size() const {
Expand Down Expand Up @@ -879,6 +894,28 @@ std::vector<std::string> GgmlOvDecoder::get_output_names(int node_idx) const {
return {m_node_info_list[node_idx].node_output_name};
}

// Raw ggml tensor accessors: expose the underlying ggml_tensor pointers of a node's inputs and output.
// Collects the ggml tensor pointers behind the inputs of node `node_idx`.
//
// The result has one entry per name in `node_inputs_names`, in the same order.
// A name without a matching entry in `node_inputs` yields nullptr at that
// position, so indices stay aligned with the name list.
std::vector<const struct ggml_tensor*> GgmlOvDecoder::get_input_tensors(int node_idx) const {
    const auto & node_info = m_node_info_list[node_idx];

    std::vector<const struct ggml_tensor *> result;
    result.reserve(node_info.node_inputs_names.size());

    for (const auto & input_name : node_info.node_inputs_names) {
        const auto entry = node_info.node_inputs.find(input_name);
        const struct ggml_tensor * tensor = nullptr;  // placeholder for unmapped names
        if (entry != node_info.node_inputs.end()) {
            tensor = entry->second;
        }
        result.push_back(tensor);
    }
    return result;
}

// Returns the ggml tensor pointer stored as the output of node `node_idx`,
// wrapped in a one-element vector.
std::vector<const struct ggml_tensor*> GgmlOvDecoder::get_output_tensors(int node_idx) const {
    std::vector<const struct ggml_tensor *> outputs;
    outputs.push_back(m_node_info_list[node_idx].node_output);
    return outputs;
}

const std::string & GgmlOvDecoder::get_op_name() const {
static const std::string unknown_name = "UNKNOWN_OP_NAME";
return unknown_name;
Expand All @@ -889,7 +926,12 @@ const std::string & GgmlOvDecoder::get_op_name(int node_idx) const {
}

// Returns the `op_params` array of the input tensor registered as `name` on
// node `node_idx`.
//
// When the node has no input under `name`, a diagnostic is written to
// std::cerr and nullptr is returned instead of calling `at()` on a missing
// key; callers must check the result before dereferencing it.
int32_t * GgmlOvDecoder::get_input_op_params(int node_idx, const std::string & name) const {
    const auto & inputs = m_node_info_list[node_idx].node_inputs;

    // One lookup serves both the presence check and the access.
    const auto it = inputs.find(name);
    if (it == inputs.end()) {
        std::cerr << "\nCRASH CAUGHT: get_input_op_params missing key: '" << name << "' on node " << node_idx << "\n";
        return nullptr;
    }
    return it->second->op_params;
}

int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const {
Expand Down Expand Up @@ -954,13 +996,30 @@ std::string GgmlOvDecoder::compute_op_type(const ggml_tensor * node) {
};

switch (node->op) {
case GGML_OP_UNARY:
return unary_ops.at(ggml_get_unary_op(node));
case GGML_OP_GLU:
return glu_ops.at(ggml_get_glu_op(node));
default:
case GGML_OP_UNARY: {
auto uop = ggml_get_unary_op(node);
if (unary_ops.find(uop) == unary_ops.end()) {
std::cerr << "\n[GgmlOvDecoder] MISSING UNARY OP: " << uop << " (Node: " << node->name << ")\n";
return "UNKNOWN_GGML_OP";
}
return unary_ops.at(uop);
}
case GGML_OP_GLU: {
auto gop = ggml_get_glu_op(node);
if (glu_ops.find(gop) == glu_ops.end()) {
std::cerr << "\n[GgmlOvDecoder] MISSING GLU OP: " << gop << " (Node: " << node->name << ")\n";
return "UNKNOWN_GGML_OP";
}
return glu_ops.at(gop);
}
default: {
if (ops.find(node->op) == ops.end()) {
std::cerr << "\n[GgmlOvDecoder] CRASH PREVENTED: Missing GGML OP Code: " << node->op << " (Node: " << node->name << ")\n";
return "UNKNOWN_GGML_OP";
}
return ops.at(node->op);
}
}
static const std::string unknown_op = "UNKNOWN_GGML_OP";
return unknown_op;
}
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-openvino/ggml-decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {

virtual std::vector<std::string> get_input_names(int node_idx) const override;

virtual std::vector<const struct ggml_tensor*> get_input_tensors(int node_idx) const override;

virtual ov::PartialShape get_output_shape(int node_idx) const override;

virtual ov::element::Type get_output_type(int node_idx) const override;
Expand All @@ -112,6 +114,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {

virtual std::vector<std::string> get_output_names(int node_idx) const override;

virtual std::vector<const struct ggml_tensor*> get_output_tensors(int node_idx) const override;

virtual const std::string & get_op_type() const override;

virtual const std::string & get_op_type(int node_idx) const override;
Expand Down
20 changes: 20 additions & 0 deletions ggml/src/ggml-openvino/ggml-openvino-extra.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ struct ggml_openvino_device_config {
ov::AnyMap compile_config;
cl_command_queue cl_queue = nullptr;

bool is_capturing = false;
struct ggml_cgraph * captured_graph = nullptr;

void init();
~ggml_openvino_device_config();
};
Expand Down Expand Up @@ -178,5 +181,22 @@ struct ggml_backend_openvino_context {

std::shared_ptr<void> runtime_context = nullptr;

bool is_capturing = false;
struct ggml_cgraph * captured_graph = nullptr;

ggml_backend_openvino_context() = default;
};

// C-linkage control functions for the OpenVINO backend's graph capture and
// bypass switches.
#ifdef __cplusplus
extern "C" {
#endif

// Turns graph-capture mode on or off.
void ggml_backend_ov_set_capture_mode(bool enable);
// Returns the graph pointer recorded while capture mode was active, or
// nullptr if none has been recorded.
// NOTE(review): the returned pointer is whatever graph was handed to the
// backend; its lifetime is owned by the caller of graph_compute - confirm
// before holding on to it.
struct ggml_cgraph * ggml_backend_ov_get_captured_graph();

// Temporary bypass switch: sets the global bypass flag of the backend.
void ggml_backend_ov_set_bypass(bool bypass);

#ifdef __cplusplus
}
#endif
51 changes: 50 additions & 1 deletion ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,8 +606,18 @@ static const char * ggml_backend_openvino_get_name(ggml_backend_t backend) {
}

// Backend entry point for computing a ggml graph.
//
// In capture mode the graph is not executed: its pointer is recorded in the
// device config (retrievable through ggml_backend_ov_get_captured_graph) and
// success is reported immediately. Otherwise the graph is forwarded to
// ov_graph_compute.
static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
    auto & config = ggml_openvino_get_device_config();

    if (config.is_capturing) {
        // Only the pointer is stored; no copy of the graph is made.
        config.captured_graph = cgraph;
        return GGML_STATUS_SUCCESS;
    }

    return ov_graph_compute(cgraph, backend);
}

static const ggml_backend_i ggml_backend_openvino_interface = {
Expand Down Expand Up @@ -732,6 +742,11 @@ static void ggml_backend_openvino_device_get_props(ggml_backend_dev_t dev, ggml_
/* .buffer_from_host_ptr = */ false,
/* .events = */ false,
};

if (ggml_openvino_get_device_config().is_capturing) {
props->caps.host_buffer = true;
props->caps.buffer_from_host_ptr = true;
}
}

static ggml_backend_t ggml_backend_openvino_device_init(ggml_backend_dev_t dev, const char * params) {
Expand Down Expand Up @@ -922,9 +937,19 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
return false;
}

extern bool g_ov_bypass_mode;

static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
GGML_ASSERT(dev->reg != nullptr);

if (g_ov_bypass_mode) {
return false;
}

if (ggml_openvino_get_device_config().is_capturing) {
return true;
}

static std::set<ggml_type> supported_types{GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_I64,
GGML_TYPE_I32, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K,
GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};
Expand Down Expand Up @@ -1017,6 +1042,10 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
}

// Reports whether the device accepts buffers of type `buft`.
//
// While capture mode is active every buffer type is accepted; otherwise only
// OpenVINO and host buffer types are.
static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(dev);

    const bool capturing = ggml_openvino_get_device_config().is_capturing;
    return capturing || ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_host(buft);
}
Expand Down Expand Up @@ -1108,3 +1137,23 @@ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg(void) {

return &reg;
}

// Switches graph-capture mode on or off in the device config.
// Enabling capture also discards any previously recorded graph pointer;
// disabling leaves the recorded pointer in place so it can still be read.
void ggml_backend_ov_set_capture_mode(bool enable) {
    auto & device_config = ggml_openvino_get_device_config();

    if (enable) {
        device_config.captured_graph = nullptr;
    }
    device_config.is_capturing = enable;
}

// Returns the graph pointer currently recorded in the device config
// (nullptr when nothing has been captured).
struct ggml_cgraph * ggml_backend_ov_get_captured_graph() {
    const auto & device_config = ggml_openvino_get_device_config();
    return device_config.captured_graph;
}

// Phase-1 temporary bypass mechanism used for ov::Model verification.

// Global bypass flag. While it is true, the device's supports_op check
// rejects every op. Plain (non-atomic) bool with no synchronization.
bool g_ov_bypass_mode = false;

// Sets the global bypass flag to `bypass`.
void ggml_backend_ov_set_bypass(bool bypass) {
g_ov_bypass_mode = bypass;
}
6 changes: 6 additions & 0 deletions ggml/src/ggml-openvino/openvino/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <openvino/frontend/decoder.hpp>
#include <string>

struct ggml_tensor;

namespace ov {
namespace frontend {
namespace ggml {
Expand Down Expand Up @@ -33,6 +35,8 @@ class GgmlDecoder : public DecoderBase {

virtual PartialShape get_output_shape(int node_idx) const = 0;

virtual std::vector<const struct ggml_tensor*> get_input_tensors(int node_idx) const = 0;

virtual element::Type get_output_type(const int node_idx) const = 0;

virtual int32_t* get_input_op_params(int node_idx, const std::string& name) const = 0;
Expand All @@ -41,6 +45,8 @@ class GgmlDecoder : public DecoderBase {

virtual std::vector<std::string> get_output_names(int node_idx) const = 0;

virtual std::vector<const struct ggml_tensor*> get_output_tensors(int node_idx) const = 0;

virtual const std::string& get_op_type() const = 0;

virtual const std::string& get_op_type(int node_idx) const = 0;
Expand Down
59 changes: 57 additions & 2 deletions ggml/src/ggml-openvino/openvino/node_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
#include <cstdint>
#include <openvino/frontend/node_context.hpp>
#include <string>

#include <iostream>
#include "decoder.h"
#include "ggml.h"

struct ggml_tensor;

namespace ov {
namespace frontend {
Expand All @@ -13,20 +16,26 @@ namespace ggml {
class TranslateSession;

typedef std::map<std::string, Output<Node>> TensorMap;
typedef std::map<const struct ggml_tensor*, Output<Node>> TensorPtrMap;

class NodeContext : public frontend::NodeContext {
public:
// Builds the translation context for graph node `node_idx`.
//
// decoder           - source of per-node metadata; its op type for `node_idx`
//                     initialises the base frontend::NodeContext.
// tensor_map        - name -> OpenVINO output map, held by reference.
// tensor_ptr_map    - ggml_tensor pointer -> OpenVINO output map, held by
//                     reference.
// node_idx          - index of the node this context describes.
// translate_session - optional owning session; may be nullptr.
//
// Both map arguments are stored as references to the caller's shared_ptr
// objects, so those objects must outlive this context.
NodeContext(const std::shared_ptr<GgmlDecoder>& decoder,
std::shared_ptr<TensorMap>& tensor_map,
std::shared_ptr<TensorPtrMap>& tensor_ptr_map,
int node_idx,
TranslateSession* translate_session = nullptr)
: ov::frontend::NodeContext(decoder->get_op_type(node_idx)),
m_decoder(decoder),
m_tensor_map(tensor_map),
m_tensor_ptr_map(tensor_ptr_map),
m_node_idx(node_idx),
m_translate_session(translate_session) {
// Cache the node's input/output names once so later lookups do not go back to the decoder.
m_input_names = decoder->get_input_names(m_node_idx);
m_output_names = decoder->get_output_names(m_node_idx);

// Cache the matching raw tensor pointers; they back the pointer-based lookup in get_input(int).
m_input_tensors = decoder->get_input_tensors(m_node_idx);
m_output_tensors = decoder->get_output_tensors(m_node_idx);
}

TranslateSession* get_translate_session() const {
Expand Down Expand Up @@ -66,7 +75,50 @@ class NodeContext : public frontend::NodeContext {
}

// Resolves the idx-th input of this node to the OpenVINO output that feeds it.
//
// Lookup order:
//   1. the pointer map, keyed by the input's ggml_tensor pointer;
//   2. the name map, keyed by the input's name;
//   3. a linear scan of the pointer map for a tensor whose ggml name equals
//      the input's name (logged to std::cerr when it succeeds).
//
// Throws std::runtime_error when `idx` is outside the input list or when none
// of the three lookups finds the tensor.
Output<Node> get_input(int idx) const override {
    // Reject negative indices explicitly instead of relying on the implicit
    // signed-to-unsigned conversion in the size comparisons below.
    if (idx < 0) {
        throw std::runtime_error("CRITICAL: Input index out of bounds!");
    }
    const auto pos = static_cast<size_t>(idx);

    // 1. Exact match on the tensor pointer.
    if (pos < m_input_tensors.size() && m_input_tensors[pos] != nullptr) {
        const auto by_ptr = m_tensor_ptr_map->find(m_input_tensors[pos]);
        if (by_ptr != m_tensor_ptr_map->end()) {
            return by_ptr->second;
        }
    }

    if (pos >= m_input_names.size()) {
        throw std::runtime_error("CRITICAL: Input index out of bounds!");
    }
    const std::string & target_name = m_input_names[pos];

    // 2. Match on the input name.
    const auto by_name = m_tensor_map->find(target_name);
    if (by_name != m_tensor_map->end()) {
        return by_name->second;
    }

    // 3. Temporary fallback: the pointer did not match, so look for any
    //    translated tensor that carries the expected name.
    for (const auto & entry : *m_tensor_ptr_map) {
        if (entry.first != nullptr && target_name == ggml_get_name(entry.first)) {
            std::cerr << "[GGUFReaderV2] Recovered shifted tensor via brute-force: '" << target_name << "'\n";
            return entry.second;
        }
    }

    // No placeholder node is produced for a missing input: fail loudly so a
    // broken capture is noticed.
    throw std::runtime_error("[GGUFReaderV2] FATAL: Tensor completely lost during extraction: '" + target_name + "'");
}

Output<Node> get_input(const std::string& name) const override {
Expand Down Expand Up @@ -99,10 +151,13 @@ class NodeContext : public frontend::NodeContext {
private:
std::shared_ptr<GgmlDecoder> m_decoder;
std::shared_ptr<TensorMap>& m_tensor_map;
std::shared_ptr<TensorPtrMap>& m_tensor_ptr_map;
int m_node_idx;
TranslateSession* m_translate_session;
std::vector<std::string> m_input_names;
std::vector<std::string> m_output_names;
std::vector<const struct ggml_tensor*> m_input_tensors;
std::vector<const struct ggml_tensor*> m_output_tensors;
};

using CreatorFunction = std::function<ov::OutputVector(const ov::frontend::ggml::NodeContext&)>;
Expand Down
Loading