Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ add_extension_if_enabled_and_skip_32bit("azure")
add_extension_if_enabled_and_skip_32bit("unity_catalog")
add_extension_if_enabled("json")
add_extension_if_enabled("fts")
add_extension_if_enabled("gremlin")
add_extension_if_enabled("vector")
add_extension_if_enabled("llm")
add_extension_if_enabled("httpfs")
Expand Down
2 changes: 1 addition & 1 deletion extension_config.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(EXTENSION_LIST adbc azure delta duckdb fts httpfs iceberg json llm postgres sqlite unity_catalog vector neo4j algo)
set(EXTENSION_LIST adbc azure delta duckdb fts gremlin httpfs iceberg json llm postgres sqlite unity_catalog vector neo4j algo)

#set(EXTENSION_STATIC_LINK_LIST fts)
foreach(extension IN LISTS EXTENSION_STATIC_LINK_LIST)
Expand Down
9 changes: 9 additions & 0 deletions gremlin/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Build configuration for the gremlin extension.
#
# Header search paths: the project's src/include tree, the build tree's
# src/include directory, and this extension's own src/include.
include_directories(
${PROJECT_SOURCE_DIR}/src/include
${CMAKE_BINARY_DIR}/src/include
src/include)

# Each subdirectory defines an OBJECT library and appends its objects to
# GREMLIN_EXTENSION_OBJECT_FILES in this (parent) scope.
add_subdirectory(src/function)
add_subdirectory(src/main)

# NOTE(review): build_extension_lib is defined outside this file; presumably it
# links the collected object files into the "gremlin" extension library, built
# statically or dynamically according to BUILD_STATIC_EXTENSION - confirm.
build_extension_lib(${BUILD_STATIC_EXTENSION} "gremlin")
7 changes: 7 additions & 0 deletions gremlin/src/function/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compile the GREMLIN table function into an OBJECT library (no archive is
# produced; only the object files are used).
add_library(lbug_gremlin_extension_function
OBJECT
gremlin_query.cpp)

# Append this library's object files to the list kept by the parent directory.
# PARENT_SCOPE is required because a plain set() would only affect this
# directory's scope.
set(GREMLIN_EXTENSION_OBJECT_FILES
${GREMLIN_EXTENSION_OBJECT_FILES} $<TARGET_OBJECTS:lbug_gremlin_extension_function>
PARENT_SCOPE)
227 changes: 227 additions & 0 deletions gremlin/src/function/gremlin_query.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#include "function/gremlin_query.h"

#include <cctype>
#include <sstream>

#include "common/exception/runtime.h"
#include "function/table/bind_data.h"
#include "function/table/bind_input.h"
#include "function/table/simple_table_function.h"
#include "function/table/table_function.h"

namespace lbug {
namespace gremlin_extension {

using namespace lbug::common;
using namespace lbug::function;
using namespace lbug::main;

namespace {

struct GremlinQueryBindData final : TableFuncBindData {
std::string query;

explicit GremlinQueryBindData(std::string query)
: TableFuncBindData{binder::expression_vector{}, 0 /* maxOffset */},
query{std::move(query)} {}

std::unique_ptr<TableFuncBindData> copy() const override {
return std::make_unique<GremlinQueryBindData>(*this);
}
};

// Parsed form of a supported traversal: g.V().has(k, v).out(l)....values(k).
struct Traversal {
    // Property name given to has(key, value); empty until a has() step is parsed.
    std::string hasKey{};
    // Property value given to has(key, value).
    std::string hasValue{};
    // Labels of the out(label) steps, in the order they appear in the traversal.
    std::vector<std::string> outLabels{};
    // Property name given to the final values(key) step; empty until it is parsed.
    std::string valuesKey{};
};

// Hand-written parser for the Gremlin subset supported by this extension:
//
//     g.V().has('key', 'value') { .out('label') } .values('key')
//
// Whitespace is allowed between tokens. String literals may use single or double quotes.
// Every syntax or validation failure is reported by throwing RuntimeException.
class GremlinParser {
public:
    explicit GremlinParser(std::string query) : query{std::move(query)} {}

    // Parses the whole query and returns its steps.
    //
    // Throws RuntimeException when the text does not start with g.V(), uses an unsupported
    // step, repeats has(), places has() after an out() step, has anything after values(), or
    // lacks a non-empty has() key or values() key.
    Traversal parse() {
        consumeToken("g");
        consumeToken(".");
        consumeCall("V");
        consumeWhitespace();
        Traversal traversal;
        while (!isAtEnd()) {
            consumeToken(".");
            const auto step = parseIdentifier();
            consumeToken("(");
            if (step == "has") {
                if (!traversal.hasKey.empty()) {
                    throw RuntimeException{"GREMLIN supports a single has(key, value) step."};
                }
                // The translation always applies the has() filter to the start vertex. In
                // Gremlin a has() placed after out() filters the vertices reached by that
                // hop instead, so accepting it here would silently filter the wrong vertex.
                if (!traversal.outLabels.empty()) {
                    throw RuntimeException{
                        "GREMLIN has(key, value) must appear before any out(label) step."};
                }
                traversal.hasKey = parseString();
                consumeToken(",");
                traversal.hasValue = parseString();
            } else if (step == "out") {
                traversal.outLabels.push_back(parseString());
            } else if (step == "values") {
                traversal.valuesKey = parseString();
                consumeToken(")");
                consumeWhitespace();
                if (!isAtEnd()) {
                    throw RuntimeException{"GREMLIN values(key) must be the final step."};
                }
                break;
            } else {
                throw RuntimeException{"GREMLIN supports only has(key, value), out(label), and "
                                       "values(key) after g.V()."};
            }
            consumeToken(")");
            consumeWhitespace();
        }
        validate(traversal);
        return traversal;
    }

private:
    // True once every character of the query has been consumed.
    bool isAtEnd() const { return pos >= query.size(); }

    // Advances past any run of whitespace at the current position.
    void consumeWhitespace() {
        // The cast avoids undefined behaviour in std::isspace for negative char values.
        while (!isAtEnd() && std::isspace(static_cast<unsigned char>(query[pos]))) {
            pos++;
        }
    }

    // Skips leading whitespace, then requires `token` at the current position and advances
    // past it. Throws RuntimeException quoting the remaining input otherwise.
    void consumeToken(const std::string& token) {
        consumeWhitespace();
        // pos never exceeds query.size(), so compare() cannot throw std::out_of_range.
        if (query.compare(pos, token.size(), token) != 0) {
            throw RuntimeException{"Invalid GREMLIN traversal near '" + query.substr(pos) + "'."};
        }
        pos += token.size();
    }

    // Consumes an argument-less call such as `V()`.
    void consumeCall(const std::string& name) {
        consumeToken(name);
        consumeToken("(");
        consumeToken(")");
    }

    // Reads a step name made of alphanumeric characters and underscores.
    // Throws RuntimeException when no such character is present.
    std::string parseIdentifier() {
        consumeWhitespace();
        const auto start = pos;
        while (!isAtEnd() &&
               (std::isalnum(static_cast<unsigned char>(query[pos])) || query[pos] == '_')) {
            pos++;
        }
        if (start == pos) {
            throw RuntimeException{"Expected GREMLIN step name."};
        }
        return query.substr(start, pos - start);
    }

    // Reads a quoted string literal and returns its unescaped contents.
    //
    // A backslash makes the following character literal: \' yields ', \\ yields \, and any
    // other escaped character (for example \n) yields that character itself.
    // Throws RuntimeException when the opening quote is missing or the literal is unterminated.
    std::string parseString() {
        consumeWhitespace();
        if (isAtEnd() || (query[pos] != '"' && query[pos] != '\'')) {
            throw RuntimeException{"Expected GREMLIN string literal."};
        }
        const auto quote = query[pos++];
        std::string result;
        while (!isAtEnd()) {
            const auto ch = query[pos++];
            if (ch == quote) {
                return result;
            }
            if (ch == '\\') {
                if (isAtEnd()) {
                    throw RuntimeException{"Unterminated GREMLIN string escape."};
                }
                result.push_back(query[pos++]);
            } else {
                result.push_back(ch);
            }
        }
        throw RuntimeException{"Unterminated GREMLIN string literal."};
    }

    // Requires both a has() key and a values() key to be present and non-empty.
    static void validate(const Traversal& traversal) {
        if (traversal.hasKey.empty() || traversal.valuesKey.empty()) {
            throw RuntimeException{
                "GREMLIN traversal must contain has(key, value) and final values(key) steps."};
        }
    }

    // Text being parsed.
    std::string query;
    // Index of the next unread character in `query`.
    size_t pos = 0;
};

// Wraps `identifier` in backticks, doubling every backtick it contains.
static std::string quoteIdentifier(const std::string& identifier) {
    std::string quoted(1, '`');
    for (const char c : identifier) {
        quoted.push_back(c);
        if (c == '`') {
            // An embedded backtick is written twice.
            quoted.push_back('`');
        }
    }
    quoted.push_back('`');
    return quoted;
}

// Wraps `value` in single quotes, prefixing every single quote and backslash with a backslash.
static std::string quoteStringLiteral(const std::string& value) {
    std::string literal(1, '\'');
    for (const char c : value) {
        const bool needsEscape = (c == '\'' || c == '\\');
        if (needsEscape) {
            literal.push_back('\\');
        }
        literal.push_back(c);
    }
    literal.push_back('\'');
    return literal;
}

// Translates a supported Gremlin traversal into one Cypher statement.
//
// The vertices are named v0..vN, where N is the number of out() steps. The has() filter is
// applied to v0 and the values() property is returned from vN, aliased to the property name.
// Parse failures propagate as the exceptions thrown by GremlinParser::parse().
static std::string translateToCypher(const std::string& gremlinQuery) {
    const auto steps = GremlinParser{gremlinQuery}.parse();
    const auto hopCount = steps.outLabels.size();
    const auto returnedProperty = quoteIdentifier(steps.valuesKey);

    std::ostringstream statement;
    // Path pattern: (v0)-[:l1]->(v1)-[:l2]->(v2)...
    statement << "MATCH (v0";
    auto vertexIndex = 0u;
    for (const auto& label : steps.outLabels) {
        ++vertexIndex;
        statement << ")-[:" << quoteIdentifier(label) << "]->(v" << vertexIndex;
    }
    statement << ") WHERE v0." << quoteIdentifier(steps.hasKey) << " = "
              << quoteStringLiteral(steps.hasValue);
    statement << " RETURN v" << hopCount << "." << returnedProperty << " AS " << returnedProperty
              << ";";
    return statement.str();
}

// Bind callback: stores the first literal argument (the Gremlin text) in the bind data.
// The client context is not used.
static std::unique_ptr<TableFuncBindData> bindFunc(ClientContext* /*context*/,
    const TableFuncBindInput* input) {
    auto gremlinText = input->getLiteralVal<std::string>(0);
    return std::make_unique<GremlinQueryBindData>(std::move(gremlinText));
}

// Rewrite callback: returns the Cypher translation of the Gremlin text held in `bindData`.
// The client context is not used.
static std::string rewriteQuery(ClientContext& /*context*/, const TableFuncBindData& bindData) {
    const auto* gremlinBindData = bindData.constPtrCast<GremlinQueryBindData>();
    return translateToCypher(gremlinBindData->query);
}

} // namespace

// Builds the function set containing the single GREMLIN(STRING) table function.
function_set GremlinQueryFunction::getFunctionSet() {
    auto gremlinFunc = std::make_unique<TableFunction>(name, std::vector{LogicalTypeID::STRING});

    // Callbacks implemented in this file: capture the query text, then rewrite it to Cypher.
    gremlinFunc->bindFunc = bindFunc;
    gremlinFunc->rewriteFunc = rewriteQuery;

    // Remaining hooks use the stock implementations; parallel execution is disabled.
    gremlinFunc->tableFunc = TableFunction::emptyTableFunc;
    gremlinFunc->initSharedStateFunc = SimpleTableFunc::initSharedState;
    gremlinFunc->initLocalStateFunc = TableFunction::initEmptyLocalState;
    gremlinFunc->canParallelFunc = [] { return false; };

    function_set result;
    result.push_back(std::move(gremlinFunc));
    return result;
}

} // namespace gremlin_extension
} // namespace lbug
15 changes: 15 additions & 0 deletions gremlin/src/include/function/gremlin_query.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include "function/function.h"

namespace lbug {
namespace gremlin_extension {

// Descriptor of the GREMLIN table function provided by the gremlin extension.
struct GremlinQueryFunction {
// Name passed to the TableFunction that getFunctionSet() constructs.
static constexpr const char* name = "GREMLIN";

// Returns a function set holding the GREMLIN table function, which takes one STRING argument.
static function::function_set getFunctionSet();
};

} // namespace gremlin_extension
} // namespace lbug
17 changes: 17 additions & 0 deletions gremlin/src/include/main/gremlin_extension.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "extension/extension.h"

namespace lbug {
namespace gremlin_extension {

// Entry type of the gremlin extension.
class GremlinExtension final : public extension::Extension {
public:
// Extension name; returned by the exported name() entry point in dynamic-load builds.
static constexpr char EXTENSION_NAME[] = "GREMLIN";

public:
// Adds GremlinQueryFunction as a standalone table function to the database of `context`.
static void load(main::ClientContext* context);
};

} // namespace gremlin_extension
} // namespace lbug
7 changes: 7 additions & 0 deletions gremlin/src/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compile the extension entry point into an OBJECT library (no archive is
# produced; only the object files are used).
add_library(gremlin_extension_main
OBJECT
gremlin_extension.cpp)

# Append this library's object files to the list kept by the parent directory.
# PARENT_SCOPE is required because a plain set() would only affect this
# directory's scope.
set(GREMLIN_EXTENSION_OBJECT_FILES
${GREMLIN_EXTENSION_OBJECT_FILES} $<TARGET_OBJECTS:gremlin_extension_main>
PARENT_SCOPE)
35 changes: 35 additions & 0 deletions gremlin/src/main/gremlin_extension.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "main/gremlin_extension.h"

#include "function/gremlin_query.h"
#include "main/client_context.h"
#include "main/database.h"

namespace lbug {
namespace gremlin_extension {

using namespace extension;

// Registers the GREMLIN table function as a standalone table function on the database that
// owns `context`.
void GremlinExtension::load(main::ClientContext* context) {
    ExtensionUtils::addStandaloneTableFunc<GremlinQueryFunction>(*context->getDatabase());
}

} // namespace gremlin_extension
} // namespace lbug

// Entry points with C linkage, compiled only when BUILD_DYNAMIC_LOAD is defined.
// NOTE(review): presumably these are the symbols the extension loader looks up in the
// shared library - confirm against the loader.
#if defined(BUILD_DYNAMIC_LOAD)
extern "C" {
// INIT_EXPORT makes a symbol visible outside the library: dllexport on Windows, default
// visibility elsewhere.
#if defined(_WIN32)
#define INIT_EXPORT __declspec(dllexport)
#else
#define INIT_EXPORT __attribute__((visibility("default")))
#endif
// Loads the gremlin extension through GremlinExtension::load(context).
INIT_EXPORT void init(lbug::main::ClientContext* context) {
lbug::gremlin_extension::GremlinExtension::load(context);
}

// Returns the extension name, GremlinExtension::EXTENSION_NAME ("GREMLIN").
INIT_EXPORT const char* name() {
return lbug::gremlin_extension::GremlinExtension::EXTENSION_NAME;
}
}
#endif
25 changes: 25 additions & 0 deletions gremlin/test/basic.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-DATASET CSV empty
-BUFFER_POOL_SIZE 1024000000

--

-CASE GremlinOutOutValues

-LOAD_DYNAMIC_EXTENSION gremlin

-STATEMENT CREATE NODE TABLE person(id INT64, name STRING, PRIMARY KEY(id));
---- ok

-STATEMENT CREATE REL TABLE knows(FROM person TO person);
---- ok

-STATEMENT CREATE (:person {id: 0, name: "gremlin"}), (:person {id: 1, name: "marko"}), (:person {id: 2, name: "lop"}), (:person {id: 3, name: "josh"});
---- ok

-STATEMENT MATCH (a:person {id: 0}), (b:person {id: 1}), (c:person {id: 2}), (d:person {id: 3}) CREATE (a)-[:knows]->(b), (b)-[:knows]->(c), (b)-[:knows]->(d);
---- ok

-STATEMENT CALL GREMLIN("g.V().has('name','gremlin').out('knows').out('knows').values('name')");
---- 2
josh
lop