From 435823112106bca0df7d0311121b1326f7e910f3 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Mon, 22 Jun 2026 21:54:39 +0530 Subject: [PATCH 1/4] add TableStorageFormat enum --- src/common/enums/CMakeLists.txt | 1 + src/common/enums/table_storage_format.cpp | 29 +++++++ .../common/enums/table_storage_format.h | 27 +++++++ .../storage/table/columnar_node_table_base.h | 9 ++- .../storage/table/columnar_rel_table_base.h | 9 ++- src/include/storage/table/foreign_rel_table.h | 3 + src/storage/table/arrow_node_table.cpp | 3 +- src/storage/table/arrow_rel_table.cpp | 3 +- src/storage/table/ice_disk_node_table.cpp | 3 +- src/storage/table/ice_disk_rel_table.cpp | 3 +- test/api/CMakeLists.txt | 1 + .../columnar_table_storage_format_test.cpp | 79 +++++++++++++++++++ test/common/CMakeLists.txt | 1 + test/common/table_storage_format_test.cpp | 13 +++ 14 files changed, 176 insertions(+), 8 deletions(-) create mode 100644 src/common/enums/table_storage_format.cpp create mode 100644 src/include/common/enums/table_storage_format.h create mode 100644 test/api/columnar_table_storage_format_test.cpp create mode 100644 test/common/table_storage_format_test.cpp diff --git a/src/common/enums/CMakeLists.txt b/src/common/enums/CMakeLists.txt index 1205d9fe0d..03466054ec 100644 --- a/src/common/enums/CMakeLists.txt +++ b/src/common/enums/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(lbug_common_enums OBJECT accumulate_type.cpp + table_storage_format.cpp storage_format.cpp path_semantic.cpp query_rel_type.cpp diff --git a/src/common/enums/table_storage_format.cpp b/src/common/enums/table_storage_format.cpp new file mode 100644 index 0000000000..eab08be80f --- /dev/null +++ b/src/common/enums/table_storage_format.cpp @@ -0,0 +1,29 @@ +#include "common/enums/table_storage_format.h" + +#include "common/assert.h" +#include "common/exception/binder.h" +#include "common/string_utils.h" +#include + +namespace lbug { +namespace common { + +std::string TableStorageFormatUtils::toString(TableStorageFormat format) { + switch (format) { + case TableStorageFormat::UNKNOWN: + return "unknown"; + case TableStorageFormat::NATIVE: + return "native"; + case TableStorageFormat::ICEBUG_DISK: + return "icebug-disk"; + case TableStorageFormat::ARROW: + return "arrow"; + case TableStorageFormat::FOREIGN: + return "foreign"; + default: + UNREACHABLE_CODE; + } +} + +} // namespace common +} // namespace lbug diff --git a/src/include/common/enums/table_storage_format.h b/src/include/common/enums/table_storage_format.h new file mode 100644 index 0000000000..e89c1e5c43 --- /dev/null +++ b/src/include/common/enums/table_storage_format.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +#include "common/api.h" + +namespace lbug { +namespace common { + +enum class TableStorageFormat : uint8_t { + UNKNOWN = 0, + // native + NATIVE = 1, + // first class citizens + ICEBUG_DISK = 10, + ARROW = 11, + // foreign tables + FOREIGN = 30, +}; + +struct TableStorageFormatUtils { + static std::string toString(TableStorageFormat format); +}; + +} // namespace common +} // namespace lbug diff --git a/src/include/storage/table/columnar_node_table_base.h b/src/include/storage/table/columnar_node_table_base.h index a719aa8402..50a76f45f3 100644 --- a/src/include/storage/table/columnar_node_table_base.h +++ b/src/include/storage/table/columnar_node_table_base.h @@ -37,10 +37,11 @@ class ColumnarNodeTableBase : public NodeTable { public: ColumnarNodeTableBase(const StorageManager* storageManager, const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, - std::unique_ptr tableScanSharedState) + std::unique_ptr tableScanSharedState, + lbug::common::TableStorageFormat storageFormat) : NodeTable{storageManager, nodeTableEntry, memoryManager}, nodeTableCatalogEntry{nodeTableEntry}, - tableScanSharedState{std::move(tableScanSharedState)} {} + tableScanSharedState{std::move(tableScanSharedState)}, storageFormat{storageFormat} {} virtual ~ColumnarNodeTableBase() = default; @@ -81,6 +82,10 @@ class ColumnarNodeTableBase : public NodeTable { ColumnarNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } + lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + +private: + lbug::common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/columnar_rel_table_base.h b/src/include/storage/table/columnar_rel_table_base.h index 0cbc6eb2f7..5933afce72 100644 --- a/src/include/storage/table/columnar_rel_table_base.h +++ b/src/include/storage/table/columnar_rel_table_base.h @@ -17,9 +17,10 @@ class ColumnarRelTableBase : public RelTable { public: ColumnarRelTableBase(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, - const StorageManager* storageManager, MemoryManager* memoryManager) + const StorageManager* storageManager, MemoryManager* memoryManager, + ::lbug::common::TableStorageFormat storageFormat) : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, - relGroupEntry{relGroupEntry} {} + relGroupEntry{relGroupEntry}, storageFormat{storageFormat} {} virtual ~ColumnarRelTableBase() = default; @@ -51,6 +52,7 @@ class ColumnarRelTableBase : public RelTable { std::vector> getTopKDegrees( const transaction::Transaction* transaction, common::RelDataDirection direction, common::idx_t k) override; + lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } protected: catalog::RelGroupCatalogEntry* relGroupEntry; @@ -72,6 +74,9 @@ class ColumnarRelTableBase : public RelTable { // Subclasses should cache indptr data and provide it via this interface virtual common::offset_t findSourceNodeForRowInternal(common::offset_t globalRowIdx, const std::vector& indptrData) const; + +private: + lbug::common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/foreign_rel_table.h b/src/include/storage/table/foreign_rel_table.h index cbb1466ca3..ecc2ae1ec0 100644 --- a/src/include/storage/table/foreign_rel_table.h +++ b/src/include/storage/table/foreign_rel_table.h @@ -25,6 +25,8 @@ class ForeignRelTable final : public RelTable { MemoryManager* memoryManager, function::TableFunction scanFunction, std::shared_ptr scanBindData); + lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + void initScanState(transaction::Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec = true) const override; @@ -50,6 +52,7 @@ class ForeignRelTable final : public RelTable { private: function::TableFunction scanFunction; std::shared_ptr scanBindData; + lbug::common::TableStorageFormat storageFormat = lbug::common::TableStorageFormat::FOREIGN; }; } // namespace storage diff --git a/src/storage/table/arrow_node_table.cpp b/src/storage/table/arrow_node_table.cpp index a139e14647..5a787d8d03 100644 --- a/src/storage/table/arrow_node_table.cpp +++ b/src/storage/table/arrow_node_table.cpp @@ -28,7 +28,8 @@ ArrowNodeTable::ArrowNodeTable(const StorageManager* storageManager, const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, ArrowSchemaWrapper schema, std::vector arrays, std::string arrowId) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, - std::make_unique(scanMorselSize)}, + std::make_unique(scanMorselSize), + lbug::common::TableStorageFormat::ARROW}, schema{std::move(schema)}, arrays{std::move(arrays)}, totalRows{0}, arrowId{std::move(arrowId)} { // Note: release may be nullptr if schema is managed by registry diff --git a/src/storage/table/arrow_rel_table.cpp b/src/storage/table/arrow_rel_table.cpp index 327443eaf5..756fa93fdc 100644 --- a/src/storage/table/arrow_rel_table.cpp +++ b/src/storage/table/arrow_rel_table.cpp @@ -65,7 +65,8 @@ ArrowRelTable::ArrowRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, table ArrowSchemaWrapper schema, std::vector arrays, ArrowSchemaWrapper indptrSchema, std::vector indptrArrays, std::string arrowId, std::string dstColumnName) - : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, + : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, + lbug::common::TableStorageFormat::ARROW}, fromNodeTable{fromNodeTable}, toNodeTable{toNodeTable}, layout{layout}, schema{std::move(schema)}, arrays{std::move(arrays)}, indptrSchema{std::move(indptrSchema)}, indptrArrays{std::move(indptrArrays)}, arrowId{std::move(arrowId)} { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index d61b029474..f79598a54e 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -29,7 +29,8 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, main::ClientContext* context) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, - std::make_unique()} { + std::make_unique(), + ::lbug::common::TableStorageFormat::ICEBUG_DISK} { const auto& storage = nodeTableEntry->getStorage(); auto path = common::StringUtils::getLower(storage).ends_with("parquet") ? diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index e662b71620..79db9bca2f 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -66,7 +66,8 @@ void IceDiskRelTableScanState::reloadCachedBatchData(Transaction* transaction) { IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t fromTableID, table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager, main::ClientContext* context) - : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, + : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, + ::lbug::common::TableStorageFormat::ICEBUG_DISK}, layout{IceDiskRelTableLayout::CSR} { const auto& storage = relGroupEntry->getStorage(); if (common::StringUtils::getLower(storage).ends_with("parquet")) { diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 58f1c09b25..fc4b269a82 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -10,6 +10,7 @@ add_lbug_api_test(api_test arrow_error_scenarios_test.cpp arrow_drop_table_test.cpp arrow_table_function_test.cpp + table_storage_format_test.cpp prepare_test.cpp result_value_test.cpp storage_driver_test.cpp diff --git a/test/api/columnar_table_storage_format_test.cpp b/test/api/columnar_table_storage_format_test.cpp new file mode 100644 index 0000000000..3adafd2a46 --- /dev/null +++ b/test/api/columnar_table_storage_format_test.cpp @@ -0,0 +1,79 @@ +#include +#include + +#include "api_test/api_test.h" +#include "arrow_test_utils.h" +#include "catalog/catalog.h" +#include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/arrow/arrow.h" +#include "common/enums/table_storage_format.h" +#include "common/enums/table_type.h" +#include "gtest/gtest.h" +#include "storage/storage_manager.h" +#include "storage/table/arrow_table_support.h" +#include "storage/table/table.h" +#include "test_helper/test_helper.h" +#include "transaction/transaction.h" +#include + +using namespace lbug; +using namespace lbug::common; +using namespace lbug::testing; + +class ColumnarTableStorageFormatTest : public ApiTest {}; + +TEST_F(ColumnarTableStorageFormatTest, ReportsRuntimeTableFormats) { + auto* context = getClientContext(*conn); + auto* transaction = &transaction::DUMMY_CHECKPOINT_TRANSACTION; + auto catalog = catalog::Catalog::Get(*context); + auto storageManager = database->getStorageManager(); + + auto expectFormat = [&](const std::string& tableName, TableStorageFormat expected) { + auto* entry = catalog->getTableCatalogEntry(transaction, tableName); + ASSERT_NE(entry, nullptr); + lbug::common::table_id_t tableID = entry->getTableID(); + if (entry->getTableType() == lbug::common::TableType::REL) { + tableID = entry->ptrCast()->getSingleRelEntryInfo().oid; + } + auto* table = storageManager->getTable(tableID); + ASSERT_NE(table, nullptr); + EXPECT_EQ(expected, table->getStorageFormat()) << tableName; + }; + + const auto iceDiskStorage = TestHelper::appendLbugRootPath("dataset/ice-disk-test/"); + auto iceDiskResult = conn->query(std::format( + "CREATE NODE TABLE upperversion(id INT64 PRIMARY KEY) WITH (storage = '{}', format = " + "'icebug-disk')", + iceDiskStorage)); + ASSERT_TRUE(iceDiskResult->isSuccess()) << iceDiskResult->toString(); + expectFormat("upperversion", TableStorageFormat::ICEBUG_DISK); + + ArrowSchemaWrapper nodeSchema; + createStructSchema(&nodeSchema, 1); + createSchema(nodeSchema.children[0], "id"); + std::vector nodeIds = {1, 2, 3}; + auto nodeArray = createStructArray(static_cast(nodeIds.size()), + {[&](ArrowArray* array) { createInt64Array(array, nodeIds); }}); + std::vector nodeArrays; + nodeArrays.push_back(std::move(nodeArray)); + auto nodeCreation = ArrowTableSupport::createViewFromArrowTable(*conn, "arrow_format_person", + std::move(nodeSchema), std::move(nodeArrays)); + ASSERT_TRUE(nodeCreation.queryResult->isSuccess()) << nodeCreation.queryResult->toString(); + expectFormat("arrow_format_person", TableStorageFormat::ARROW); + + ArrowSchemaWrapper relSchema; + createStructSchema(&relSchema, 2); + createSchema(relSchema.children[0], "from"); + createSchema(relSchema.children[1], "to"); + std::vector relFrom = {0, 1}; + std::vector relTo = {1, 2}; + auto relArray = createStructArray(static_cast(relFrom.size()), + {[&](ArrowArray* array) { createInt64Array(array, relFrom); }, + [&](ArrowArray* array) { createInt64Array(array, relTo); }}); + std::vector relArrays; + relArrays.push_back(std::move(relArray)); + auto relCreation = ArrowTableSupport::createRelTableFromArrowTable(*conn, "arrow_format_knows", + "person", "person", std::move(relSchema), std::move(relArrays)); + ASSERT_TRUE(relCreation.queryResult->isSuccess()) << relCreation.queryResult->toString(); + expectFormat("arrow_format_knows", TableStorageFormat::ARROW); +} diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt index 5b65d8ab35..14e0f4a877 100644 --- a/test/common/CMakeLists.txt +++ b/test/common/CMakeLists.txt @@ -6,6 +6,7 @@ add_lbug_test(types_test interval_test.cpp null_mask_test.cpp string_test.cpp + table_storage_format_test.cpp time_test.cpp timestamp_test.cpp vfs_test.cpp diff --git a/test/common/table_storage_format_test.cpp b/test/common/table_storage_format_test.cpp new file mode 100644 index 0000000000..56d5b56ccd --- /dev/null +++ b/test/common/table_storage_format_test.cpp @@ -0,0 +1,13 @@ +#include "common/enums/table_storage_format.h" +#include "common/exception/binder.h" +#include "gtest/gtest.h" + +using namespace lbug::common; + +TEST(TableStorageFormatUtilsTest, ToString) { + EXPECT_EQ("unknown", TableStorageFormatUtils::toString(TableStorageFormat::UNKNOWN)); + EXPECT_EQ("native", TableStorageFormatUtils::toString(TableStorageFormat::NATIVE)); + EXPECT_EQ("icebug-disk", TableStorageFormatUtils::toString(TableStorageFormat::ICEBUG_DISK)); + EXPECT_EQ("arrow", TableStorageFormatUtils::toString(TableStorageFormat::ARROW)); + EXPECT_EQ("foreign", TableStorageFormatUtils::toString(TableStorageFormat::FOREIGN)); +} From a865aba47c4568e086fa81e85a692853420e170d Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 23 Jun 2026 16:55:10 +0530 Subject: [PATCH 2/4] standardise scans --- src/common/enums/table_storage_format.cpp | 8 ++ src/graph/on_disk_graph.cpp | 8 +- .../common/enums/table_storage_format.h | 2 + src/include/storage/table/arrow_node_table.h | 8 +- src/include/storage/table/arrow_rel_table.h | 3 + .../storage/table/columnar_node_table_base.h | 7 +- .../storage/table/columnar_rel_table_base.h | 3 + .../storage/table/ice_disk_node_table.h | 7 +- .../storage/table/ice_disk_rel_table.h | 3 + src/processor/map/map_extend.cpp | 32 ++--- .../operator/scan/scan_multi_rel_tables.cpp | 112 ++++++++---------- .../operator/scan/scan_node_table.cpp | 41 ++----- .../operator/scan/scan_rel_table.cpp | 27 ++--- src/storage/table/arrow_node_table.cpp | 15 +-- src/storage/table/arrow_rel_table.cpp | 7 ++ src/storage/table/ice_disk_node_table.cpp | 17 ++- src/storage/table/ice_disk_rel_table.cpp | 7 ++ 17 files changed, 160 insertions(+), 147 deletions(-) diff --git a/src/common/enums/table_storage_format.cpp b/src/common/enums/table_storage_format.cpp index eab08be80f..235922eb3e 100644 --- a/src/common/enums/table_storage_format.cpp +++ b/src/common/enums/table_storage_format.cpp @@ -25,5 +25,13 @@ std::string TableStorageFormatUtils::toString(TableStorageFormat format) { } } +bool TableStorageFormatUtils::isArrow(TableStorageFormat format) { + return format == TableStorageFormat::ARROW; +} + +bool TableStorageFormatUtils::isIceDisk(TableStorageFormat format) { + return format == TableStorageFormat::ICEBUG_DISK; +} + } // namespace common } // namespace lbug diff --git a/src/graph/on_disk_graph.cpp b/src/graph/on_disk_graph.cpp index 5ebf9acc21..ca4507b2e4 100644 --- a/src/graph/on_disk_graph.cpp +++ b/src/graph/on_disk_graph.cpp @@ -129,9 +129,11 @@ OnDiskGraphNbrScanState::OnDiskGraphNbrScanState(ClientContext* context, } std::unique_ptr scanState; - if (dynamic_cast(table) != nullptr) { - scanState = std::make_unique(*mm, srcNodeIDVector.get(), - outVectors, state); + if (const auto* columnarTable = + dynamic_cast(table) && + TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { + scanState = + columnarTable->createScanState(srcNodeIDVector.get(), outVectors, mm, state); } else { scanState = std::make_unique(*MemoryManager::Get(*context), srcNodeIDVector.get(), outVectors, dstNodeIDVector->state, randomLookup); diff --git a/src/include/common/enums/table_storage_format.h b/src/include/common/enums/table_storage_format.h index e89c1e5c43..7b6c680ccc 100644 --- a/src/include/common/enums/table_storage_format.h +++ b/src/include/common/enums/table_storage_format.h @@ -21,6 +21,8 @@ enum class TableStorageFormat : uint8_t { struct TableStorageFormatUtils { static std::string toString(TableStorageFormat format); + static bool isArrow(TableStorageFormat format); + static bool isIceDisk(TableStorageFormat format); }; } // namespace common diff --git a/src/include/storage/table/arrow_node_table.h b/src/include/storage/table/arrow_node_table.h index febe709207..8f5a5c7e4f 100644 --- a/src/include/storage/table/arrow_node_table.h +++ b/src/include/storage/table/arrow_node_table.h @@ -96,13 +96,15 @@ class ArrowNodeTable final : public ColumnarNodeTableBase { const ArrowSchemaWrapper& getArrowSchema() const { return schema; } const std::vector& getArrowArrays() const { return arrays; } - common::node_group_idx_t getNumBatches( + common::node_group_idx_t getNumScanMorsels( const transaction::Transaction* transaction) const override; - size_t getNumScanMorsels(const transaction::Transaction* transaction) const; - const catalog::NodeTableCatalogEntry* getCatalogEntry() const { return nodeTableCatalogEntry; } + std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, + MemoryManager* memoryManager) const override; + protected: std::string getColumnarFormatName() const override { return "Arrow"; } common::row_idx_t getTotalRowCount(const transaction::Transaction* transaction) const override; diff --git a/src/include/storage/table/arrow_rel_table.h b/src/include/storage/table/arrow_rel_table.h index 7f6d8c9283..e6bdb68f0c 100644 --- a/src/include/storage/table/arrow_rel_table.h +++ b/src/include/storage/table/arrow_rel_table.h @@ -41,6 +41,9 @@ class ArrowRelTable final : public ColumnarRelTableBase { bool resetCachedBoundNodeSelVec = true) const override; bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override; + std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager, + std::shared_ptr outChunkState) const override; protected: std::string getColumnarFormatName() const override { return "Arrow"; } diff --git a/src/include/storage/table/columnar_node_table_base.h b/src/include/storage/table/columnar_node_table_base.h index 50a76f45f3..3579d0b948 100644 --- a/src/include/storage/table/columnar_node_table_base.h +++ b/src/include/storage/table/columnar_node_table_base.h @@ -73,8 +73,6 @@ class ColumnarNodeTableBase : public NodeTable { // Template method pattern: subclasses implement format-specific operations virtual std::string getColumnarFormatName() const = 0; - virtual common::node_group_idx_t getNumBatches( - const transaction::Transaction* transaction) const = 0; virtual common::row_idx_t getTotalRowCount( const transaction::Transaction* transaction) const = 0; @@ -83,6 +81,11 @@ class ColumnarNodeTableBase : public NodeTable { return tableScanSharedState.get(); } lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, + MemoryManager* memoryManager) const = 0; + virtual common::node_group_idx_t getNumScanMorsels( + const transaction::Transaction* transaction) const = 0; private: lbug::common::TableStorageFormat storageFormat; diff --git a/src/include/storage/table/columnar_rel_table_base.h b/src/include/storage/table/columnar_rel_table_base.h index 5933afce72..d5777b0364 100644 --- a/src/include/storage/table/columnar_rel_table_base.h +++ b/src/include/storage/table/columnar_rel_table_base.h @@ -53,6 +53,9 @@ class ColumnarRelTableBase : public RelTable { const transaction::Transaction* transaction, common::RelDataDirection direction, common::idx_t k) override; lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager, + std::shared_ptr outChunkState) const = 0; protected: catalog::RelGroupCatalogEntry* relGroupEntry; diff --git a/src/include/storage/table/ice_disk_node_table.h b/src/include/storage/table/ice_disk_node_table.h index 2a6dc251ff..e266f0a5a4 100644 --- a/src/include/storage/table/ice_disk_node_table.h +++ b/src/include/storage/table/ice_disk_node_table.h @@ -76,12 +76,15 @@ class IceDiskNodeTable final : public ColumnarNodeTableBase { common::offset_t offset) const override; const std::string& getParquetFilePath() const { return parquetFilePath; } + std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, + MemoryManager* memoryManager) const override; + common::node_group_idx_t getNumScanMorsels( + const transaction::Transaction* transaction) const override; protected: // Implement ColumnarNodeTableBase interface std::string getColumnarFormatName() const override { return "icebug-disk"; } - common::node_group_idx_t getNumBatches( - const transaction::Transaction* transaction) const override; common::row_idx_t getTotalRowCount(const transaction::Transaction* transaction) const override; private: diff --git a/src/include/storage/table/ice_disk_rel_table.h b/src/include/storage/table/ice_disk_rel_table.h index e04bc75dc3..22a1c719e4 100644 --- a/src/include/storage/table/ice_disk_rel_table.h +++ b/src/include/storage/table/ice_disk_rel_table.h @@ -65,6 +65,9 @@ class IceDiskRelTable final : public ColumnarRelTableBase { bool resetCachedBoundNodeSelVec = true) const override; bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override; + std::unique_ptr createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager, + std::shared_ptr outChunkState) const override; protected: // Implement ColumnarRelTableBase interface diff --git a/src/processor/map/map_extend.cpp b/src/processor/map/map_extend.cpp index 42a03255d0..0c6c3c2d01 100644 --- a/src/processor/map/map_extend.cpp +++ b/src/processor/map/map_extend.cpp @@ -223,21 +223,25 @@ std::unique_ptr PlanMapper::mapExtend(const LogicalOperator* l std::make_shared(std::move(semiMask))); } } - if (!sourceNodeTables.empty() && !scanNode->getProperties().empty() && - dynamic_cast(relTable) != nullptr) { - std::vector sourceOutVectorsPos; - for (auto& expression : scanNode->getProperties()) { - sourceOutVectorsPos.emplace_back(getDataPos(*expression, *inFSchema)); + if (!sourceNodeTables.empty() && !scanNode->getProperties().empty() { + if (const auto* columnarTable = + dynamic_cast(relTable) && + TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { + std::vector sourceOutVectorsPos; + for (auto& expression : scanNode->getProperties()) { + sourceOutVectorsPos.emplace_back(getDataPos(*expression, *inFSchema)); + } + auto sourceNodeScanInfo = + ScanOpInfo(inNodeIDPos, std::move(sourceOutVectorsPos)); + auto progressSharedState = + std::make_shared(); + return std::make_unique(std::move(scanInfo), + std::move(scanRelInfo), std::move(sourceNodeTableInfos), + std::move(sourceNodeSharedStates), std::move(progressSharedState), + std::move(sourceNodeScanInfo), getOperatorID(), printInfo->copy(), + physicalOperatorType); } - auto sourceNodeScanInfo = - ScanOpInfo(inNodeIDPos, std::move(sourceOutVectorsPos)); - auto progressSharedState = std::make_shared(); - return std::make_unique(std::move(scanInfo), - std::move(scanRelInfo), std::move(sourceNodeTableInfos), - std::move(sourceNodeSharedStates), std::move(progressSharedState), - std::move(sourceNodeScanInfo), getOperatorID(), printInfo->copy(), - physicalOperatorType); - } + } // Only apply the existing no-property optimization if scan node is not already // mapped (e.g., by a semi-masker). if (!sourceNodeTables.empty() && diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index f24eae254b..f17704ca8d 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -67,86 +67,76 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo auto nbrNodeIDVector = outVectors[0]; // Check if any table in any scanner is an external rel table with a custom scan state. - bool hasArrowTable = false; - bool hasIceDiskTable = false; + storage::ColumnarRelTableBase* columnarTable = nullptr; for (auto& [_, scanner] : scanners) { for (auto& relInfo : scanner.relInfos) { - if (dynamic_cast(relInfo.table) != nullptr) { - hasArrowTable = true; + if (dynamic_cast(relInfo.table) != nullptr) { + columnarTable = dynamic_cast(relInfo.table); break; } - if (dynamic_cast(relInfo.table) != nullptr) { - hasIceDiskTable = true; + + if (columnarTable != nullptr) { break; } } - if (hasArrowTable || hasIceDiskTable) { - break; - } - } - // IceDisk scan state extends the common rel scan state and Arrow stores its per-table state - // there, so one scan state can now cover IceDisk, Arrow, and native rel tables. - if (hasIceDiskTable) { - scanState = - std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); - } else if (hasArrowTable) { - scanState = - std::make_unique(*MemoryManager::Get(*clientContext), + // IceDisk scan state extends the common rel scan state and Arrow stores its per-table state + // there, so one scan state can now cover IceDisk, Arrow, and native rel tables. + if (columnarTable != nullptr) { + scanState = columnarTable->createScanState(boundNodeIDVector, outVectors, + MemoryManager::Get(*clientContext), nbrNodeIDVector->state); + } else { + scanState = std::make_unique(*MemoryManager::Get(*clientContext), boundNodeIDVector, outVectors, nbrNodeIDVector->state); - } else { - scanState = std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); - } - for (auto& [_, scanner] : scanners) { - for (auto& relInfo : scanner.relInfos) { - if (directionInfo.directionPos.isValid()) { - scanner.directionVector = - resultSet->getValueVector(directionInfo.directionPos).get(); - scanner.directionValues.push_back(directionInfo.needFlip(relInfo.direction)); + } + for (auto& [_, scanner] : scanners) { + for (auto& relInfo : scanner.relInfos) { + if (directionInfo.directionPos.isValid()) { + scanner.directionVector = + resultSet->getValueVector(directionInfo.directionPos).get(); + scanner.directionValues.push_back(directionInfo.needFlip(relInfo.direction)); + } } } + currentScanner = nullptr; } - currentScanner = nullptr; -} -bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext* context) { - while (true) { - if (currentScanner != nullptr && - currentScanner->scan(context->clientContext, *scanState, outVectors)) { - metrics->numOutputTuple.increase(scanState->outState->getSelVector().getSelSize()); - return true; - } - if (!children[0]->getNextTuple(context)) { - resetState(); - return false; - } - const auto currentIdx = boundNodeIDVector->state->getSelVector()[0]; - if (boundNodeIDVector->isNull(currentIdx)) { - currentScanner = nullptr; - continue; + bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext * context) { + while (true) { + if (currentScanner != nullptr && + currentScanner->scan(context->clientContext, *scanState, outVectors)) { + metrics->numOutputTuple.increase(scanState->outState->getSelVector().getSelSize()); + return true; + } + if (!children[0]->getNextTuple(context)) { + resetState(); + return false; + } + const auto currentIdx = boundNodeIDVector->state->getSelVector()[0]; + if (boundNodeIDVector->isNull(currentIdx)) { + currentScanner = nullptr; + continue; + } + auto nodeID = boundNodeIDVector->getValue(currentIdx); + initCurrentScanner(nodeID); } - auto nodeID = boundNodeIDVector->getValue(currentIdx); - initCurrentScanner(nodeID); } -} -void ScanMultiRelTable::resetState() { - currentScanner = nullptr; - for (auto& [_, scanner] : scanners) { - scanner.resetState(); + void ScanMultiRelTable::resetState() { + currentScanner = nullptr; + for (auto& [_, scanner] : scanners) { + scanner.resetState(); + } } -} -void ScanMultiRelTable::initCurrentScanner(const nodeID_t& nodeID) { - if (scanners.contains(nodeID.tableID)) { - currentScanner = &scanners.at(nodeID.tableID); - currentScanner->resetState(); - } else { - currentScanner = nullptr; + void ScanMultiRelTable::initCurrentScanner(const nodeID_t& nodeID) { + if (scanners.contains(nodeID.tableID)) { + currentScanner = &scanners.at(nodeID.tableID); + currentScanner->resetState(); + } else { + currentScanner = nullptr; + } } -} } // namespace processor } // namespace lbug diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp index e9c9524854..741671cdf3 100644 --- a/src/processor/operator/scan/scan_node_table.cpp +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -17,14 +17,11 @@ namespace processor { static std::unique_ptr createNodeTableScanState(NodeTable* table, ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager) { - if (dynamic_cast(table) != nullptr) { - return std::make_unique(*memoryManager, nodeIDVector, outVectors, - nodeIDVector->state); - } - if (dynamic_cast(table) != nullptr) { - return std::make_unique(*memoryManager, nodeIDVector, outVectors, - nodeIDVector->state); + if (dynamic_cast(table) != nullptr) { + return table->cast().createScanState(nodeIDVector, outVectors, + memoryManager); } + return std::make_unique(nodeIDVector, outVectors, nodeIDVector->state); } @@ -56,23 +53,8 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa // Initialize table-specific scan coordination (e.g., for IceDiskNodeTable) table->initializeScanCoordination(transaction); - if (const auto iceDiskTable = dynamic_cast(table)) { - // For ice-disk tables, set numCommittedNodeGroups to number of row groups - std::vector columnSkips; - try { - auto context = transaction->getClientContext(); - auto resolvedPath = - common::VirtualFileSystem::resolvePath(context, iceDiskTable->getParquetFilePath()); - auto tempReader = - std::make_unique(resolvedPath, columnSkips, context); - this->numCommittedNodeGroups = tempReader->getNumRowGroups(); - } catch (const std::exception& e) { - this->numCommittedNodeGroups = 1; - } - } else if (const auto arrowTable = dynamic_cast(table)) { - // For Arrow tables, set numCommittedNodeGroups to number of morsels - this->numCommittedNodeGroups = - static_cast(arrowTable->getNumScanMorsels(transaction)); + if (const auto columnarNodeTable = dynamic_cast(table)) { + this->numCommittedNodeGroups = columnarNodeTable->getNumScanMorsels(transaction); } else { this->numCommittedNodeGroups = table->getNumCommittedNodeGroups(); } @@ -90,10 +72,8 @@ void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState, ScanNodeTableProgressSharedState& progressSharedState) { std::unique_lock lck{mtx}; - // ColumnarNodeTables handle morsel assignment internally - // TODO: icebug-disk tables https://github.com/LadybugDB/ladybug/issues/245 - if (const auto arrowTable = dynamic_cast(this->table)) { - const auto tableSharedState = arrowTable->getTableScanSharedState(); + if (const auto columnarTable = dynamic_cast(this->table)) { + const auto tableSharedState = columnarTable->getTableScanSharedState(); if (tableSharedState->getNextMorsel(static_cast(&scanState))) { scanState.source = TableScanSource::COMMITTED; progressSharedState.numMorselsScanned++; @@ -149,9 +129,8 @@ void ScanNodeTable::initCurrentTable(ExecutionContext* context) { outVectors, MemoryManager::Get(*context->clientContext)); currentInfo.initScanState(*scanState, outVectors, context->clientContext); scanState->semiMask = sharedStates[currentTableIdx]->getSemiMask(); - // Call table->initScanState for IceDiskNodeTable or ArrowNodeTable - if (dynamic_cast(tableInfos[currentTableIdx].table) || - dynamic_cast(tableInfos[currentTableIdx].table)) { + // Call table->initScanState for ColumnarNodeTables + if (dynamic_cast(tableInfos[currentTableIdx].table)) { auto transaction = transaction::Transaction::Get(*context->clientContext); tableInfos[currentTableIdx].table->initScanState(transaction, *scanState); } diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index 8e7b5a7d14..fa4cc720bc 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -23,13 +23,9 @@ namespace processor { static std::unique_ptr createSourceNodeTableScanState(NodeTable* table, ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager) { - if (dynamic_cast(table) != nullptr) { - return std::make_unique(*memoryManager, nodeIDVector, outVectors, - nodeIDVector->state); - } - if (dynamic_cast(table) != nullptr) { - return std::make_unique(*memoryManager, nodeIDVector, outVectors, - nodeIDVector->state); + if (dynamic_cast(table) != nullptr) { + return table->cast().createScanState(nodeIDVector, outVectors, + memoryManager); } return std::make_unique(nodeIDVector, outVectors, nodeIDVector->state); } @@ -91,17 +87,11 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext auto boundNodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get(); auto nbrNodeIDVector = outVectors[0]; // Check if this is an external rel table and create the corresponding scan state. - auto* arrowTable = dynamic_cast(tableInfo.table); - auto* iceDiskTable = dynamic_cast(tableInfo.table); + auto* columnarTable = dynamic_cast(tableInfo.table); auto* foreignTable = dynamic_cast(tableInfo.table); - if (arrowTable) { - scanState = - std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); - } else if (iceDiskTable) { - scanState = - std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); + if (columnarTable) { + scanState = columnarTable->createScanState(boundNodeIDVector, outVectors, + MemoryManager::Get(*clientContext), nbrNodeIDVector->state); } else if (foreignTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), @@ -142,8 +132,7 @@ static void initSourceNodeScanState(ScanNodeTableInfo& sourceInfo, sourceScanState = createSourceNodeTableScanState(sourceInfo.table->ptrCast(), boundNodeIDVector, sourceNodeOutVectors, MemoryManager::Get(*context)); sourceInfo.initScanState(*sourceScanState, sourceNodeOutVectors, context); - if (dynamic_cast(sourceInfo.table) || - dynamic_cast(sourceInfo.table)) { + if (dynamic_cast(sourceInfo.table)) { sourceInfo.table->initScanState(transaction::Transaction::Get(*context), *sourceScanState); } } diff --git a/src/storage/table/arrow_node_table.cpp b/src/storage/table/arrow_node_table.cpp index 5a787d8d03..65aaa220ab 100644 --- a/src/storage/table/arrow_node_table.cpp +++ b/src/storage/table/arrow_node_table.cpp @@ -51,6 +51,12 @@ ArrowNodeTable::~ArrowNodeTable() { } } +std::unique_ptr ArrowNodeTable::createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager) const { + return std::make_unique(*memoryManager, nodeIDVector, outVectors, + nodeIDVector->state); +} + void ArrowNodeTable::initializeScanCoordination(const transaction::Transaction* transaction) { auto arrowScanSharedState = static_cast(tableScanSharedState.get()); @@ -136,11 +142,6 @@ bool ArrowNodeTable::scanInternal([[maybe_unused]] transaction::Transaction* tra return true; } -common::node_group_idx_t ArrowNodeTable::getNumBatches( - [[maybe_unused]] const transaction::Transaction* transaction) const { - return arrays.size(); -} - common::row_idx_t ArrowNodeTable::getTotalRowCount( [[maybe_unused]] const transaction::Transaction* transaction) const { return totalRows; @@ -157,9 +158,9 @@ std::vector ArrowNodeTable::getBatchSizes( return batchSizes; } -size_t ArrowNodeTable::getNumScanMorsels( +common::node_group_idx_t ArrowNodeTable::getNumScanMorsels( [[maybe_unused]] const transaction::Transaction* transaction) const { - size_t numMorsels = 0; + common::node_group_idx_t numMorsels = 0; for (const auto& array : arrays) { auto batchLength = getArrowBatchLength(array); numMorsels += (batchLength + scanMorselSize - 1) / scanMorselSize; diff --git a/src/storage/table/arrow_rel_table.cpp b/src/storage/table/arrow_rel_table.cpp index 756fa93fdc..e8961959e0 100644 --- a/src/storage/table/arrow_rel_table.cpp +++ b/src/storage/table/arrow_rel_table.cpp @@ -38,6 +38,13 @@ static int64_t findColumnIdx(const ArrowSchemaWrapper& schema, const std::string return -1; } +std::unique_ptr ArrowRelTable::createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager, + std::shared_ptr outChunkState) const { + return std::make_unique(*memoryManager, nodeIDVector, + outVectors, outChunkState); +} + void ArrowRelTableScanState::setToTable(const transaction::Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_, RelDataDirection direction_) { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index f79598a54e..b8fb4ea548 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -30,7 +30,7 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, main::ClientContext* context) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, std::make_unique(), - ::lbug::common::TableStorageFormat::ICEBUG_DISK} { + lbug::common::TableStorageFormat::ICEBUG_DISK} { const auto& storage = nodeTableEntry->getStorage(); auto path = common::StringUtils::getLower(storage).ends_with("parquet") ? @@ -41,11 +41,17 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, parquetFilePath = resolvedPath; } +std::unique_ptr IceDiskNodeTable::createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager) const { + return std::make_unique(*memoryManager, nodeIDVector, outVectors, + nodeIDVector->state); +} + void IceDiskNodeTable::initializeScanCoordination(const transaction::Transaction* transaction) { auto iceDiskScanSharedState = static_cast(tableScanSharedState.get()); - auto numBatches = getNumBatches(transaction); - iceDiskScanSharedState->reset(numBatches); + auto numMorsels = getNumScanMorsels(transaction); + iceDiskScanSharedState->reset(numMorsels); } void IceDiskNodeTable::initScanState(Transaction* transaction, TableScanState& scanState, @@ -86,7 +92,8 @@ void IceDiskNodeTable::initScanState(Transaction* transaction, TableScanState& s initParquetScanForRowGroup(transaction, iceDiskScanState); } -common::node_group_idx_t IceDiskNodeTable::getNumBatches(const Transaction* transaction) const { +common::node_group_idx_t IceDiskNodeTable::getNumScanMorsels( + const transaction::Transaction* transaction) const { auto context = transaction->getClientContext(); if (!context) { return 1; @@ -95,7 +102,7 @@ common::node_group_idx_t IceDiskNodeTable::getNumBatches(const Transaction* tran std::vector columnSkips; try { auto tempReader = std::make_unique(parquetFilePath, columnSkips, context); - return tempReader->getNumRowGroups(); + return static_cast(tempReader->getNumRowGroups()); } catch (const std::exception& e) { return 1; // Fallback } diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 79db9bca2f..fb786f689c 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -22,6 +22,13 @@ using namespace lbug::transaction; namespace lbug { namespace storage { +std::unique_ptr IceDiskRelTable::createScanState(common::ValueVector* nodeIDVector, + const std::vector& outVectors, MemoryManager* memoryManager, + std::shared_ptr outChunkState) const { + return std::make_unique(*memoryManager, nodeIDVector, + outVectors, outChunkState); +} + void IceDiskRelTableScanState::setToTable(const Transaction* transaction, Table* table_, std::vector columnIDs_, std::vector columnPredicateSets_, RelDataDirection direction_) { From 2699bba21dbb167d2b32caf97cb8354f92fb10d2 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 23 Jun 2026 19:50:34 +0530 Subject: [PATCH 3/4] fix tests --- src/graph/on_disk_graph.cpp | 12 ++- .../storage/table/columnar_node_table_base.h | 7 +- .../storage/table/columnar_rel_table_base.h | 7 +- src/include/storage/table/foreign_rel_table.h | 5 +- src/processor/map/map_extend.cpp | 10 +- .../operator/scan/scan_multi_rel_tables.cpp | 97 ++++++++++--------- .../operator/scan/scan_rel_table.cpp | 4 +- src/storage/table/arrow_node_table.cpp | 2 +- src/storage/table/arrow_rel_table.cpp | 2 +- src/storage/table/ice_disk_node_table.cpp | 2 +- src/storage/table/ice_disk_rel_table.cpp | 2 +- test/api/CMakeLists.txt | 1 - test/common/CMakeLists.txt | 2 +- ...pp => table_storage_format_utils_test.cpp} | 0 test/storage/CMakeLists.txt | 1 + .../columnar_table_storage_format_test.cpp | 32 ++++-- 16 files changed, 108 insertions(+), 78 deletions(-) rename test/common/{table_storage_format_test.cpp => table_storage_format_utils_test.cpp} (100%) rename test/{api => storage}/columnar_table_storage_format_test.cpp (75%) diff --git a/src/graph/on_disk_graph.cpp b/src/graph/on_disk_graph.cpp index ca4507b2e4..1873fe8ac3 100644 --- a/src/graph/on_disk_graph.cpp +++ b/src/graph/on_disk_graph.cpp @@ -129,11 +129,13 @@ OnDiskGraphNbrScanState::OnDiskGraphNbrScanState(ClientContext* context, } std::unique_ptr scanState; - if (const auto* columnarTable = - dynamic_cast(table) && - TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { - scanState = - columnarTable->createScanState(srcNodeIDVector.get(), outVectors, mm, state); + if (const auto* columnarTable = dynamic_cast(table)) { + if (TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { + auto tableScanState = + columnarTable->createScanState(srcNodeIDVector.get(), outVectors, mm, state); + scanState = std::unique_ptr( + dynamic_cast(tableScanState.release())); + } } else { scanState = std::make_unique(*MemoryManager::Get(*context), srcNodeIDVector.get(), outVectors, dstNodeIDVector->state, randomLookup); diff --git a/src/include/storage/table/columnar_node_table_base.h b/src/include/storage/table/columnar_node_table_base.h index 3579d0b948..e145f4a309 100644 --- a/src/include/storage/table/columnar_node_table_base.h +++ b/src/include/storage/table/columnar_node_table_base.h @@ -4,6 +4,7 @@ #include #include "catalog/catalog_entry/node_table_catalog_entry.h" +#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "common/mask.h" #include "common/types/internal_id_util.h" @@ -38,7 +39,7 @@ class ColumnarNodeTableBase : public NodeTable { ColumnarNodeTableBase(const StorageManager* storageManager, const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, std::unique_ptr tableScanSharedState, - lbug::common::TableStorageFormat storageFormat) + common::TableStorageFormat storageFormat) : NodeTable{storageManager, nodeTableEntry, memoryManager}, nodeTableCatalogEntry{nodeTableEntry}, tableScanSharedState{std::move(tableScanSharedState)}, storageFormat{storageFormat} {} @@ -80,7 +81,7 @@ class ColumnarNodeTableBase : public NodeTable { ColumnarNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } - lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + common::TableStorageFormat getStorageFormat() const { return storageFormat; } virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager) const = 0; @@ -88,7 +89,7 @@ class ColumnarNodeTableBase : public NodeTable { const transaction::Transaction* transaction) const = 0; private: - lbug::common::TableStorageFormat storageFormat; + common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/columnar_rel_table_base.h b/src/include/storage/table/columnar_rel_table_base.h index d5777b0364..7808e2544d 100644 --- a/src/include/storage/table/columnar_rel_table_base.h +++ b/src/include/storage/table/columnar_rel_table_base.h @@ -5,6 +5,7 @@ #include #include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "common/types/internal_id_util.h" #include "storage/table/rel_table.h" @@ -18,7 +19,7 @@ class ColumnarRelTableBase : public RelTable { ColumnarRelTableBase(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager, - ::lbug::common::TableStorageFormat storageFormat) + common::TableStorageFormat storageFormat) : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, relGroupEntry{relGroupEntry}, storageFormat{storageFormat} {} @@ -52,7 +53,7 @@ class ColumnarRelTableBase : public RelTable { std::vector> getTopKDegrees( const transaction::Transaction* transaction, common::RelDataDirection direction, common::idx_t k) override; - lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + common::TableStorageFormat getStorageFormat() const { return storageFormat; } virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager, std::shared_ptr outChunkState) const = 0; @@ -79,7 +80,7 @@ class ColumnarRelTableBase : public RelTable { const std::vector& indptrData) const; private: - lbug::common::TableStorageFormat storageFormat; + common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/foreign_rel_table.h b/src/include/storage/table/foreign_rel_table.h index ecc2ae1ec0..749826b44f 100644 --- a/src/include/storage/table/foreign_rel_table.h +++ b/src/include/storage/table/foreign_rel_table.h @@ -1,6 +1,7 @@ #pragma once #include "catalog/catalog_entry/rel_group_catalog_entry.h" +#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "function/table/table_function.h" #include "storage/table/rel_table.h" @@ -25,7 +26,7 @@ class ForeignRelTable final : public RelTable { MemoryManager* memoryManager, function::TableFunction scanFunction, std::shared_ptr scanBindData); - lbug::common::TableStorageFormat getStorageFormat() const { return storageFormat; } + common::TableStorageFormat getStorageFormat() const { return storageFormat; } void initScanState(transaction::Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec = true) const override; @@ -52,7 +53,7 @@ class ForeignRelTable final : public RelTable { private: function::TableFunction scanFunction; std::shared_ptr scanBindData; - lbug::common::TableStorageFormat storageFormat = lbug::common::TableStorageFormat::FOREIGN; + common::TableStorageFormat storageFormat = common::TableStorageFormat::FOREIGN; }; } // namespace storage diff --git a/src/processor/map/map_extend.cpp b/src/processor/map/map_extend.cpp index 0c6c3c2d01..dc8e6f7b8e 100644 --- a/src/processor/map/map_extend.cpp +++ b/src/processor/map/map_extend.cpp @@ -223,10 +223,10 @@ std::unique_ptr PlanMapper::mapExtend(const LogicalOperator* l std::make_shared(std::move(semiMask))); } } - if (!sourceNodeTables.empty() && !scanNode->getProperties().empty() { - if (const auto* columnarTable = - dynamic_cast(relTable) && - TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { + if (!sourceNodeTables.empty() && !scanNode->getProperties().empty()) { + if (const auto* columnarTable = dynamic_cast(relTable); + columnarTable != nullptr && + TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { std::vector sourceOutVectorsPos; for (auto& expression : scanNode->getProperties()) { sourceOutVectorsPos.emplace_back(getDataPos(*expression, *inFSchema)); @@ -241,7 +241,7 @@ std::unique_ptr PlanMapper::mapExtend(const LogicalOperator* l std::move(sourceNodeScanInfo), getOperatorID(), printInfo->copy(), physicalOperatorType); } - } + } // Only apply the existing no-property optimization if scan node is not already // mapped (e.g., by a semi-masker). if (!sourceNodeTables.empty() && diff --git a/src/processor/operator/scan/scan_multi_rel_tables.cpp b/src/processor/operator/scan/scan_multi_rel_tables.cpp index f17704ca8d..05c31e8478 100644 --- a/src/processor/operator/scan/scan_multi_rel_tables.cpp +++ b/src/processor/operator/scan/scan_multi_rel_tables.cpp @@ -79,64 +79,67 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo break; } } + } - // IceDisk scan state extends the common rel scan state and Arrow stores its per-table state - // there, so one scan state can now cover IceDisk, Arrow, and native rel tables. - if (columnarTable != nullptr) { - scanState = columnarTable->createScanState(boundNodeIDVector, outVectors, - MemoryManager::Get(*clientContext), nbrNodeIDVector->state); - } else { - scanState = std::make_unique(*MemoryManager::Get(*clientContext), - boundNodeIDVector, outVectors, nbrNodeIDVector->state); - } - for (auto& [_, scanner] : scanners) { - for (auto& relInfo : scanner.relInfos) { - if (directionInfo.directionPos.isValid()) { - scanner.directionVector = - resultSet->getValueVector(directionInfo.directionPos).get(); - scanner.directionValues.push_back(directionInfo.needFlip(relInfo.direction)); - } + // IceDisk scan state extends the common rel scan state and Arrow stores its per-table state + // there, so one scan state can now cover IceDisk, Arrow, and native rel tables. + if (columnarTable != nullptr) { + auto tableScanState = columnarTable->createScanState(boundNodeIDVector, outVectors, + MemoryManager::Get(*clientContext), nbrNodeIDVector->state); + scanState = std::unique_ptr( + dynamic_cast(tableScanState.release())); + } else { + scanState = std::make_unique(*MemoryManager::Get(*clientContext), + boundNodeIDVector, outVectors, nbrNodeIDVector->state); + } + for (auto& [_, scanner] : scanners) { + for (auto& relInfo : scanner.relInfos) { + if (directionInfo.directionPos.isValid()) { + scanner.directionVector = + resultSet->getValueVector(directionInfo.directionPos).get(); + scanner.directionValues.push_back(directionInfo.needFlip(relInfo.direction)); } } - currentScanner = nullptr; } + currentScanner = nullptr; +} - bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext * context) { - while (true) { - if (currentScanner != nullptr && - currentScanner->scan(context->clientContext, *scanState, outVectors)) { - metrics->numOutputTuple.increase(scanState->outState->getSelVector().getSelSize()); - return true; - } - if (!children[0]->getNextTuple(context)) { - resetState(); - return false; - } - const auto currentIdx = boundNodeIDVector->state->getSelVector()[0]; - if (boundNodeIDVector->isNull(currentIdx)) { - currentScanner = nullptr; - continue; - } - auto nodeID = boundNodeIDVector->getValue(currentIdx); - initCurrentScanner(nodeID); +bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext* context) { + while (true) { + if (currentScanner != nullptr && + currentScanner->scan(context->clientContext, *scanState, outVectors)) { + metrics->numOutputTuple.increase(scanState->outState->getSelVector().getSelSize()); + return true; } + if (!children[0]->getNextTuple(context)) { + resetState(); + return false; + } + const auto currentIdx = boundNodeIDVector->state->getSelVector()[0]; + if (boundNodeIDVector->isNull(currentIdx)) { + currentScanner = nullptr; + continue; + } + auto nodeID = boundNodeIDVector->getValue(currentIdx); + initCurrentScanner(nodeID); } +} - void ScanMultiRelTable::resetState() { - currentScanner = nullptr; - for (auto& [_, scanner] : scanners) { - scanner.resetState(); - } +void ScanMultiRelTable::resetState() { + currentScanner = nullptr; + for (auto& [_, scanner] : scanners) { + scanner.resetState(); } +} - void ScanMultiRelTable::initCurrentScanner(const nodeID_t& nodeID) { - if (scanners.contains(nodeID.tableID)) { - currentScanner = &scanners.at(nodeID.tableID); - currentScanner->resetState(); - } else { - currentScanner = nullptr; - } +void ScanMultiRelTable::initCurrentScanner(const nodeID_t& nodeID) { + if (scanners.contains(nodeID.tableID)) { + currentScanner = &scanners.at(nodeID.tableID); + currentScanner->resetState(); + } else { + currentScanner = nullptr; } +} } // namespace processor } // namespace lbug diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index fa4cc720bc..aff9fe0204 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -90,8 +90,10 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext auto* columnarTable = dynamic_cast(tableInfo.table); auto* foreignTable = dynamic_cast(tableInfo.table); if (columnarTable) { - scanState = columnarTable->createScanState(boundNodeIDVector, outVectors, + auto tableScanState = columnarTable->createScanState(boundNodeIDVector, outVectors, MemoryManager::Get(*clientContext), nbrNodeIDVector->state); + scanState = std::unique_ptr( + dynamic_cast(tableScanState.release())); } else if (foreignTable) { scanState = std::make_unique(*MemoryManager::Get(*clientContext), diff --git a/src/storage/table/arrow_node_table.cpp b/src/storage/table/arrow_node_table.cpp index 65aaa220ab..a48973f000 100644 --- a/src/storage/table/arrow_node_table.cpp +++ b/src/storage/table/arrow_node_table.cpp @@ -29,7 +29,7 @@ ArrowNodeTable::ArrowNodeTable(const StorageManager* storageManager, ArrowSchemaWrapper schema, std::vector arrays, std::string arrowId) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, std::make_unique(scanMorselSize), - lbug::common::TableStorageFormat::ARROW}, + common::TableStorageFormat::ARROW}, schema{std::move(schema)}, arrays{std::move(arrays)}, totalRows{0}, arrowId{std::move(arrowId)} { // Note: release may be nullptr if schema is managed by registry diff --git a/src/storage/table/arrow_rel_table.cpp b/src/storage/table/arrow_rel_table.cpp index e8961959e0..49973d0c54 100644 --- a/src/storage/table/arrow_rel_table.cpp +++ b/src/storage/table/arrow_rel_table.cpp @@ -73,7 +73,7 @@ ArrowRelTable::ArrowRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, table ArrowSchemaWrapper indptrSchema, std::vector indptrArrays, std::string arrowId, std::string dstColumnName) : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, - lbug::common::TableStorageFormat::ARROW}, + common::TableStorageFormat::ARROW}, fromNodeTable{fromNodeTable}, toNodeTable{toNodeTable}, layout{layout}, schema{std::move(schema)}, arrays{std::move(arrays)}, indptrSchema{std::move(indptrSchema)}, indptrArrays{std::move(indptrArrays)}, arrowId{std::move(arrowId)} { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index b8fb4ea548..f7681c5f2b 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -30,7 +30,7 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, main::ClientContext* context) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, std::make_unique(), - lbug::common::TableStorageFormat::ICEBUG_DISK} { + common::TableStorageFormat::ICEBUG_DISK} { const auto& storage = nodeTableEntry->getStorage(); auto path = common::StringUtils::getLower(storage).ends_with("parquet") ? diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index fb786f689c..96ac3f9814 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -74,7 +74,7 @@ IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager, main::ClientContext* context) : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, - ::lbug::common::TableStorageFormat::ICEBUG_DISK}, + common::TableStorageFormat::ICEBUG_DISK}, layout{IceDiskRelTableLayout::CSR} { const auto& storage = relGroupEntry->getStorage(); if (common::StringUtils::getLower(storage).ends_with("parquet")) { diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index fc4b269a82..58f1c09b25 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -10,7 +10,6 @@ add_lbug_api_test(api_test arrow_error_scenarios_test.cpp arrow_drop_table_test.cpp arrow_table_function_test.cpp - table_storage_format_test.cpp prepare_test.cpp result_value_test.cpp storage_driver_test.cpp diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt index 14e0f4a877..ffb9e80ddc 100644 --- a/test/common/CMakeLists.txt +++ b/test/common/CMakeLists.txt @@ -6,7 +6,7 @@ add_lbug_test(types_test interval_test.cpp null_mask_test.cpp string_test.cpp - table_storage_format_test.cpp + table_storage_format_utils_test.cpp time_test.cpp timestamp_test.cpp vfs_test.cpp diff --git a/test/common/table_storage_format_test.cpp b/test/common/table_storage_format_utils_test.cpp similarity index 100% rename from test/common/table_storage_format_test.cpp rename to test/common/table_storage_format_utils_test.cpp diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 83efdeabba..9700d9aa12 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -8,5 +8,6 @@ add_lbug_test(node_update_test node_update_test.cpp) add_lbug_test(detach_delete_test detach_delete_test.cpp) add_lbug_test(storage_utils_test storage_utils_test.cpp) add_lbug_test(ice_disk_utils_test ice_disk_utils_test.cpp) +add_lbug_test(columnar_table_storage_format_test columnar_table_storage_format_test.cpp) target_include_directories(compression_test PRIVATE ${PROJECT_SOURCE_DIR}/third_party/alp/include) diff --git a/test/api/columnar_table_storage_format_test.cpp b/test/storage/columnar_table_storage_format_test.cpp similarity index 75% rename from test/api/columnar_table_storage_format_test.cpp rename to test/storage/columnar_table_storage_format_test.cpp index 3adafd2a46..9bdf96384c 100644 --- a/test/api/columnar_table_storage_format_test.cpp +++ b/test/storage/columnar_table_storage_format_test.cpp @@ -8,9 +8,13 @@ #include "common/arrow/arrow.h" #include "common/enums/table_storage_format.h" #include "common/enums/table_type.h" +#include "graph_test/private_graph_test.h" #include "gtest/gtest.h" +#include "main/client_context.h" #include "storage/storage_manager.h" #include "storage/table/arrow_table_support.h" +#include "storage/table/columnar_node_table_base.h" +#include "storage/table/columnar_rel_table_base.h" #include "storage/table/table.h" #include "test_helper/test_helper.h" #include "transaction/transaction.h" @@ -20,7 +24,16 @@ using namespace lbug; using namespace lbug::common; using namespace lbug::testing; -class ColumnarTableStorageFormatTest : public ApiTest {}; +class ColumnarTableStorageFormatTest : public EmptyDBTest { +protected: + void SetUp() override { + EmptyDBTest::SetUp(); + createDBAndConn(); + context = conn->getClientContext(); + } + + lbug::main::ClientContext* context = nullptr; +}; TEST_F(ColumnarTableStorageFormatTest, ReportsRuntimeTableFormats) { auto* context = getClientContext(*conn); @@ -32,12 +45,19 @@ TEST_F(ColumnarTableStorageFormatTest, ReportsRuntimeTableFormats) { auto* entry = catalog->getTableCatalogEntry(transaction, tableName); ASSERT_NE(entry, nullptr); lbug::common::table_id_t tableID = entry->getTableID(); - if (entry->getTableType() == lbug::common::TableType::REL) { + + if (entry->getTableType() == lbug::common::TableType::NODE) { + auto* table = + storageManager->getTable(tableID)->ptrCast(); + ASSERT_NE(table, nullptr); + EXPECT_EQ(expected, table->getStorageFormat()) << tableName; + } else { tableID = entry->ptrCast()->getSingleRelEntryInfo().oid; + auto* table = + storageManager->getTable(tableID)->ptrCast(); + ASSERT_NE(table, nullptr); + EXPECT_EQ(expected, table->getStorageFormat()) << tableName; } - auto* table = storageManager->getTable(tableID); - ASSERT_NE(table, nullptr); - EXPECT_EQ(expected, table->getStorageFormat()) << tableName; }; const auto iceDiskStorage = TestHelper::appendLbugRootPath("dataset/ice-disk-test/"); @@ -73,7 +93,7 @@ TEST_F(ColumnarTableStorageFormatTest, ReportsRuntimeTableFormats) { std::vector relArrays; relArrays.push_back(std::move(relArray)); auto relCreation = ArrowTableSupport::createRelTableFromArrowTable(*conn, "arrow_format_knows", - "person", "person", std::move(relSchema), std::move(relArrays)); + "arrow_format_person", "arrow_format_person", std::move(relSchema), std::move(relArrays)); ASSERT_TRUE(relCreation.queryResult->isSuccess()) << relCreation.queryResult->toString(); expectFormat("arrow_format_knows", TableStorageFormat::ARROW); } From e15d5e7d83327dc92659b65d4e775f2936f427d8 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 23 Jun 2026 20:13:54 +0530 Subject: [PATCH 4/4] rm TableStorageFormat --- src/common/enums/CMakeLists.txt | 1 - src/common/enums/table_storage_format.cpp | 37 ------- src/graph/on_disk_graph.cpp | 10 +- .../common/enums/table_storage_format.h | 29 ------ .../storage/table/columnar_node_table_base.h | 10 +- .../storage/table/columnar_rel_table_base.h | 10 +- src/include/storage/table/foreign_rel_table.h | 4 - src/processor/map/map_extend.cpp | 30 +++--- src/storage/table/arrow_node_table.cpp | 3 +- src/storage/table/arrow_rel_table.cpp | 3 +- src/storage/table/ice_disk_node_table.cpp | 3 +- src/storage/table/ice_disk_rel_table.cpp | 3 +- test/common/CMakeLists.txt | 1 - .../table_storage_format_utils_test.cpp | 13 --- test/storage/CMakeLists.txt | 1 - .../columnar_table_storage_format_test.cpp | 99 ------------------- 16 files changed, 24 insertions(+), 233 deletions(-) delete mode 100644 src/common/enums/table_storage_format.cpp delete mode 100644 src/include/common/enums/table_storage_format.h delete mode 100644 test/common/table_storage_format_utils_test.cpp delete mode 100644 test/storage/columnar_table_storage_format_test.cpp diff --git a/src/common/enums/CMakeLists.txt b/src/common/enums/CMakeLists.txt index 03466054ec..1205d9fe0d 100644 --- a/src/common/enums/CMakeLists.txt +++ b/src/common/enums/CMakeLists.txt @@ -1,7 +1,6 @@ add_library(lbug_common_enums OBJECT accumulate_type.cpp - table_storage_format.cpp storage_format.cpp path_semantic.cpp query_rel_type.cpp diff --git a/src/common/enums/table_storage_format.cpp b/src/common/enums/table_storage_format.cpp deleted file mode 100644 index 235922eb3e..0000000000 --- a/src/common/enums/table_storage_format.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "common/enums/table_storage_format.h" - -#include "common/assert.h" -#include "common/exception/binder.h" -#include "common/string_utils.h" -#include - -namespace lbug { -namespace common { - -std::string TableStorageFormatUtils::toString(TableStorageFormat format) { - switch (format) { - case TableStorageFormat::UNKNOWN: - return "unknown"; - case TableStorageFormat::NATIVE: - return "native"; - case TableStorageFormat::ICEBUG_DISK: - return "icebug-disk"; - case TableStorageFormat::ARROW: - return "arrow"; - case TableStorageFormat::FOREIGN: - return "foreign"; - default: - UNREACHABLE_CODE; - } -} - -bool TableStorageFormatUtils::isArrow(TableStorageFormat format) { - return format == TableStorageFormat::ARROW; -} - -bool TableStorageFormatUtils::isIceDisk(TableStorageFormat format) { - return format == TableStorageFormat::ICEBUG_DISK; -} - -} // namespace common -} // namespace lbug diff --git a/src/graph/on_disk_graph.cpp b/src/graph/on_disk_graph.cpp index 1873fe8ac3..5ebf9acc21 100644 --- a/src/graph/on_disk_graph.cpp +++ b/src/graph/on_disk_graph.cpp @@ -129,13 +129,9 @@ OnDiskGraphNbrScanState::OnDiskGraphNbrScanState(ClientContext* context, } std::unique_ptr scanState; - if (const auto* columnarTable = dynamic_cast(table)) { - if (TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { - auto tableScanState = - columnarTable->createScanState(srcNodeIDVector.get(), outVectors, mm, state); - scanState = std::unique_ptr( - dynamic_cast(tableScanState.release())); - } + if (dynamic_cast(table) != nullptr) { + scanState = std::make_unique(*mm, srcNodeIDVector.get(), + outVectors, state); } else { scanState = std::make_unique(*MemoryManager::Get(*context), srcNodeIDVector.get(), outVectors, dstNodeIDVector->state, randomLookup); diff --git a/src/include/common/enums/table_storage_format.h b/src/include/common/enums/table_storage_format.h deleted file mode 100644 index 7b6c680ccc..0000000000 --- a/src/include/common/enums/table_storage_format.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include - -#include "common/api.h" - -namespace lbug { -namespace common { - -enum class TableStorageFormat : uint8_t { - UNKNOWN = 0, - // native - NATIVE = 1, - // first class citizens - ICEBUG_DISK = 10, - ARROW = 11, - // foreign tables - FOREIGN = 30, -}; - -struct TableStorageFormatUtils { - static std::string toString(TableStorageFormat format); - static bool isArrow(TableStorageFormat format); - static bool isIceDisk(TableStorageFormat format); -}; - -} // namespace common -} // namespace lbug diff --git a/src/include/storage/table/columnar_node_table_base.h b/src/include/storage/table/columnar_node_table_base.h index e145f4a309..9c91bc0973 100644 --- a/src/include/storage/table/columnar_node_table_base.h +++ b/src/include/storage/table/columnar_node_table_base.h @@ -4,7 +4,6 @@ #include #include "catalog/catalog_entry/node_table_catalog_entry.h" -#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "common/mask.h" #include "common/types/internal_id_util.h" @@ -38,11 +37,10 @@ class ColumnarNodeTableBase : public NodeTable { public: ColumnarNodeTableBase(const StorageManager* storageManager, const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, - std::unique_ptr tableScanSharedState, - common::TableStorageFormat storageFormat) + std::unique_ptr tableScanSharedState) : NodeTable{storageManager, nodeTableEntry, memoryManager}, nodeTableCatalogEntry{nodeTableEntry}, - tableScanSharedState{std::move(tableScanSharedState)}, storageFormat{storageFormat} {} + tableScanSharedState{std::move(tableScanSharedState)} {} virtual ~ColumnarNodeTableBase() = default; @@ -81,15 +79,11 @@ class ColumnarNodeTableBase : public NodeTable { ColumnarNodeTableScanSharedState* getTableScanSharedState() const { return tableScanSharedState.get(); } - common::TableStorageFormat getStorageFormat() const { return storageFormat; } virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager) const = 0; virtual common::node_group_idx_t getNumScanMorsels( const transaction::Transaction* transaction) const = 0; - -private: - common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/columnar_rel_table_base.h b/src/include/storage/table/columnar_rel_table_base.h index 7808e2544d..686f92c6b1 100644 --- a/src/include/storage/table/columnar_rel_table_base.h +++ b/src/include/storage/table/columnar_rel_table_base.h @@ -5,7 +5,6 @@ #include #include "catalog/catalog_entry/rel_group_catalog_entry.h" -#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "common/types/internal_id_util.h" #include "storage/table/rel_table.h" @@ -18,10 +17,9 @@ class ColumnarRelTableBase : public RelTable { public: ColumnarRelTableBase(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, common::table_id_t toTableID, - const StorageManager* storageManager, MemoryManager* memoryManager, - common::TableStorageFormat storageFormat) + const StorageManager* storageManager, MemoryManager* memoryManager) : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, - relGroupEntry{relGroupEntry}, storageFormat{storageFormat} {} + relGroupEntry{relGroupEntry} {} virtual ~ColumnarRelTableBase() = default; @@ -53,7 +51,6 @@ class ColumnarRelTableBase : public RelTable { std::vector> getTopKDegrees( const transaction::Transaction* transaction, common::RelDataDirection direction, common::idx_t k) override; - common::TableStorageFormat getStorageFormat() const { return storageFormat; } virtual std::unique_ptr createScanState(common::ValueVector* nodeIDVector, const std::vector& outVectors, MemoryManager* memoryManager, std::shared_ptr outChunkState) const = 0; @@ -78,9 +75,6 @@ class ColumnarRelTableBase : public RelTable { // Subclasses should cache indptr data and provide it via this interface virtual common::offset_t findSourceNodeForRowInternal(common::offset_t globalRowIdx, const std::vector& indptrData) const; - -private: - common::TableStorageFormat storageFormat; }; } // namespace storage diff --git a/src/include/storage/table/foreign_rel_table.h b/src/include/storage/table/foreign_rel_table.h index 749826b44f..cbb1466ca3 100644 --- a/src/include/storage/table/foreign_rel_table.h +++ b/src/include/storage/table/foreign_rel_table.h @@ -1,7 +1,6 @@ #pragma once #include "catalog/catalog_entry/rel_group_catalog_entry.h" -#include "common/enums/table_storage_format.h" #include "common/exception/runtime.h" #include "function/table/table_function.h" #include "storage/table/rel_table.h" @@ -26,8 +25,6 @@ class ForeignRelTable final : public RelTable { MemoryManager* memoryManager, function::TableFunction scanFunction, std::shared_ptr scanBindData); - common::TableStorageFormat getStorageFormat() const { return storageFormat; } - void initScanState(transaction::Transaction* transaction, TableScanState& scanState, bool resetCachedBoundNodeSelVec = true) const override; @@ -53,7 +50,6 @@ class ForeignRelTable final : public RelTable { private: function::TableFunction scanFunction; std::shared_ptr scanBindData; - common::TableStorageFormat storageFormat = common::TableStorageFormat::FOREIGN; }; } // namespace storage diff --git a/src/processor/map/map_extend.cpp b/src/processor/map/map_extend.cpp index dc8e6f7b8e..42a03255d0 100644 --- a/src/processor/map/map_extend.cpp +++ b/src/processor/map/map_extend.cpp @@ -223,24 +223,20 @@ std::unique_ptr PlanMapper::mapExtend(const LogicalOperator* l std::make_shared(std::move(semiMask))); } } - if (!sourceNodeTables.empty() && !scanNode->getProperties().empty()) { - if (const auto* columnarTable = dynamic_cast(relTable); - columnarTable != nullptr && - TableStorageFormatUtils::isIceDisk(columnarTable->getStorageFormat())) { - std::vector sourceOutVectorsPos; - for (auto& expression : scanNode->getProperties()) { - sourceOutVectorsPos.emplace_back(getDataPos(*expression, *inFSchema)); - } - auto sourceNodeScanInfo = - ScanOpInfo(inNodeIDPos, std::move(sourceOutVectorsPos)); - auto progressSharedState = - std::make_shared(); - return std::make_unique(std::move(scanInfo), - std::move(scanRelInfo), std::move(sourceNodeTableInfos), - std::move(sourceNodeSharedStates), std::move(progressSharedState), - std::move(sourceNodeScanInfo), getOperatorID(), printInfo->copy(), - physicalOperatorType); + if (!sourceNodeTables.empty() && !scanNode->getProperties().empty() && + dynamic_cast(relTable) != nullptr) { + std::vector sourceOutVectorsPos; + for (auto& expression : scanNode->getProperties()) { + sourceOutVectorsPos.emplace_back(getDataPos(*expression, *inFSchema)); } + auto sourceNodeScanInfo = + ScanOpInfo(inNodeIDPos, std::move(sourceOutVectorsPos)); + auto progressSharedState = std::make_shared(); + return std::make_unique(std::move(scanInfo), + std::move(scanRelInfo), std::move(sourceNodeTableInfos), + std::move(sourceNodeSharedStates), std::move(progressSharedState), + std::move(sourceNodeScanInfo), getOperatorID(), printInfo->copy(), + physicalOperatorType); } // Only apply the existing no-property optimization if scan node is not already // mapped (e.g., by a semi-masker). diff --git a/src/storage/table/arrow_node_table.cpp b/src/storage/table/arrow_node_table.cpp index a48973f000..ba84148c2b 100644 --- a/src/storage/table/arrow_node_table.cpp +++ b/src/storage/table/arrow_node_table.cpp @@ -28,8 +28,7 @@ ArrowNodeTable::ArrowNodeTable(const StorageManager* storageManager, const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, ArrowSchemaWrapper schema, std::vector arrays, std::string arrowId) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, - std::make_unique(scanMorselSize), - common::TableStorageFormat::ARROW}, + std::make_unique(scanMorselSize)}, schema{std::move(schema)}, arrays{std::move(arrays)}, totalRows{0}, arrowId{std::move(arrowId)} { // Note: release may be nullptr if schema is managed by registry diff --git a/src/storage/table/arrow_rel_table.cpp b/src/storage/table/arrow_rel_table.cpp index 49973d0c54..868b06c764 100644 --- a/src/storage/table/arrow_rel_table.cpp +++ b/src/storage/table/arrow_rel_table.cpp @@ -72,8 +72,7 @@ ArrowRelTable::ArrowRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, table ArrowSchemaWrapper schema, std::vector arrays, ArrowSchemaWrapper indptrSchema, std::vector indptrArrays, std::string arrowId, std::string dstColumnName) - : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, - common::TableStorageFormat::ARROW}, + : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, fromNodeTable{fromNodeTable}, toNodeTable{toNodeTable}, layout{layout}, schema{std::move(schema)}, arrays{std::move(arrays)}, indptrSchema{std::move(indptrSchema)}, indptrArrays{std::move(indptrArrays)}, arrowId{std::move(arrowId)} { diff --git a/src/storage/table/ice_disk_node_table.cpp b/src/storage/table/ice_disk_node_table.cpp index f7681c5f2b..c2eeb8c471 100644 --- a/src/storage/table/ice_disk_node_table.cpp +++ b/src/storage/table/ice_disk_node_table.cpp @@ -29,8 +29,7 @@ IceDiskNodeTable::IceDiskNodeTable(const StorageManager* storageManager, const NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager, main::ClientContext* context) : ColumnarNodeTableBase{storageManager, nodeTableEntry, memoryManager, - std::make_unique(), - common::TableStorageFormat::ICEBUG_DISK} { + std::make_unique()} { const auto& storage = nodeTableEntry->getStorage(); auto path = common::StringUtils::getLower(storage).ends_with("parquet") ? diff --git a/src/storage/table/ice_disk_rel_table.cpp b/src/storage/table/ice_disk_rel_table.cpp index 96ac3f9814..13d5778dd7 100644 --- a/src/storage/table/ice_disk_rel_table.cpp +++ b/src/storage/table/ice_disk_rel_table.cpp @@ -73,8 +73,7 @@ void IceDiskRelTableScanState::reloadCachedBatchData(Transaction* transaction) { IceDiskRelTable::IceDiskRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t fromTableID, table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager, main::ClientContext* context) - : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager, - common::TableStorageFormat::ICEBUG_DISK}, + : ColumnarRelTableBase{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager}, layout{IceDiskRelTableLayout::CSR} { const auto& storage = relGroupEntry->getStorage(); if (common::StringUtils::getLower(storage).ends_with("parquet")) { diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt index ffb9e80ddc..5b65d8ab35 100644 --- a/test/common/CMakeLists.txt +++ b/test/common/CMakeLists.txt @@ -6,7 +6,6 @@ add_lbug_test(types_test interval_test.cpp null_mask_test.cpp string_test.cpp - table_storage_format_utils_test.cpp time_test.cpp timestamp_test.cpp vfs_test.cpp diff --git a/test/common/table_storage_format_utils_test.cpp b/test/common/table_storage_format_utils_test.cpp deleted file mode 100644 index 56d5b56ccd..0000000000 --- a/test/common/table_storage_format_utils_test.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "common/enums/table_storage_format.h" -#include "common/exception/binder.h" -#include "gtest/gtest.h" - -using namespace lbug::common; - -TEST(TableStorageFormatUtilsTest, ToString) { - EXPECT_EQ("unknown", TableStorageFormatUtils::toString(TableStorageFormat::UNKNOWN)); - EXPECT_EQ("native", TableStorageFormatUtils::toString(TableStorageFormat::NATIVE)); - EXPECT_EQ("icebug-disk", TableStorageFormatUtils::toString(TableStorageFormat::ICEBUG_DISK)); - EXPECT_EQ("arrow", TableStorageFormatUtils::toString(TableStorageFormat::ARROW)); - EXPECT_EQ("foreign", TableStorageFormatUtils::toString(TableStorageFormat::FOREIGN)); -} diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 9700d9aa12..83efdeabba 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -8,6 +8,5 @@ add_lbug_test(node_update_test node_update_test.cpp) add_lbug_test(detach_delete_test detach_delete_test.cpp) add_lbug_test(storage_utils_test storage_utils_test.cpp) add_lbug_test(ice_disk_utils_test ice_disk_utils_test.cpp) -add_lbug_test(columnar_table_storage_format_test columnar_table_storage_format_test.cpp) target_include_directories(compression_test PRIVATE ${PROJECT_SOURCE_DIR}/third_party/alp/include) diff --git a/test/storage/columnar_table_storage_format_test.cpp b/test/storage/columnar_table_storage_format_test.cpp deleted file mode 100644 index 9bdf96384c..0000000000 --- a/test/storage/columnar_table_storage_format_test.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include - -#include "api_test/api_test.h" -#include "arrow_test_utils.h" -#include "catalog/catalog.h" -#include "catalog/catalog_entry/rel_group_catalog_entry.h" -#include "common/arrow/arrow.h" -#include "common/enums/table_storage_format.h" -#include "common/enums/table_type.h" -#include "graph_test/private_graph_test.h" -#include "gtest/gtest.h" -#include "main/client_context.h" -#include "storage/storage_manager.h" -#include "storage/table/arrow_table_support.h" -#include "storage/table/columnar_node_table_base.h" -#include "storage/table/columnar_rel_table_base.h" -#include "storage/table/table.h" -#include "test_helper/test_helper.h" -#include "transaction/transaction.h" -#include - -using namespace lbug; -using namespace lbug::common; -using namespace lbug::testing; - -class ColumnarTableStorageFormatTest : public EmptyDBTest { -protected: - void SetUp() override { - EmptyDBTest::SetUp(); - createDBAndConn(); - context = conn->getClientContext(); - } - - lbug::main::ClientContext* context = nullptr; -}; - -TEST_F(ColumnarTableStorageFormatTest, ReportsRuntimeTableFormats) { - auto* context = getClientContext(*conn); - auto* transaction = &transaction::DUMMY_CHECKPOINT_TRANSACTION; - auto catalog = catalog::Catalog::Get(*context); - auto storageManager = database->getStorageManager(); - - auto expectFormat = [&](const std::string& tableName, TableStorageFormat expected) { - auto* entry = catalog->getTableCatalogEntry(transaction, tableName); - ASSERT_NE(entry, nullptr); - lbug::common::table_id_t tableID = entry->getTableID(); - - if (entry->getTableType() == lbug::common::TableType::NODE) { - auto* table = - storageManager->getTable(tableID)->ptrCast(); - ASSERT_NE(table, nullptr); - EXPECT_EQ(expected, table->getStorageFormat()) << tableName; - } else { - tableID = entry->ptrCast()->getSingleRelEntryInfo().oid; - auto* table = - storageManager->getTable(tableID)->ptrCast(); - ASSERT_NE(table, nullptr); - EXPECT_EQ(expected, table->getStorageFormat()) << tableName; - } - }; - - const auto iceDiskStorage = TestHelper::appendLbugRootPath("dataset/ice-disk-test/"); - auto iceDiskResult = conn->query(std::format( - "CREATE NODE TABLE upperversion(id INT64 PRIMARY KEY) WITH (storage = '{}', format = " - "'icebug-disk')", - iceDiskStorage)); - ASSERT_TRUE(iceDiskResult->isSuccess()) << iceDiskResult->toString(); - expectFormat("upperversion", TableStorageFormat::ICEBUG_DISK); - - ArrowSchemaWrapper nodeSchema; - createStructSchema(&nodeSchema, 1); - createSchema(nodeSchema.children[0], "id"); - std::vector nodeIds = {1, 2, 3}; - auto nodeArray = createStructArray(static_cast(nodeIds.size()), - {[&](ArrowArray* array) { createInt64Array(array, nodeIds); }}); - std::vector nodeArrays; - nodeArrays.push_back(std::move(nodeArray)); - auto nodeCreation = ArrowTableSupport::createViewFromArrowTable(*conn, "arrow_format_person", - std::move(nodeSchema), std::move(nodeArrays)); - ASSERT_TRUE(nodeCreation.queryResult->isSuccess()) << nodeCreation.queryResult->toString(); - expectFormat("arrow_format_person", TableStorageFormat::ARROW); - - ArrowSchemaWrapper relSchema; - createStructSchema(&relSchema, 2); - createSchema(relSchema.children[0], "from"); - createSchema(relSchema.children[1], "to"); - std::vector relFrom = {0, 1}; - std::vector relTo = {1, 2}; - auto relArray = createStructArray(static_cast(relFrom.size()), - {[&](ArrowArray* array) { createInt64Array(array, relFrom); }, - [&](ArrowArray* array) { createInt64Array(array, relTo); }}); - std::vector relArrays; - relArrays.push_back(std::move(relArray)); - auto relCreation = ArrowTableSupport::createRelTableFromArrowTable(*conn, "arrow_format_knows", - "arrow_format_person", "arrow_format_person", std::move(relSchema), std::move(relArrays)); - ASSERT_TRUE(relCreation.queryResult->isSuccess()) << relCreation.queryResult->toString(); - expectFormat("arrow_format_knows", TableStorageFormat::ARROW); -}