From b14fee7030735302fb334797a8e7bd22b869c391 Mon Sep 17 00:00:00 2001 From: Guangyang Deng Date: Wed, 6 Aug 2025 11:25:28 +0800 Subject: [PATCH 001/175] chore: Lower the system version for compatibility (#64) --- .github/workflows/topling-jni.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/topling-jni.yml b/.github/workflows/topling-jni.yml index ae9048041b..52f6e3f6ca 100644 --- a/.github/workflows/topling-jni.yml +++ b/.github/workflows/topling-jni.yml @@ -24,7 +24,7 @@ on: jobs: build: # refer https://github.com/actions/runner-images to get the details - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 env: GCC_VER: "11.3" # TODO: better get from the 'gcc --version' GITHUB_TOKEN: ${{ github.token }} From 242222e4e3ef56939e3567dab49f298ce61b73be Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 8 Aug 2025 14:33:46 +0800 Subject: [PATCH 002/175] submodule rockside: RunManualFlushAll: remove redundant useless code --- Makefile | 6 ++++-- sideplugin/rockside | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ae37d225f1..4c122e39cb 100644 --- a/Makefile +++ b/Makefile @@ -374,13 +374,15 @@ CXXFLAGS += \ LDFLAGS += -L${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared \ -lterark-{zbs,fsa,core}-${COMPILER}-${BUILD_TYPE_SIG} +GIT_TOPLING_ROCKS ?= git@github.com:rockeet/topling-rocks + ifndef WITH_TOPLING_ROCKS # auto check ifeq (,$(wildcard sideplugin/topling-rocks)) # topling specific: just for people who has permission to topling-rocks dummy := $(shell set -e -x; \ cd sideplugin; \ - git clone git@github.com:rockeet/topling-rocks; \ + git clone ${GIT_TOPLING_ROCKS}; \ cd topling-rocks; \ git submodule update --init --recursive \ ) @@ -397,7 +399,7 @@ ifeq (,$(wildcard sideplugin/topling-rocks)) # topling specific: just for people who has permission to topling-rocks dummy := $(shell set -e -x; \ cd sideplugin; \ - git clone git@github.com:rockeet/topling-rocks; \ + git clone 
${GIT_TOPLING_ROCKS}; \ cd topling-rocks; \ git submodule update --init --recursive \ ) diff --git a/sideplugin/rockside b/sideplugin/rockside index 18e4f69a29..c81ef76ced 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 18e4f69a29b31e1a490521feebd6b773de6983f9 +Subproject commit c81ef76ced5dbf53ee138d4b8e3b6696bf2b2ea9 From 31004b9f747d422c7e2dfbb174a6372d7eff619e Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 9 Aug 2025 23:22:42 +0800 Subject: [PATCH 003/175] c api: check _GLIBCXX_USE_CXX11_ABI for rocksdb_pinnableslice_t --- db/c.cc | 2 ++ include/rocksdb/c.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/db/c.cc b/db/c.cc index 3da28c9dc8..e4d6efa92e 100644 --- a/db/c.cc +++ b/db/c.cc @@ -286,7 +286,9 @@ struct rocksdb_pinnableslice_t { PinnableSlice rep; }; static_assert(sizeof(rocksdb_pinnableslice_t) == sizeof(PinnableSlice)); +#if defined(__GLIBCXX__) && _GLIBCXX_USE_CXX11_ABI static_assert(sizeof(rocksdb_pinnableslice_t) == 96, "see _opaque_data_ in c.h"); +#endif struct rocksdb_transactiondb_options_t { TransactionDBOptions rep; }; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 6430247f7c..16564ce79f 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -148,11 +148,13 @@ struct rocksdb_slice_t { size_t size; }; typedef struct rocksdb_slice_t rocksdb_slice_t; +#if defined(__GLIBCXX__) && _GLIBCXX_USE_CXX11_ABI struct rocksdb_pinnableslice_t { const char* data; size_t size; unsigned char _opaque_data_[80]; }; +#endif // _GLIBCXX_USE_CXX11_ABI #endif /* DB operations */ From d34aeaf0491da675b4ade6fd1446ba18a4c5c5d7 Mon Sep 17 00:00:00 2001 From: leipeng Date: Sun, 10 Aug 2025 17:47:44 +0800 Subject: [PATCH 004/175] ReadOptions::BooleanDontCopyTrue(&&) minor bugfix --- include/rocksdb/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 6e10227ff8..739724d8ba 100644 --- a/include/rocksdb/options.h +++ 
b/include/rocksdb/options.h @@ -1811,7 +1811,7 @@ struct ReadOptions { } BooleanDontCopyTrue(BooleanDontCopyTrue&& y) { ROCKSDB_VERIFY(y.value == false); // strict for release - ROCKSDB_VERIFY(this->value == false); // strict for release + this->value = false; } BooleanDontCopyTrue& operator=(BooleanDontCopyTrue&& y) { ROCKSDB_VERIFY(y.value == false); // strict for release From 7b5c6629306772141e56e87fc76b8250c66674bb Mon Sep 17 00:00:00 2001 From: leipeng Date: Sun, 10 Aug 2025 18:14:45 +0800 Subject: [PATCH 005/175] java: jmh: change project.build.source 1.7 to 1.8 --- java/jmh/build.sh | 2 +- java/jmh/pom.xml | 4 ++-- java/jmh/run.sh | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) mode change 100644 => 100755 java/jmh/build.sh mode change 100644 => 100755 java/jmh/run.sh diff --git a/java/jmh/build.sh b/java/jmh/build.sh old mode 100644 new mode 100755 index ede549db26..599a727d57 --- a/java/jmh/build.sh +++ b/java/jmh/build.sh @@ -5,4 +5,4 @@ cd $mydir/../target cp rocksdbjni-8.10.2-linux64.jar rocksdbjni-8.10.2-SNAPSHOT-linux64.jar mvn install:install-file -Dfile=rocksdbjni-8.10.2-SNAPSHOT-linux64.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=8.10.2-SNAPSHOT -Dpackaging=jar cd $mydir -mvn clean package +mvn clean package -T 1C diff --git a/java/jmh/pom.xml b/java/jmh/pom.xml index fe6f35e9c7..f9f9474f45 100644 --- a/java/jmh/pom.xml +++ b/java/jmh/pom.xml @@ -38,8 +38,8 @@ - 1.7 - 1.7 + 1.8 + 1.8 UTF-8 1.22 diff --git a/java/jmh/run.sh b/java/jmh/run.sh old mode 100644 new mode 100755 index e6f7c0718a..feba026ce1 --- a/java/jmh/run.sh +++ b/java/jmh/run.sh @@ -6,6 +6,7 @@ dbdir=/dev/shm/db_bench_enterprise # defined in db_bench_enterprise.yaml mkdir -p $dbdir cp $topdir/sideplugin/rockside/src/topling/web/{style.css,index.html} $dbdir args=( + --add-opens java.base/java.nio=ALL-UNNAMED -jar $mydir/target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=10000 -p keySize=128 From a301c63fe3efabdd2af25108d1b791300c9e2175 Mon Sep 17 
00:00:00 2001 From: leipeng Date: Sun, 10 Aug 2025 19:06:09 +0800 Subject: [PATCH 006/175] SideGetBenchmarks.java: Add bench aZeroCopyGet --- java/jmh/run.sh | 1 + .../main/java/org/rocksdb/jmh/SideGetBenchmarks.java | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/java/jmh/run.sh b/java/jmh/run.sh index feba026ce1..a5059a5f7f 100755 --- a/java/jmh/run.sh +++ b/java/jmh/run.sh @@ -11,6 +11,7 @@ args=( -p keyCount=10000 -p keySize=128 -p valueSize=512 + #-p valueSize=65536 # larger value size showing zero copy faster #-p dbname=db_bench_enterprise #-p dbpath=/dev/shm/db_bench_enterprise -p sideConf=$topdir/sideplugin/rockside/sample-conf/db_bench_enterprise.yaml diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/SideGetBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/SideGetBenchmarks.java index 12e241331f..20a7bef32c 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/SideGetBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/SideGetBenchmarks.java @@ -43,6 +43,7 @@ public class SideGetBenchmarks { private final AtomicInteger keyIndex = new AtomicInteger(); private ByteBuffer keyBuf; private ByteBuffer valueBuf; + private ByteBuffer zeroCopyBuf; private byte[] keyArr; private byte[] valueArr; @@ -98,6 +99,7 @@ public void setup() throws IOException, RocksDBException { keyBuf.flip(); valueBuf.put(valueArr); valueBuf.flip(); + zeroCopyBuf = DirectSlice.newZeroCopyDirectBuffer(); } @TearDown(Level.Trial) @@ -183,6 +185,16 @@ private ByteBuffer getValueBuf() { return valueBuf; } + @Benchmark + public void aZeroCopyGet() throws RocksDBException { + try { + readOptions.startZeroCopy(); + db.get(getColumnFamily(), readOptions, getKeyArr(), zeroCopyBuf); + } finally { + readOptions.finishZeroCopy(); + } + } + @Benchmark public void get() throws RocksDBException { db.get(getColumnFamily(), getKeyArr()); From ec55229422a8dcaaf7fa938257e3326f4b0b47b5 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 12 Aug 2025 01:52:01 +0800 Subject: [PATCH 
007/175] java: optimize compile speed --- Makefile | 27 ++++++++++++++++++++++++++- java/Makefile | 21 +++++++++++++++++++-- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 4c122e39cb..ba91886dcc 100644 --- a/Makefile +++ b/Makefile @@ -621,6 +621,9 @@ am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) am__v_at_0 = @ am__v_at_1 = +export AM_V_at +export AM_V_GEN + AM_V_CC = $(am__v_CC_$(V)) am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) am__v_CC_0 = @echo " CC " $@; @@ -2625,6 +2628,11 @@ ifndef JAVA_HOME JAVA_HOME := $(shell javac -J-XshowSettings:properties -version 2>&1 | awk '/java.home/{print $$NF}') $(warning Auto detected JAVA_HOME = ${JAVA_HOME}, if it is not true please set JAVA_HOME) endif +ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) + ifeq (${MAKE_RESTARTS},) + dummy := $(shell rm -f rocksdbjava-header) + endif +endif JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux ifeq ($(PLATFORM), OS_SOLARIS) ARCH := $(shell isainfo -b) @@ -2969,7 +2977,9 @@ jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. 
-I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) +ifeq ($(SKIP_DEPENDS),1) ${ALL_JNI_NATIVE_OBJECTS}: rocksdbjava-header +endif rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) @@ -2988,6 +2998,8 @@ endif $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) style.css index.html $(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class $(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 + $(AM_V_at)rm rocksdbjava-header + @echo make $@ done install-jni: rocksdbjava mkdir -p $(INSTALL_LIBDIR) @@ -2997,7 +3009,11 @@ rocksdbjava-header: ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif - $(AM_V_GEN)cd java; $(MAKE) javalib; + $(AM_V_GEN)flock .rocksdbjava-header -c \ + 'if [ ! -f rocksdbjava-header ]; then \ + $(MAKE) -C java java_test; \ + touch rocksdbjava-header; \ + fi' jclean: cd java;$(MAKE) clean; @@ -3137,6 +3153,9 @@ endif # If skip dependencies is ON, skip including the dep files ifneq ($(SKIP_DEPENDS), 1) DEPFILES := $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES)) +ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) +DEPFILES += $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, ${ALL_JNI_NATIVE_SOURCES}) +endif DEPFILES := $(patsubst %.cpp,$(OBJ_DIR)/%.cpp.d,$(DEPFILES)) DEPFILES += $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C) $(TEST_MAIN_SOURCES_C)) ifeq ($(USE_FOLLY_LITE),1) @@ -3149,6 +3168,12 @@ endif # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. 
+$(OBJ_DIR)/java/%.cc.d: java/%.cc rocksdbjava-header + $(AM_V_at)mkdir -p $(@D) + $(AM_V_at)$(CXX) $(CXXFLAGS) \ + -Ijava -Ijava/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS)\ + -MM -MT'$@' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' "$<" -o '$@' + $(OBJ_DIR)/%.cc.d: %.cc @mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.cc=.o)' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' \ diff --git a/java/Makefile b/java/Makefile index 9778355e6f..c22c2a0d9e 100644 --- a/java/Makefile +++ b/java/Makefile @@ -335,6 +335,8 @@ ifneq ($(DEBUG_LEVEL),0) endif ifeq (${ENABLE_JAVA_LINT},1) JAVAC_ARGS += -Xlint:deprecation -Xlint:unchecked +else + JAVAC_ARGS += -Xlint:none -nowarn -XDignore.symbol.file endif # Using a Facebook AWS account for S3 storage. (maven.org has a history @@ -367,7 +369,15 @@ javalib: java java_test javadocs java: java-version $(AM_V_GEN)mkdir -p $(MAIN_CLASSES) - $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -h $(NATIVE_INCLUDE) -d $(MAIN_CLASSES) $(SOURCES) + $(AM_V_GEN)mkdir -p $(NATIVE_INCLUDE) + $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -h $(NATIVE_INCLUDE)-tmp -d $(MAIN_CLASSES) $(SOURCES) + $(AM_V_at)cd $(NATIVE_INCLUDE)-tmp; \ + for f in *.h; do \ + if ! cmp -s $$f ../$(NATIVE_INCLUDE)/$$f; then \ + mv $$f ../$(NATIVE_INCLUDE); \ + fi \ + done + $(AM_V_at)rm -rf $(NATIVE_INCLUDE)-tmp $(AM_V_at)@cp ../HISTORY.md ./HISTORY-CPP.md $(AM_V_at)@rm -f ./HISTORY-CPP.md @@ -468,8 +478,15 @@ resolve_test_deps: $(JAVA_JUNIT_JAR_PATH) $(JAVA_HAMCREST_JAR_PATH) $(JAVA_MOCKI java_test: java resolve_test_deps $(AM_V_GEN)mkdir -p $(TEST_CLASSES) - $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES)\ + $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE)-test -d $(TEST_CLASSES)\ $(TEST_SOURCES) + $(AM_V_at)cd $(NATIVE_INCLUDE)-test; \ + for f in *.h; do \ + if ! 
cmp -s $$f ../$(NATIVE_INCLUDE)/$$f; then \ + mv $$f ../$(NATIVE_INCLUDE); \ + fi \ + done + $(AM_V_at)rm -rf $(NATIVE_INCLUDE)-test test: java java_test $(MAKE) run_test From 8af6e2ca677dd93b08a8f8c1f14cded2291029ce Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 12 Aug 2025 08:53:03 +0800 Subject: [PATCH 008/175] java/Makefile: delete useless action --- java/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/Makefile b/java/Makefile index c22c2a0d9e..fea0da7f24 100644 --- a/java/Makefile +++ b/java/Makefile @@ -378,8 +378,6 @@ java: java-version fi \ done $(AM_V_at)rm -rf $(NATIVE_INCLUDE)-tmp - $(AM_V_at)@cp ../HISTORY.md ./HISTORY-CPP.md - $(AM_V_at)@rm -f ./HISTORY-CPP.md sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) From 41864f53dd643a19d4002cd60f2d626fabdef10d Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 12 Aug 2025 11:41:05 +0800 Subject: [PATCH 009/175] java: simplify and speed `make` --- Makefile | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index ba91886dcc..43e10b0547 100644 --- a/Makefile +++ b/Makefile @@ -2630,7 +2630,8 @@ ifndef JAVA_HOME endif ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) ifeq (${MAKE_RESTARTS},) - dummy := $(shell rm -f rocksdbjava-header) + # java_test is for generate all jni header files + dummy := $(shell $(MAKE) -C java java_test) endif endif JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux @@ -2977,9 +2978,6 @@ jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. 
-I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -ifeq ($(SKIP_DEPENDS),1) -${ALL_JNI_NATIVE_OBJECTS}: rocksdbjava-header -endif rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) @@ -2998,7 +2996,6 @@ endif $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) style.css index.html $(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class $(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 - $(AM_V_at)rm rocksdbjava-header @echo make $@ done install-jni: rocksdbjava @@ -3006,14 +3003,7 @@ install-jni: rocksdbjava install -C -m 644 java/target/*.so $(INSTALL_LIBDIR) rocksdbjava-header: -ifeq ($(JAVA_HOME),) - $(error JAVA_HOME is not set) -endif - $(AM_V_GEN)flock .rocksdbjava-header -c \ - 'if [ ! -f rocksdbjava-header ]; then \ - $(MAKE) -C java java_test; \ - touch rocksdbjava-header; \ - fi' + $(AM_V_GEN)$(MAKE) -C java java_test jclean: cd java;$(MAKE) clean; @@ -3168,7 +3158,7 @@ endif # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. 
-$(OBJ_DIR)/java/%.cc.d: java/%.cc rocksdbjava-header +$(OBJ_DIR)/java/%.cc.d: java/%.cc $(AM_V_at)mkdir -p $(@D) $(AM_V_at)$(CXX) $(CXXFLAGS) \ -Ijava -Ijava/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS)\ From 0eb2a24fc68a89744e9fe41180109fb8e179cc85 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 12 Aug 2025 12:09:42 +0800 Subject: [PATCH 010/175] java: pass JAVA_HOME between Makefile & autocheck --- Makefile | 3 ++- java/Makefile | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43e10b0547..026b556d0e 100644 --- a/Makefile +++ b/Makefile @@ -2628,10 +2628,11 @@ ifndef JAVA_HOME JAVA_HOME := $(shell javac -J-XshowSettings:properties -version 2>&1 | awk '/java.home/{print $$NF}') $(warning Auto detected JAVA_HOME = ${JAVA_HOME}, if it is not true please set JAVA_HOME) endif +export JAVA_HOME ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) ifeq (${MAKE_RESTARTS},) # java_test is for generate all jni header files - dummy := $(shell $(MAKE) -C java java_test) + dummy := $(shell $(MAKE) -C java java_test JAVA_HOME=${JAVA_HOME}) endif endif JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux diff --git a/java/Makefile b/java/Makefile index fea0da7f24..3e21971e9f 100644 --- a/java/Makefile +++ b/java/Makefile @@ -250,6 +250,10 @@ JAVA_TESTCLASSPATH = $(JAVA_JUNIT_JAR_PATH):$(JAVA_HAMCREST_JAR_PATH):$(JAVA_MOC MVN_LOCAL = ~/.m2/repository # Set the path of the java commands +ifndef JAVA_HOME + JAVA_HOME := $(shell javac -J-XshowSettings:properties -version 2>&1 | awk '/java.home/{print $$NF}') + $(warning Auto detected JAVA_HOME = ${JAVA_HOME}, if it is not true please set JAVA_HOME) +endif ifeq ($(JAVA_CMD),) ifneq ($(JAVA_HOME),) JAVA_CMD := $(JAVA_HOME)/bin/java From 92f97aeb846cd016b09a3d67e0347c92de723e78 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 14 Aug 2025 23:52:47 +0800 Subject: [PATCH 011/175] Makefile for java: fix for generate header with max parallel build --- Makefile | 20 
+++++++++----------- java/Makefile | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 026b556d0e..47b5bc6fe2 100644 --- a/Makefile +++ b/Makefile @@ -621,9 +621,6 @@ am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) am__v_at_0 = @ am__v_at_1 = -export AM_V_at -export AM_V_GEN - AM_V_CC = $(am__v_CC_$(V)) am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) am__v_CC_0 = @echo " CC " $@; @@ -2629,12 +2626,6 @@ ifndef JAVA_HOME $(warning Auto detected JAVA_HOME = ${JAVA_HOME}, if it is not true please set JAVA_HOME) endif export JAVA_HOME -ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) - ifeq (${MAKE_RESTARTS},) - # java_test is for generate all jni header files - dummy := $(shell $(MAKE) -C java java_test JAVA_HOME=${JAVA_HOME}) - endif -endif JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux ifeq ($(PLATFORM), OS_SOLARIS) ARCH := $(shell isainfo -b) @@ -2979,7 +2970,7 @@ jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. -I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) +rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) rocksdbjava-header ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif @@ -3005,9 +2996,11 @@ install-jni: rocksdbjava rocksdbjava-header: $(AM_V_GEN)$(MAKE) -C java java_test + $(AM_V_at)touch $@ jclean: cd java;$(MAKE) clean; + $(AM_V_at)rm -f rocksdbjava-header jtest_compile: rocksdbjava cd java;$(MAKE) java_test @@ -3159,7 +3152,12 @@ endif # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. 
-$(OBJ_DIR)/java/%.cc.d: java/%.cc +ifeq (${MAKE_RESTARTS},) + ifeq ($(wildcard rocksdbjava-header),) + GEN_ROCKSDB_JAVA_HEADER := rocksdbjava-header + endif +endif +$(OBJ_DIR)/java/%.cc.d: java/%.cc ${GEN_ROCKSDB_JAVA_HEADER} $(AM_V_at)mkdir -p $(@D) $(AM_V_at)$(CXX) $(CXXFLAGS) \ -Ijava -Ijava/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS)\ diff --git a/java/Makefile b/java/Makefile index 3e21971e9f..1b91c2b49f 100644 --- a/java/Makefile +++ b/java/Makefile @@ -1,3 +1,14 @@ +AM_DEFAULT_VERBOSITY ?= 0 + +AM_V_GEN = $(am__v_GEN_$(V)) +am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_$(V)) +am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) +am__v_at_0 = @ +am__v_at_1 = + NATIVE_JAVA_CLASSES = \ org.rocksdb.AbstractCompactionFilter\ org.rocksdb.AbstractCompactionFilterFactory\ @@ -372,8 +383,7 @@ javadocs: java javalib: java java_test javadocs java: java-version - $(AM_V_GEN)mkdir -p $(MAIN_CLASSES) - $(AM_V_GEN)mkdir -p $(NATIVE_INCLUDE) + $(AM_V_GEN)mkdir -p $(MAIN_CLASSES) $(NATIVE_INCLUDE) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -h $(NATIVE_INCLUDE)-tmp -d $(MAIN_CLASSES) $(SOURCES) $(AM_V_at)cd $(NATIVE_INCLUDE)-tmp; \ for f in *.h; do \ From ccb8d448eb0241301528cb0a8f764001cd4528f7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 Aug 2025 13:32:09 +0800 Subject: [PATCH 012/175] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index c81ef76ced..333154e209 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit c81ef76ced5dbf53ee138d4b8e3b6696bf2b2ea9 +Subproject commit 333154e2099689de391ce9e3d042bd6cde82d8ef From d595abd889f29b390b1c9db6faed911648e91eaa Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 Aug 2025 15:49:10 +0800 Subject: [PATCH 013/175] java: Add native SidePluginRepo.importJson(String) --- 
java/rocksjni/side_plugin_repo_jni.cc | 15 +++++++++++++++ .../src/main/java/org/rocksdb/SidePluginRepo.java | 1 + 2 files changed, 16 insertions(+) diff --git a/java/rocksjni/side_plugin_repo_jni.cc b/java/rocksjni/side_plugin_repo_jni.cc index 6616937872..f10d32fe7e 100644 --- a/java/rocksjni/side_plugin_repo_jni.cc +++ b/java/rocksjni/side_plugin_repo_jni.cc @@ -68,6 +68,21 @@ void Java_org_rocksdb_SidePluginRepo_importAutoFile } } +JNIEXPORT void JNICALL Java_org_rocksdb_SidePluginRepo_importJson +(JNIEnv* env, jobject jrepo, jstring jstrJson) +{ + const auto* strjson = env->GetStringUTFChars(jstrJson, nullptr); + ROCKSDB_VERIFY(strjson != nullptr); + jclass clazz = env->GetObjectClass(jrepo); + jfieldID handleFieldID = env->GetFieldID(clazz, "nativeHandle_", "J"); // long + auto repo = (SidePluginRepo*)env->GetLongField(jrepo, handleFieldID); + auto status = repo->Import(std::string(strjson)); + env->ReleaseStringUTFChars(jstrJson, strjson); + if (!status.ok()) { + RocksDBExceptionJni::ThrowNew(env, status); + } +} + static jobject CreateJDB (JNIEnv* env, DB* db, ColumnFamilyHandle** cfh_a, size_t cfh_n) { diff --git a/java/src/main/java/org/rocksdb/SidePluginRepo.java b/java/src/main/java/org/rocksdb/SidePluginRepo.java index a4b848f167..2ceb57037a 100644 --- a/java/src/main/java/org/rocksdb/SidePluginRepo.java +++ b/java/src/main/java/org/rocksdb/SidePluginRepo.java @@ -12,6 +12,7 @@ public class SidePluginRepo extends RocksObject { RocksDB.loadLibrary(); } public native void importAutoFile(String fname) throws RocksDBException; + public native void importJson(String strJson) throws RocksDBException; public RocksDB openDB(String js) throws RocksDBException { RocksDB db = nativeOpenDB(nativeHandle_, js); dblist_.add(db); From cfc420360a8fdfc3aac59458d45f9e1cbe438d29 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 Aug 2025 19:28:59 +0800 Subject: [PATCH 014/175] java: Add SidePluginRepo.closeOneDB(db) --- java/rocksjni/side_plugin_repo_jni.cc | 16 
++++++++++++++++ .../main/java/org/rocksdb/SidePluginRepo.java | 6 ++++++ sideplugin/rockside | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/side_plugin_repo_jni.cc b/java/rocksjni/side_plugin_repo_jni.cc index f10d32fe7e..4029df64f6 100644 --- a/java/rocksjni/side_plugin_repo_jni.cc +++ b/java/rocksjni/side_plugin_repo_jni.cc @@ -193,6 +193,22 @@ void Java_org_rocksdb_SidePluginRepo_nativeCloseAllDB repo->CloseAllDB(false); // dont close DB and cf } +/* + * Class: org_rocksdb_SidePluginRepo + * Method: nativeCloseOneDB + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_rocksdb_SidePluginRepo_nativeCloseOneDB +(JNIEnv* env, jobject, jlong jhrepo, jlong jhdb) +{ + auto repo = (SidePluginRepo*)jhrepo; + auto db = (DB*)jhdb; + Status s = repo->CloseOneDB(db, false); // dont close DB and cf + if (!s.ok()) { + RocksDBExceptionJni::ThrowNew(env, s); + } +} + /* * Class: org_rocksdb_SidePluginRepo * Method: nativePutDB diff --git a/java/src/main/java/org/rocksdb/SidePluginRepo.java b/java/src/main/java/org/rocksdb/SidePluginRepo.java index 2ceb57037a..ff2e2965d3 100644 --- a/java/src/main/java/org/rocksdb/SidePluginRepo.java +++ b/java/src/main/java/org/rocksdb/SidePluginRepo.java @@ -66,6 +66,11 @@ public void closeAllDB() { } dblist_ = null; } + public void closeOneDB(RocksDB db) { + if (dblist_.remove(db)) { + nativeCloseOneDB(nativeHandle_, db.nativeHandle_); + } + } public ColumnFamilyHandle createCF(RocksDB db, String cfname, String spec) throws RocksDBException { long cfh = nativeCreateCF(nativeHandle_, db.nativeHandle_, cfname, spec); return new ColumnFamilyHandle(db, cfh); @@ -88,6 +93,7 @@ public void put(String name, String spec, RocksDB db) { // call native->CloseAllDB(false) private native void nativeCloseAllDB(long handle); + private native void nativeCloseOneDB(long handle, long db_handle); public void put(String name, Options opt) { // vscode sucks on text block, use plain stupid string literal diff --git 
a/sideplugin/rockside b/sideplugin/rockside index 333154e209..78eefac1c0 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 333154e2099689de391ce9e3d042bd6cde82d8ef +Subproject commit 78eefac1c03047e4c1dd0fa187b3ec8d2e61b854 From e14ea2d50e343c3497aa5ecda0db701d9d0c3362 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 Aug 2025 21:41:05 +0800 Subject: [PATCH 015/175] EventHelpers::LogAndNotifyTableFileCreationFinished: log the reason --- db/event_helpers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/db/event_helpers.cc b/db/event_helpers.cc index eb57e38618..6d1f10a152 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -82,6 +82,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( AppendCurrentTime(&jwriter); jwriter << "cf_name" << cf_name << "job" << job_id << "event" << "table_file_creation" + << "reason" << enum_cstr(reason, "unknown") << "file_number" << fd.GetNumber() << "file_size" << fd.GetFileSize() << "file_checksum" << Slice(file_checksum).ToString(true) << "file_checksum_func_name" From 4fb3dd71842773b244c61e85fe97da1db1bf3e9f Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 Aug 2025 23:39:49 +0800 Subject: [PATCH 016/175] java: Add SidePluginRepo.createCFWithImport --- java/rocksjni/side_plugin_repo_jni.cc | 44 +++++++++++++++++++ .../main/java/org/rocksdb/SidePluginRepo.java | 15 +++++++ sideplugin/rockside | 2 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/side_plugin_repo_jni.cc b/java/rocksjni/side_plugin_repo_jni.cc index 4029df64f6..1f3735e416 100644 --- a/java/rocksjni/side_plugin_repo_jni.cc +++ b/java/rocksjni/side_plugin_repo_jni.cc @@ -323,6 +323,8 @@ JNIEXPORT jlong JNICALL Java_org_rocksdb_SidePluginRepo_nativeCreateCF auto db = (DB*)hdb; DB_MultiCF* dbm = Get_DB_MultiCF(env, db, repo); if (!dbm) { + Status status = Status::InvalidArgument("DB_MultiCF not found", db->GetName()); + RocksDBExceptionJni::ThrowNew(env, status); return 0; } 
const char* cfname = env->GetStringUTFChars(jcfname, nullptr); @@ -340,6 +342,48 @@ JNIEXPORT jlong JNICALL Java_org_rocksdb_SidePluginRepo_nativeCreateCF return (jlong)cfh; } +/* + * Class: org_rocksdb_SidePluginRepo + * Method: nativeCreateCFWithImport + * Signature: (JJLjava/lang/String;Ljava/lang/String;J[J)J + */ +JNIEXPORT jlong JNICALL Java_org_rocksdb_SidePluginRepo_nativeCreateCFWithImport +(JNIEnv* env, jobject, jlong hrepo, jlong hdb, + jstring jcfname, jstring jspec, + jlong jimport_options, jlongArray jmetadatas) +{ + auto repo = (SidePluginRepo*)hrepo; + auto db = (DB*)hdb; + DB_MultiCF* dbm = Get_DB_MultiCF(env, db, repo); + if (!dbm) { + Status status = Status::InvalidArgument("DB_MultiCF not found", db->GetName()); + RocksDBExceptionJni::ThrowNew(env, status); + return 0; + } + auto import_options = (ImportColumnFamilyOptions*)jimport_options; + auto metadata_handles = env->GetLongArrayElements(jmetadatas, nullptr); + const char* cfname = env->GetStringUTFChars(jcfname, nullptr); + const char* spec = env->GetStringUTFChars(jspec, nullptr); + ROCKSDB_SCOPE_EXIT( + env->ReleaseStringUTFChars(jspec, spec); + env->ReleaseStringUTFChars(jcfname, cfname); + env->ReleaseLongArrayElements(jmetadatas, metadata_handles, 0); + ); + jsize metadata_count = env->GetArrayLength(jmetadatas); + std::vector metadatas(metadata_count); + for (jsize i = 0; i < metadata_count; i++) { + metadatas[i] = (const ExportImportFilesMetaData*)metadata_handles[i]; + } + ColumnFamilyHandle* cfh = nullptr; + Status status = dbm->CreateColumnFamilyWithImport + (cfname, *import_options, metadatas, spec, &cfh); + if (!status.ok()) { + RocksDBExceptionJni::ThrowNew(env, status); + return 0; + } + return (jlong)cfh; +} + /* * Class: org_rocksdb_SidePluginRepo * Method: nativeDropCF diff --git a/java/src/main/java/org/rocksdb/SidePluginRepo.java b/java/src/main/java/org/rocksdb/SidePluginRepo.java index ff2e2965d3..402e91f482 100644 --- 
a/java/src/main/java/org/rocksdb/SidePluginRepo.java +++ b/java/src/main/java/org/rocksdb/SidePluginRepo.java @@ -75,6 +75,17 @@ public ColumnFamilyHandle createCF(RocksDB db, String cfname, String spec) throw long cfh = nativeCreateCF(nativeHandle_, db.nativeHandle_, cfname, spec); return new ColumnFamilyHandle(db, cfh); } + public ColumnFamilyHandle createCFWithImport(RocksDB db, String cfname, String spec, + final ImportColumnFamilyOptions importColumnFamilyOptions, + final List metadatas) throws RocksDBException { + long[] metadataHandles = new long[metadatas.size()]; + for (int i = 0; i < metadatas.size(); i++) { + metadataHandles[i] = metadatas.get(i).nativeHandle_; + } + long cfh = nativeCreateCFWithImport(nativeHandle_, db.nativeHandle_, + cfname, spec, importColumnFamilyOptions.nativeHandle_, metadataHandles); + return new ColumnFamilyHandle(db, cfh); + } public void dropCF(RocksDB db, String cfname) throws RocksDBException { nativeDropCF(nativeHandle_, db.nativeHandle_, cfname); } @@ -135,6 +146,10 @@ public DBOptions getDBOptions(String name) { private native void nativeDropCF(long handle, long dbh, String cfname) throws RocksDBException; private native void nativeDropCF(long handle, long dbh, long cfh) throws RocksDBException; + private native long nativeCreateCFWithImport(final long handle, + final long dbHandle, final String columnFamilyName, final String spec, + long importCFOptions, final long[] metadataHandleList) throws RocksDBException; + public SidePluginRepo() { super(newSidePluginRepo()); } diff --git a/sideplugin/rockside b/sideplugin/rockside index 78eefac1c0..5da72560c7 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 78eefac1c03047e4c1dd0fa187b3ec8d2e61b854 +Subproject commit 5da72560c76ba7bfbc827126258a4abb028aa1b0 From a198bcabb665a2659bfac6a73919cf8c0613f8e7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 17:48:01 +0800 Subject: [PATCH 017/175] update submodule rockside --- 
sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 5da72560c7..44a3eaf000 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 5da72560c76ba7bfbc827126258a4abb028aa1b0 +Subproject commit 44a3eaf0003fbf924df4af69d31806d650731fe1 From 8f5ff93963e50f2c8507139af85559e377759258 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 20:53:28 +0800 Subject: [PATCH 018/175] java: Java_org_rocksdb_RocksDB_closeDatabase: add log --- java/rocksjni/rocksjni.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 695c75d2a0..ad96395bc4 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -29,6 +29,9 @@ #include "rocksjni/kv_helper.h" #include "rocksjni/portal.h" +#include +#include + #ifdef min #undef min #endif @@ -308,6 +311,11 @@ void Java_org_rocksdb_RocksDB_closeDatabase(JNIEnv* env, jclass, jlong jhandle) { auto* db = reinterpret_cast(jhandle); assert(db != nullptr); + if (ROCKSDB_NAMESPACE::SidePluginRepo::DebugLevel() >= 1) { + fprintf(stderr, + "%s: INFO: %s:%d: Java_org_rocksdb_RocksDB_closeDatabase(): db = %p, dbname = %s\n", + terark::StrDateTimeNow(), __FILE__, __LINE__, db, db->GetName().c_str()); + } ROCKSDB_NAMESPACE::Status s = db->Close(); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } From 220c79b00320b8ecbae2f02efbf081b2d788c5a3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 20:54:38 +0800 Subject: [PATCH 019/175] java: SidePluginRepo.closeOneDB() multi close(db) is safe --- java/src/main/java/org/rocksdb/SidePluginRepo.java | 1 + 1 file changed, 1 insertion(+) diff --git a/java/src/main/java/org/rocksdb/SidePluginRepo.java b/java/src/main/java/org/rocksdb/SidePluginRepo.java index 402e91f482..90c21b4628 100644 --- a/java/src/main/java/org/rocksdb/SidePluginRepo.java +++ 
b/java/src/main/java/org/rocksdb/SidePluginRepo.java @@ -69,6 +69,7 @@ public void closeAllDB() { public void closeOneDB(RocksDB db) { if (dblist_.remove(db)) { nativeCloseOneDB(nativeHandle_, db.nativeHandle_); + db.close(); // can be called multiple times } } public ColumnFamilyHandle createCF(RocksDB db, String cfname, String spec) throws RocksDBException { From a2654763aaf7a2a2fd0d10e6d32dec0c10438c5d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 21:00:12 +0800 Subject: [PATCH 020/175] Update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 44a3eaf000..64dab97722 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 44a3eaf0003fbf924df4af69d31806d650731fe1 +Subproject commit 64dab977225effd569f1ef9c1dee0bc1752df17a From ce4136319969116e798da22abfa081e445669687 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 21:54:39 +0800 Subject: [PATCH 021/175] Add SidePluginRepo.removeOneDB(db), without db.close() --- java/src/main/java/org/rocksdb/SidePluginRepo.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/src/main/java/org/rocksdb/SidePluginRepo.java b/java/src/main/java/org/rocksdb/SidePluginRepo.java index 90c21b4628..d949f6e053 100644 --- a/java/src/main/java/org/rocksdb/SidePluginRepo.java +++ b/java/src/main/java/org/rocksdb/SidePluginRepo.java @@ -72,6 +72,12 @@ public void closeOneDB(RocksDB db) { db.close(); // can be called multiple times } } + public void removeOneDB(RocksDB db) { + // just remove from the repo + if (dblist_.remove(db)) { + nativeCloseOneDB(nativeHandle_, db.nativeHandle_); + } + } public ColumnFamilyHandle createCF(RocksDB db, String cfname, String spec) throws RocksDBException { long cfh = nativeCreateCF(nativeHandle_, db.nativeHandle_, cfname, spec); return new ColumnFamilyHandle(db, cfh); From 6c9426a2d56bd182708f2098c74030305e26c211 Mon Sep 17 
00:00:00 2001 From: rockeet Date: Sun, 17 Aug 2025 22:46:16 +0800 Subject: [PATCH 022/175] java: Add RocksIterator.refreshForDatabaseGC() --- java/rocksjni/iterator.cc | 19 +++++++++++++++++++ .../main/java/org/rocksdb/RocksIterator.java | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index 0d4d3e13db..8756e65861 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -431,3 +431,22 @@ jint Java_org_rocksdb_RocksIterator_valueByteArray0( return static_cast(value_slice.size()); } + +/* + * Class: org_rocksdb_RocksIterator + * Method: nativeRefreshForDatabaseGC + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_rocksdb_RocksIterator_nativeRefreshForDatabaseGC +(JNIEnv* env, jobject, jlong jiter) +{ + auto zc_it = reinterpret_cast(jiter); + auto iter = zc_it->iter; + bool is_valid = iter->Valid(); + ROCKSDB_NAMESPACE::Status s = iter->RefreshKeepSnapshot(true); + if (is_valid) { + zc_it->key = iter->key(); + zc_it->value = iter->value(); + } + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); +} diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index 4739c99218..f72f6592ad 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -402,6 +402,12 @@ public final boolean isDefaultEagerFetchValue() { prev0(nativeHandle_ | eagerFetchValue_); } + // iter position is kept and native key/value ptr may be updated + public final void refreshForDatabaseGC() throws RocksDBException { + nativeRefreshForDatabaseGC(nativeHandle_); + } + final native void nativeRefreshForDatabaseGC(long handle) throws RocksDBException; + @Override protected final native void disposeInternal(final long handle); @Override final native boolean isValid0(long handle); @Override final native void seekToFirst0(long handle); From b6bedbd111cf29f21b64016497cdd1f2c73ccfe7 Mon Sep 17 
00:00:00 2001 From: leipeng Date: Mon, 18 Aug 2025 17:27:52 +0800 Subject: [PATCH 023/175] JAVA_ARGS += --add-opens java.base/jdk.internal.misc=ALL-UNNAMED --- java/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/java/Makefile b/java/Makefile index 1b91c2b49f..1d760a713a 100644 --- a/java/Makefile +++ b/java/Makefile @@ -310,6 +310,7 @@ ifeq (${ROCKSDB_FORCE_DIRECT_BUFFER_ZERO_COPY},true) JAVA_ARGS += --add-opens java.base/jdk.internal.ref=ALL-UNNAMED endif JAVA_ARGS += --add-opens java.base/java.nio=ALL-UNNAMED +JAVA_ARGS += --add-opens java.base/jdk.internal.misc=ALL-UNNAMED JAVAC_ARGS ?= From 780711275d6f92c89811970761f6f4339dda5494 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 Aug 2025 09:32:54 +0800 Subject: [PATCH 024/175] java: use ReadOptions::just_check_key_exists --- java/rocksjni/rocksjni.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index ad96395bc4..2680927871 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -2502,14 +2502,17 @@ jboolean key_exists_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); const bool may_exist = + true ? 
true : // minimize code diff, toplingdb doesn't need KeyMayExist db->KeyMayExist(read_opts, cf_handle, key_slice, &value, &value_found); if (may_exist) { ROCKSDB_NAMESPACE::Status s; + read_opts.just_check_key_exists = true; { ROCKSDB_NAMESPACE::PinnableSlice pinnable_val; s = db->Get(read_opts, cf_handle, key_slice, &pinnable_val); } + read_opts.just_check_key_exists = false; if (s.IsNotFound()) { return JNI_FALSE; } else if (s.ok()) { From 1f74b47a78d6235311b1e3f934d5f785cfc138ce Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 Aug 2025 13:04:23 +0800 Subject: [PATCH 025/175] ColumnFamilyOptions::Dump(): print memtable fac options --- options/options.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/options/options.cc b/options/options.cc index 8e6310693c..05e800e5d8 100644 --- a/options/options.cc +++ b/options/options.cc @@ -169,6 +169,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const { sst_partitioner_factory ? sst_partitioner_factory->Name() : "None"); ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s", memtable_factory->Name()); + ROCKS_LOG_HEADER(log, " memtable_factory options: %s", + memtable_factory->GetPrintableOptions().c_str()); ROCKS_LOG_HEADER(log, " Options.table_factory: %s", table_factory->Name()); ROCKS_LOG_HEADER(log, " table_factory options: %s", From 2674efcc36f9256f7b3b5b473f6365ae9fd75290 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 19 Aug 2025 14:57:42 +0800 Subject: [PATCH 026/175] Makefile: jtest MAKE_UNIT_TEST ?= 1 --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 47b5bc6fe2..f4b8c3d636 100644 --- a/Makefile +++ b/Makefile @@ -344,13 +344,15 @@ ifeq (${DEBUG_LEVEL}, 2) BUILD_TYPE_SIG := d OBJ_DIR := ${BUILD_ROOT}/dbg endif -ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2, $(MAKECMDGOALS)),) +ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) + 
MAKE_UNIT_TEST ?= 1 +endif +ifeq (${MAKE_UNIT_TEST},1) CXXFLAGS += -DROCKSDB_UNIT_TEST CXXFLAGS += -DROCKSDB_DYNAMIC_CREATE_CF CXXFLAGS += -DTOPLINGDB_WITH_TIMESTAMP CXXFLAGS += -DTOPLINGDB_WITH_WIDE_COLUMNS CXXFLAGS += -DTOPLINGDB_WITH_FABRICATED_COMPLEXITY - MAKE_UNIT_TEST := 1 OBJ_DIR := $(subst build/,build-ut/,${OBJ_DIR}) endif From 82bad0104c825e3f693eb680f66a31bd0b901a6f Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 19 Aug 2025 14:59:00 +0800 Subject: [PATCH 027/175] Add KeyValuePopulator for zero copy write --- db/db_impl/db_impl_write.cc | 44 ++++++ db/write_batch.cc | 222 +++++++++++++++++++++++++++++ include/rocksdb/db.h | 4 + include/rocksdb/write_batch.h | 4 + include/rocksdb/write_batch_base.h | 25 ++++ java/rocksjni/rocksjni.cc | 80 +++++++++++ 6 files changed, 379 insertions(+) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 8bcd1a945e..fbef3d5b47 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -2502,6 +2502,17 @@ Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family, return Write(opt, &batch); } +Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, + opt.protection_bytes_per_key, + 0 /* default_cf_ts_sz */); + Status s = batch.Put(cf, kvp); + if (!s.ok()) { + return s; + } + return Write(opt, &batch); +} + Status DB::PutEntity(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const WideColumns& columns) { @@ -2566,6 +2577,17 @@ Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family, return Write(opt, &batch); } +Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, + opt.protection_bytes_per_key, + 0 /* default_cf_ts_sz */); + Status s = batch.Delete(cf, kvp); + if (!s.ok()) { + return s; + } + return 
Write(opt, &batch); +} + Status DB::SingleDelete(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key) { WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, @@ -2594,6 +2616,17 @@ Status DB::SingleDelete(const WriteOptions& opt, return Write(opt, &batch); } +Status DB::SingleDelete(const WriteOptions& opt, ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, + opt.protection_bytes_per_key, + 0 /* default_cf_ts_sz */); + Status s = batch.SingleDelete(cf, kvp); + if (!s.ok()) { + return s; + } + return Write(opt, &batch); +} + Status DB::DeleteRange(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key) { @@ -2651,4 +2684,15 @@ Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family, return Write(opt, &batch); } +Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, + opt.protection_bytes_per_key, + 0 /* default_cf_ts_sz */); + Status s = batch.Merge(cf, kvp); + if (!s.ok()) { + return s; + } + return Write(opt, &batch); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/write_batch.cc b/db/write_batch.cc index 34ae22c57d..4765b46ac4 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -961,6 +961,78 @@ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key, SliceParts(&value, 1)); } +static void DoRevertWriteBatch(std::string* rep, size_t old_size) { + terark::string_resize_no_touch_memory(rep, old_size); + char* ptr = rep->data(); + EncodeFixed32(ptr + 8, DecodeFixed32(ptr + 8) - 1); // Revert Batch Count + ptr[old_size] = '\0'; // end of str +} + +// use ptr as a flag to indicate success or failure, +// if ptr is nullptr, it means the operation succeeded +static inline +void CommitOrRevertWriteBatch(std::string* rep, size_t old_size, char* ptr) { + if (UNLIKELY(ptr != 
nullptr)) { // failed, revert the batch + DoRevertWriteBatch(rep, old_size); + } +} + +Status WriteBatchBase::Put(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + std::unique_ptr key_buf(new char[kvp.key_len()]); + std::unique_ptr val_buf(new char[kvp.val_len()]); + kvp.PopulateKeyValue(key_buf.get(), val_buf.get()); + Slice key(key_buf.get(), kvp.key_len()); + Slice val(val_buf.get(), kvp.val_len()); + return Put(cf, key, val); +} +Status WriteBatch::Put(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + const size_t key_len = kvp.key_len(); + const size_t val_len = kvp.val_len(); + if (UNLIKELY(key_len > size_t{std::numeric_limits::max()})) { + return Status::InvalidArgument("key is too large"); + } + if (UNLIKELY(val_len > size_t{std::numeric_limits::max()})) { + return Status::InvalidArgument("value is too large"); + } + uint32_t cf_id = nullptr == cf ? 0 : cf->GetID(); + if (LIKELY(nullptr == prot_info_)) { + size_t old_size = rep_.size(); + size_t inc_size = 1 + + (cf_id ? 
VarUint32Length(cf_id) : 0) + + VarUint32Length(uint32_t(key_len)) + key_len + + VarUint32Length(uint32_t(val_len)) + val_len; + if (UNLIKELY(max_bytes_ && old_size + inc_size > max_bytes_)) { + return Status::MemoryLimit(); + } + terark::string_resize_no_touch_memory(&rep_, old_size + inc_size); + char* ptr = rep_.data(); + EncodeFixed32(ptr + 8, DecodeFixed32(ptr + 8) + 1); // Update Batch Count + ptr += old_size; + if (cf_id == 0) { + ptr[0] = static_cast(kTypeValue); + ptr += 1; + } else { + ptr[0] = static_cast(kTypeColumnFamilyValue); + ptr = EncodeVarint32(ptr + 1, cf_id); + } + char* key = EncodeVarint32(ptr, key_len); + char* val = EncodeVarint32(key+ key_len, val_len); + ROCKSDB_SCOPE_EXIT(CommitOrRevertWriteBatch(&rep_, old_size, ptr)); + kvp.PopulateKeyValue(key, val); + val[val_len] = '\0'; // end of str + content_flags_.fetch_or(ContentFlags::HAS_PUT, std::memory_order_relaxed); + ptr = nullptr; // notify success + return Status::OK(); + } + // fallback to the prot_info_ based code path + std::unique_ptr key_buf(new char[key_len]); + std::unique_ptr val_buf(new char[val_len]); + kvp.PopulateKeyValue(key_buf.get(), val_buf.get()); + Slice key(key_buf.get(), key_len); + Slice val(val_buf.get(), val_len); + return WriteBatchInternal::Put(this, cf_id, key, val); +} + Status WriteBatchInternal::CheckSlicePartsLength(const SliceParts& key, const SliceParts& value) { size_t total_key_bytes = 0; @@ -1266,6 +1338,52 @@ Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) { #endif } +Status WriteBatchBase::Delete(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + std::unique_ptr key_buf(new char[kvp.key_len()]); + kvp.PopulateKeyValue(key_buf.get(), nullptr /* value */); + Slice key(key_buf.get(), kvp.key_len()); + return Delete(cf, key); +} +Status WriteBatch::Delete(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + const size_t key_len = kvp.key_len(); + if (UNLIKELY(key_len > size_t{std::numeric_limits::max()})) { + 
return Status::InvalidArgument("key is too large"); + } + uint32_t cf_id = nullptr == cf ? 0 : cf->GetID(); + if (LIKELY(nullptr == prot_info_)) { + size_t old_size = rep_.size(); + size_t inc_size = 1 + + (cf_id ? VarUint32Length(cf_id) : 0) + + VarUint32Length(uint32_t(key_len)) + key_len; + if (UNLIKELY(max_bytes_ && old_size + inc_size > max_bytes_)) { + return Status::MemoryLimit(); + } + terark::string_resize_no_touch_memory(&rep_, old_size + inc_size); + char* ptr = rep_.data(); + EncodeFixed32(ptr + 8, DecodeFixed32(ptr + 8) + 1); // Update Batch Count + ptr += old_size; + if (cf_id == 0) { + ptr[0] = static_cast(kTypeDeletion); + ptr += 1; + } else { + ptr[0] = static_cast(kTypeColumnFamilyDeletion); + ptr = EncodeVarint32(ptr + 1, cf_id); + } + char* key = EncodeVarint32(ptr, key_len); + ROCKSDB_SCOPE_EXIT(CommitOrRevertWriteBatch(&rep_, old_size, ptr)); + kvp.PopulateKeyValue(key, nullptr /* value */); + key[key_len] = '\0'; // end of str + content_flags_.fetch_or(ContentFlags::HAS_DELETE, + std::memory_order_relaxed); + ptr = nullptr; // notify success + return Status::OK(); + } + // fallback to the prot_info_ based code path + std::unique_ptr key_buf(new char[key_len]); + kvp.PopulateKeyValue(key_buf.get(), nullptr /* value */); + Slice key(key_buf.get(), key_len); + return WriteBatchInternal::Delete(this, cf_id, key); +} Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) { const Status s = CheckColumnFamilyTimestampSize(column_family, ts); @@ -1407,6 +1525,53 @@ Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, #endif } +Status WriteBatchBase::SingleDelete(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + std::unique_ptr key_buf(new char[kvp.key_len()]); + kvp.PopulateKeyValue(key_buf.get(), nullptr /* value */); + Slice key(key_buf.get(), kvp.key_len()); + return SingleDelete(cf, key); +} +Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, const KeyValuePopulator& 
kvp) { + const size_t key_len = kvp.key_len(); + if (UNLIKELY(key_len > size_t{std::numeric_limits::max()})) { + return Status::InvalidArgument("key is too large"); + } + uint32_t cf_id = nullptr == column_family ? 0 : column_family->GetID(); + if (LIKELY(nullptr == prot_info_)) { + size_t old_size = rep_.size(); + size_t inc_size = 1 + + (cf_id ? VarUint32Length(cf_id) : 0) + + VarUint32Length(uint32_t(key_len)) + key_len; + if (UNLIKELY(max_bytes_ && old_size + inc_size > max_bytes_)) { + return Status::MemoryLimit(); + } + terark::string_resize_no_touch_memory(&rep_, old_size + inc_size); + char* ptr = rep_.data(); + EncodeFixed32(ptr + 8, DecodeFixed32(ptr + 8) + 1); // Update Batch Count + ptr += old_size; + if (cf_id == 0) { + ptr[0] = static_cast(kTypeSingleDeletion); + ptr += 1; + } else { + ptr[0] = static_cast(kTypeColumnFamilySingleDeletion); + ptr = EncodeVarint32(ptr + 1, cf_id); + } + char* key = EncodeVarint32(ptr, key_len); + ROCKSDB_SCOPE_EXIT(CommitOrRevertWriteBatch(&rep_, old_size, ptr)); + kvp.PopulateKeyValue(key, nullptr /* value */); + key[key_len] = '\0'; // end of str + content_flags_.fetch_or(ContentFlags::HAS_SINGLE_DELETE, + std::memory_order_relaxed); + ptr = nullptr; // notify success + return Status::OK(); + } + // fallback to the prot_info_ based code path + std::unique_ptr key_buf(new char[key_len]); + kvp.PopulateKeyValue(key_buf.get(), nullptr /* value */); + Slice key(key_buf.get(), key_len); + return WriteBatchInternal::SingleDelete(this, cf_id, key); +} + Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) { const Status s = CheckColumnFamilyTimestampSize(column_family, ts); @@ -1699,6 +1864,63 @@ Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key, this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1)); } +Status WriteBatchBase::Merge(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + std::unique_ptr key_buf(new 
char[kvp.key_len()]); + std::unique_ptr val_buf(new char[kvp.val_len()]); + kvp.PopulateKeyValue(key_buf.get(), val_buf.get()); + Slice key(key_buf.get(), kvp.key_len()); + Slice val(val_buf.get(), kvp.val_len()); + return Merge(cf, key, val); +} +Status WriteBatch::Merge(ColumnFamilyHandle* cf, const KeyValuePopulator& kvp) { + const size_t key_len = kvp.key_len(); + const size_t val_len = kvp.val_len(); + if (UNLIKELY(key_len > size_t{std::numeric_limits::max()})) { + return Status::InvalidArgument("key is too large"); + } + if (UNLIKELY(val_len > size_t{std::numeric_limits::max()})) { + return Status::InvalidArgument("value is too large"); + } + uint32_t cf_id = nullptr == cf ? 0 : cf->GetID(); + if (LIKELY(nullptr == prot_info_)) { + size_t old_size = rep_.size(); + size_t inc_size = 1 + + (cf_id ? VarUint32Length(cf_id) : 0) + + VarUint32Length(uint32_t(key_len)) + key_len + + VarUint32Length(uint32_t(val_len)) + val_len; + if (UNLIKELY(max_bytes_ && old_size + inc_size > max_bytes_)) { + return Status::MemoryLimit(); + } + terark::string_resize_no_touch_memory(&rep_, old_size + inc_size); + char* ptr = rep_.data(); + EncodeFixed32(ptr + 8, DecodeFixed32(ptr + 8) + 1); // Update Batch Count + ptr += old_size; + if (cf_id == 0) { + ptr[0] = static_cast(kTypeMerge); + ptr += 1; + } else { + ptr[0] = static_cast(kTypeColumnFamilyMerge); + ptr = EncodeVarint32(ptr + 1, cf_id); + } + char* key = EncodeVarint32(ptr, key_len); + char* val = EncodeVarint32(key+ key_len, val_len); + ROCKSDB_SCOPE_EXIT(CommitOrRevertWriteBatch(&rep_, old_size, ptr)); + kvp.PopulateKeyValue(key, val); + val[val_len] = '\0'; // end of str + content_flags_.fetch_or(ContentFlags::HAS_MERGE, + std::memory_order_relaxed); + ptr = nullptr; // notify success + return Status::OK(); + } + // fallback to the prot_info_ based code path + std::unique_ptr key_buf(new char[key_len]); + std::unique_ptr val_buf(new char[val_len]); + kvp.PopulateKeyValue(key_buf.get(), val_buf.get()); + Slice 
key(key_buf.get(), key_len); + Slice val(val_buf.get(), val_len); + return WriteBatchInternal::Merge(this, cf_id, key, val); +} + Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id, const SliceParts& key, const SliceParts& value) { diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 5028c27c37..f641fa9ad7 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -462,6 +462,7 @@ class DB : public CacheAlignedNewDelete { const Slice& ts, const Slice& value) { return Put(options, DefaultColumnFamily(), key, ts, value); } + virtual Status Put(const WriteOptions&, ColumnFamilyHandle*, const KeyValuePopulator&); // Set the database entry for "key" in the column family specified by // "column_family" to the wide-column entity defined by "columns". If the key @@ -493,6 +494,7 @@ class DB : public CacheAlignedNewDelete { const Slice& ts) { return Delete(options, DefaultColumnFamily(), key, ts); } + virtual Status Delete(const WriteOptions&, ColumnFamilyHandle*, const KeyValuePopulator&); // Remove the database entry for "key". Requires that the key exists // and was not overwritten. Returns OK on success, and a non-OK status @@ -523,6 +525,7 @@ class DB : public CacheAlignedNewDelete { const Slice& ts) { return SingleDelete(options, DefaultColumnFamily(), key, ts); } + virtual Status SingleDelete(const WriteOptions&, ColumnFamilyHandle*, const KeyValuePopulator&); // Removes the database entries in the range ["begin_key", "end_key"), i.e., // including "begin_key" and excluding "end_key". Returns OK on success, and @@ -570,6 +573,7 @@ class DB : public CacheAlignedNewDelete { ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*ts*/, const Slice& /*value*/); + virtual Status Merge(const WriteOptions&, ColumnFamilyHandle*, const KeyValuePopulator&); // Apply the specified updates to the database. 
// If `updates` contains no update, WAL will still be synced if diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 426458cef8..8b7423386f 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -99,6 +99,7 @@ class WriteBatch : public WriteBatchBase { } Status Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& value) override; + Status Put(ColumnFamilyHandle*, const KeyValuePopulator&) override; // Variant of Put() that gathers output like writev(2). The key and value // that will be written to the database are concatenations of arrays of @@ -145,6 +146,7 @@ class WriteBatch : public WriteBatchBase { Status Delete(const Slice& key) override { return Delete(nullptr, key); } Status Delete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) override; + Status Delete(ColumnFamilyHandle*, const KeyValuePopulator&) override; // variant that takes SliceParts // These two variants of Delete(..., const SliceParts& key) can be used when @@ -164,6 +166,7 @@ class WriteBatch : public WriteBatchBase { } Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) override; + Status SingleDelete(ColumnFamilyHandle*, const KeyValuePopulator&) override; // variant that takes SliceParts Status SingleDelete(ColumnFamilyHandle* column_family, @@ -202,6 +205,7 @@ class WriteBatch : public WriteBatchBase { } Status Merge(ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*ts*/, const Slice& /*value*/) override; + Status Merge(ColumnFamilyHandle*, const KeyValuePopulator&) override; // variant that takes SliceParts Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, diff --git a/include/rocksdb/write_batch_base.h b/include/rocksdb/write_batch_base.h index 5b26ee543b..32b5709d4c 100644 --- a/include/rocksdb/write_batch_base.h +++ b/include/rocksdb/write_batch_base.h @@ -21,6 +21,27 @@ class 
ColumnFamilyHandle; class WriteBatch; struct SliceParts; +// Used for zero-copy Put/Delete/Merge operations +// The concrete class should be constructed with known key len and value len, +// and implement PopulateKeyValue() to populate the key and value buffers +// with the actual data to be written. +// +// WriteBatch will preallocate the key and value buffers with the +// key_len() and val_len() sizes, respectively. +class KeyValuePopulator { + public: + KeyValuePopulator() {} // intentionally leaves key_len_ and val_len_ uninitialized + KeyValuePopulator(size_t key_len, size_t val_len) + : key_len_(key_len), val_len_(val_len) {} + virtual ~KeyValuePopulator() {} + virtual void PopulateKeyValue(char* key_buf, char* val_buf) const = 0; + inline size_t key_len() const { return key_len_; } + inline size_t val_len() const { return val_len_; } + protected: + size_t key_len_; + size_t val_len_; +}; + // Abstract base class that defines the basic interface for a write batch. // See WriteBatch for a basic implementation and WrithBatchWithIndex for an // indexed implementation. @@ -34,6 +55,7 @@ class WriteBatchBase { virtual Status Put(const Slice& key, const Slice& value) = 0; virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& value) = 0; + virtual Status Put(ColumnFamilyHandle*, const KeyValuePopulator&); // Variant of Put() that gathers output like writev(2).
The key and value // that will be written to the database are concatenations of arrays of @@ -75,6 +97,7 @@ class WriteBatchBase { virtual Status Merge(const Slice& key, const Slice& value) = 0; virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& value) = 0; + virtual Status Merge(ColumnFamilyHandle*, const KeyValuePopulator&); // variant that takes SliceParts virtual Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, @@ -87,6 +110,7 @@ class WriteBatchBase { virtual Status Delete(const Slice& key) = 0; virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) = 0; + virtual Status Delete(ColumnFamilyHandle*, const KeyValuePopulator&); // variant that takes SliceParts virtual Status Delete(ColumnFamilyHandle* column_family, @@ -100,6 +124,7 @@ class WriteBatchBase { virtual Status SingleDelete(const Slice& key) = 0; virtual Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) = 0; + virtual Status SingleDelete(ColumnFamilyHandle*, const KeyValuePopulator&); // variant that takes SliceParts virtual Status SingleDelete(ColumnFamilyHandle* column_family, diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 2680927871..03d131fa4b 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -608,6 +608,40 @@ void Java_org_rocksdb_RocksDB_dropColumnFamilies( } } +#define JNI_USE_KEY_VALUE_POPULATOR 1 +class JNIKeyValuePopulator : public ROCKSDB_NAMESPACE::KeyValuePopulator { + JNIEnv* env_; + jbyteArray jkey_, jval_; + jint jkey_off_, jval_off_; +public: + virtual ~JNIKeyValuePopulator() = default; + JNIKeyValuePopulator(JNIEnv* env, + jbyteArray jkey, jint jkey_off, jint jkey_len, + jbyteArray jval, jint jval_off, jint jval_len) + : KeyValuePopulator(jkey_len, jval_len), env_(env), + jkey_(jkey), jval_(jval), jkey_off_(jkey_off), jval_off_(jval_off) { } + void PopulateKeyValue(char* key, char* 
val) const override { + env_->GetByteArrayRegion(jkey_, jkey_off_, (jint)key_len_, (jbyte*)key); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + env_->GetByteArrayRegion(jval_, jval_off_, (jint)val_len_, (jbyte*)val); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + } +}; +class JNIKeyOnlyPopulator : public ROCKSDB_NAMESPACE::KeyValuePopulator { + JNIEnv* env_; + jbyteArray jkey_; + jint jkey_off_; +public: + virtual ~JNIKeyOnlyPopulator() = default; + JNIKeyOnlyPopulator(JNIEnv* env, jbyteArray jkey, jint jkey_off, jint jkey_len) + : KeyValuePopulator(jkey_len, 0), env_(env), + jkey_(jkey), jkey_off_(jkey_off) { } + void PopulateKeyValue(char* key, char*) const override { + env_->GetByteArrayRegion(jkey_, jkey_off_, (jint)key_len_, (jbyte*)key); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + } +}; + ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Put @@ -625,10 +659,16 @@ void Java_org_rocksdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Put(default_write_options, nullptr, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Put(default_write_options, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -658,11 +698,17 @@ void Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, } try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = 
db->Put(default_write_options, cf_handle, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Put(default_write_options, cf_handle, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -684,10 +730,16 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, reinterpret_cast(jwrite_options_handle); try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Put(*write_options, nullptr, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Put(*write_options, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -714,10 +766,16 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( return; } try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Put(*write_options, cf_handle, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Put(*write_options, cf_handle, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -765,6 +823,10 @@ bool rocksdb_delete_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* 
cf_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { +#if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator kop(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::Status s = db->Delete(write_options, cf_handle, kop); +#else jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { @@ -784,6 +846,7 @@ bool rocksdb_delete_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, // cleanup delete[] key; +#endif if (s.ok()) { return true; @@ -883,6 +946,10 @@ bool rocksdb_single_delete_helper( const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, jint jkey_len) { +#if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator kop(env, jkey, 0, jkey_len); + ROCKSDB_NAMESPACE::Status s = db->SingleDelete(write_options, cf_handle, kop); +#else jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, 0, jkey_len, key); if (env->ExceptionCheck()) { @@ -901,6 +968,7 @@ bool rocksdb_single_delete_helper( } delete[] key; +#endif if (s.ok()) { return true; @@ -1389,10 +1457,16 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Merge(default_write_options, nullptr, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Merge(default_write_options, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -1467,11 +1541,17 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( reinterpret_cast(jcf_handle); if (cf_handle != 
nullptr) { try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Merge(*write_options, cf_handle, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Merge(*write_options, cf_handle, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } From b6f56abb842e8525fe234cf417325f501c02f7e0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 Aug 2025 17:33:55 +0800 Subject: [PATCH 028/175] jtest.sh: remove useless lines --- java/jtest.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/jtest.sh b/java/jtest.sh index f4149b79c3..ca9cf18b6e 100644 --- a/java/jtest.sh +++ b/java/jtest.sh @@ -11,7 +11,5 @@ export CPU="-march=native" export UPDATE_REPO=0 export BUILD_PREFIX=../build-toplingdb/ export PREFIX=/opt -export EXTRA_CXXFLAGS='-DROCKSDB_DYNAMIC_CREATE_CF -DTOPLINGDB_WITH_TIMESTAMP -DTOPLINGDB_WITH_WIDE_COLUMNS -DTOPLINGDB_WITH_FABRICATED_COMPLEXITY -DROCKSDB_UNIT_TEST' -#export RUN_JAVA_TESTS='%PerfContextTest' make -j`nproc` jtest From eab716192de855ce96c8230057b75a03a4594c2c Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 Aug 2025 19:23:49 +0800 Subject: [PATCH 029/175] java: RocksIterator: improve value0() for fetchValue() --- java/rocksjni/iterator.cc | 21 +++---------------- .../main/java/org/rocksdb/RocksIterator.java | 6 ++---- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index 8756e65861..7a5828a978 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -360,29 +360,14 @@ jint Java_org_rocksdb_RocksIterator_keyByteArray0(JNIEnv* env, jobject /*jobj*/, /* * Class: org_rocksdb_RocksIterator * Method: 
value0 - * Signature: (J)[B + * Signature: (J)V */ -jbyteArray Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, +void Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, jlong handle) { - auto* zc_it = reinterpret_cast(handle & jlong(~1L)); + auto* zc_it = reinterpret_cast(handle); assert(zc_it->iter->Valid()); assert(zc_it->key.data() != nullptr); zc_it->value = zc_it->iter->value(); - if (handle & 1) { // just fetch the value - return nullptr; // do not copy, just return nullptr - } - ROCKSDB_NAMESPACE::Slice value_slice = zc_it->value; - - jbyteArray jkeyValue = - env->NewByteArray(static_cast(value_slice.size())); - if (jkeyValue == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jkeyValue, 0, static_cast(value_slice.size()), - const_cast(reinterpret_cast(value_slice.data()))); - return jkeyValue; } /* diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index f72f6592ad..f317820175 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -77,9 +77,7 @@ public final boolean isValueFetched() { public final void fetchValue() { assert(isOwningHandle()); if (getZeroCopyValuePtr() == 0) { - byte[] val = value0(nativeHandle_ | 1); // or 1 indicate just do fetch - assert(val == null); // just fetch the value, not copy it, must be null - // this will not copy the value, just set the zero-copy pointer + value0(nativeHandle_); // just set the zero-copy value ptr and len assert(getZeroCopyValuePtr() != 0); } } @@ -429,5 +427,5 @@ final native void seekForPrevByteArray0( long handle, byte[] target, int targetOffset, int targetLen); @Override final native void status0(long handle) throws RocksDBException; - private native byte[] value0(long handle); + private native void value0(long handle); } From 3813748eb965cbccf5c30ffa37e36556de6bdd75 Mon Sep 17 00:00:00 
2001 From: rockeet Date: Tue, 19 Aug 2025 21:02:55 +0800 Subject: [PATCH 030/175] Makefile: improve for java --- Makefile | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index f4b8c3d636..ebf34e26c7 100644 --- a/Makefile +++ b/Makefile @@ -375,6 +375,7 @@ CXXFLAGS += \ LDFLAGS += -L${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared \ -lterark-{zbs,fsa,core}-${COMPILER}-${BUILD_TYPE_SIG} +TOPLING_ZBS_TARGET := ${BUILD_ROOT}/lib_shared/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.${PLATFORM_SHARED_EXT} GIT_TOPLING_ROCKS ?= git@github.com:rockeet/topling-rocks @@ -2972,7 +2973,7 @@ jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. -I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) rocksdbjava-header +rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET} ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif @@ -2998,11 +2999,10 @@ install-jni: rocksdbjava rocksdbjava-header: $(AM_V_GEN)$(MAKE) -C java java_test - $(AM_V_at)touch $@ jclean: cd java;$(MAKE) clean; - $(AM_V_at)rm -f rocksdbjava-header + $(AM_V_at)rm -f $(ALL_JNI_NATIVE_OBJECTS) jtest_compile: rocksdbjava cd java;$(MAKE) java_test @@ -3139,7 +3139,7 @@ endif # If skip dependencies is ON, skip including the dep files ifneq ($(SKIP_DEPENDS), 1) DEPFILES := $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES)) -ifneq ($(wildcard $(JAVA_HOME)/bin/javac),) +ifneq ($(filter j% rocksdbjava%, $(MAKECMDGOALS)),) DEPFILES += $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, ${ALL_JNI_NATIVE_SOURCES}) endif DEPFILES := $(patsubst %.cpp,$(OBJ_DIR)/%.cpp.d,$(DEPFILES)) @@ -3155,9 +3155,7 @@ endif # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. 
ifeq (${MAKE_RESTARTS},) - ifeq ($(wildcard rocksdbjava-header),) - GEN_ROCKSDB_JAVA_HEADER := rocksdbjava-header - endif + GEN_ROCKSDB_JAVA_HEADER := rocksdbjava-header endif $(OBJ_DIR)/java/%.cc.d: java/%.cc ${GEN_ROCKSDB_JAVA_HEADER} $(AM_V_at)mkdir -p $(@D) @@ -3201,7 +3199,6 @@ build_subset_tests: $(ROCKSDBTESTS_SUBSET) list_all_tests: echo "$(ROCKSDBTESTS_SUBSET)" -TOPLING_ZBS_TARGET := ${BUILD_ROOT}/lib_shared/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.${PLATFORM_SHARED_EXT} ${SHARED4}: ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET} ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: CXXFLAGS = ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: LDFLAGS = @@ -3283,7 +3280,7 @@ rust-support: $(filter-out util/build_version.cc, ${LIB_SOURCES}) $(OBJ_DIR)/uti # Remove the rules for which dependencies should not be generated and see if any are left. #If so, include the dependencies; if not, do not include the dependency files -ROCKS_DEP_RULES:=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test checkout_folly, $(MAKECMDGOALS)) +ROCKS_DEP_RULES:=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean package analyze tags unity.% checkout_folly, $(MAKECMDGOALS)) ROCKS_DEP_RULES:=$(filter-out rust-support, $(ROCKS_DEP_RULES)) ifneq ("$(ROCKS_DEP_RULES)", "") -include $(DEPFILES) From cc204c4077a51b73bafbc9af89447f563bada0e0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 Aug 2025 23:50:38 +0800 Subject: [PATCH 031/175] update submdoule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 64dab97722..30178b735b 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 64dab977225effd569f1ef9c1dee0bc1752df17a +Subproject commit 30178b735b215d818e3ee4996abcf916fb0e8fbf From d67089ad0f12f4874bbec3551dbcf5a86a5e4261 Mon 
Sep 17 00:00:00 2001 From: rockeet Date: Wed, 20 Aug 2025 01:12:18 +0800 Subject: [PATCH 032/175] java: improve key_may_exist_direct_helper --- java/rocksjni/rocksjni.cc | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 03d131fa4b..4a94b1c1ac 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -2560,9 +2560,6 @@ bool key_may_exist_direct_helper(JNIEnv* env, jlong jdb_handle, jboolean key_exists_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, char* key, jint jkey_len) { - std::string value; - bool value_found = false; - auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; @@ -2580,28 +2577,16 @@ jboolean key_exists_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, jread_opts_handle)); ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); - - const bool may_exist = - true ? true : // minimize code diff, toplingdb doesn't need KeyMayExist - db->KeyMayExist(read_opts, cf_handle, key_slice, &value, &value_found); - - if (may_exist) { - ROCKSDB_NAMESPACE::Status s; - read_opts.just_check_key_exists = true; - { - ROCKSDB_NAMESPACE::PinnableSlice pinnable_val; - s = db->Get(read_opts, cf_handle, key_slice, &pinnable_val); - } - read_opts.just_check_key_exists = false; - if (s.IsNotFound()) { - return JNI_FALSE; - } else if (s.ok()) { - return JNI_TRUE; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return JNI_FALSE; - } + read_opts.just_check_key_exists = true; + ROCKSDB_NAMESPACE::PinnableSlice pinnable_val; + auto s = db->Get(read_opts, cf_handle, key_slice, &pinnable_val); + read_opts.just_check_key_exists = false; + if (s.IsNotFound()) { + return JNI_FALSE; + } else if (s.ok()) { + return JNI_TRUE; } else { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return JNI_FALSE; } } @@ -2616,17 +2601,15 @@ jboolean 
Java_org_rocksdb_RocksDB_keyExists(JNIEnv* env, jobject, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { - jbyte* key = new jbyte[jkey_len]; + jbyte* key = (jbyte*)alloca(jkey_len); env->GetByteArrayRegion(jkey, jkey_offset, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; return JNI_FALSE; } else { jboolean key_exists = key_exists_helper(env, jdb_handle, jcf_handle, jread_opts_handle, reinterpret_cast(key), jkey_len); - delete[] key; return key_exists; } } From 3a6d1573a0bef9aa43624300712658c90391eb51 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 21 Aug 2025 21:46:18 +0800 Subject: [PATCH 033/175] Move control macro JNI_USE_KEY_VALUE_POPULATOR to Makefile --- Makefile | 2 ++ java/CMakeLists.txt | 3 +++ java/rocksjni/rocksjni.cc | 1 - 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ebf34e26c7..a874b0149b 100644 --- a/Makefile +++ b/Makefile @@ -2972,6 +2972,8 @@ rocksdbjavastaticnexusbundlejar: rocksdbjavageneratepom jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) +JNI_USE_KEY_VALUE_POPULATOR ?= 1 +${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -DJNI_USE_KEY_VALUE_POPULATOR=${JNI_USE_KEY_VALUE_POPULATOR} ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. 
-I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET} ifeq ($(JAVA_HOME),) diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 7d2d56b370..b65c992936 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -569,6 +569,9 @@ else () endif() +option(JNI_USE_KEY_VALUE_POPULATOR "use key value populator to reduce copy for write" 1) +add_definitions(-DJNI_USE_KEY_VALUE_POPULATOR=${JNI_USE_KEY_VALUE_POPULATOR}) + add_jar( rocksdbjni_test_classes SOURCES diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 4a94b1c1ac..834f795442 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -608,7 +608,6 @@ void Java_org_rocksdb_RocksDB_dropColumnFamilies( } } -#define JNI_USE_KEY_VALUE_POPULATOR 1 class JNIKeyValuePopulator : public ROCKSDB_NAMESPACE::KeyValuePopulator { JNIEnv* env_; jbyteArray jkey_, jval_; From 42ec4204618f83816fc53834a496611876e13a1e Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 21 Aug 2025 21:56:46 +0800 Subject: [PATCH 034/175] java: write_batch.cc: use KeyValuePopulator --- java/rocksjni/write_batch.cc | 105 ++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/write_batch.cc b/java/rocksjni/write_batch.cc index 452c6e3c61..d38f9f2185 100644 --- a/java/rocksjni/write_batch.cc +++ b/java/rocksjni/write_batch.cc @@ -20,7 +20,7 @@ #include "rocksdb/status.h" #include "rocksdb/write_buffer_manager.h" #include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "rocksjni/kv_helper.h" #include "rocksjni/writebatchhandlerjnicallback.h" #include "table/scoped_arena_iterator.h" @@ -117,6 +117,12 @@ struct JniWriteBatch : public WriteBatch { updateJavaAddrSizeCapFromNative(); return s; } + Status Put(CFH* cf, const KeyValuePopulator& kvp) override { + updateNativeDataSizeFromJava(); + Status s = 
WriteBatch::Put(cf, kvp); + updateJavaAddrSizeCapFromNative(); + return s; + } using WriteBatch::Merge; Status Merge(CFH* cf, const Slice& k, const Slice& v) override { updateNativeDataSizeFromJava(); @@ -131,6 +137,12 @@ struct JniWriteBatch : public WriteBatch { updateJavaAddrSizeCapFromNative(); return s; } + Status Merge(CFH* cf, const KeyValuePopulator& kvp) override { + updateNativeDataSizeFromJava(); + Status s = WriteBatch::Merge(cf, kvp); + updateJavaAddrSizeCapFromNative(); + return s; + } using WriteBatch::DeleteRange; Status DeleteRange(CFH* cf, const Slice& k, const Slice& v) override { updateNativeDataSizeFromJava(); @@ -158,6 +170,12 @@ struct JniWriteBatch : public WriteBatch { updateJavaAddrSizeCapFromNative(); return s; } + Status Delete(CFH* cf, const KeyValuePopulator& kvp) override { + updateNativeDataSizeFromJava(); + Status s = WriteBatch::Delete(cf, kvp); + updateJavaAddrSizeCapFromNative(); + return s; + } using WriteBatch::SingleDelete; Status SingleDelete(CFH* cf, const Slice& k) override { updateNativeDataSizeFromJava(); @@ -171,6 +189,12 @@ struct JniWriteBatch : public WriteBatch { updateJavaAddrSizeCapFromNative(); return s; } + Status SingleDelete(CFH* cf, const KeyValuePopulator& kvp) override { + updateNativeDataSizeFromJava(); + Status s = WriteBatch::SingleDelete(cf, kvp); + updateJavaAddrSizeCapFromNative(); + return s; + } Status PutLogData(const Slice& b) override { updateNativeDataSizeFromJava(); Status s = WriteBatch::PutLogData(b); @@ -180,6 +204,37 @@ struct JniWriteBatch : public WriteBatch { }; } // namespace ROCKSDB_NAMESPACE +class JNIKeyValuePopulator0 : public ROCKSDB_NAMESPACE::KeyValuePopulator { + JNIEnv* env_; + jbyteArray jkey_, jval_; +public: + virtual ~JNIKeyValuePopulator0() = default; + JNIKeyValuePopulator0(JNIEnv* env, + jbyteArray jkey, jint jkey_len, + jbyteArray jval, jint jval_len) + : KeyValuePopulator(jkey_len, jval_len), + env_(env), jkey_(jkey), jval_(jval) { } + void PopulateKeyValue(char* key, 
char* val) const override { + env_->GetByteArrayRegion(jkey_, 0, (jint)key_len_, (jbyte*)key); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + env_->GetByteArrayRegion(jval_, 0, (jint)val_len_, (jbyte*)val); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + } +}; + +class JNIKeyOnlyPopulator0 : public ROCKSDB_NAMESPACE::KeyValuePopulator { + JNIEnv* env_; + jbyteArray jkey_; +public: + virtual ~JNIKeyOnlyPopulator0() = default; + JNIKeyOnlyPopulator0(JNIEnv* env, jbyteArray jkey, jint jkey_len) + : KeyValuePopulator(jkey_len, 0), env_(env), jkey_(jkey) { } + void PopulateKeyValue(char* key, char*) const override { + env_->GetByteArrayRegion(jkey_, 0, (jint)key_len_, (jbyte*)key); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env_); + } +}; + /* * Class: org_rocksdb_WriteBatch * Method: getAddrSizeCapOffset @@ -377,6 +432,11 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, jint jentry_value_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator0 kvp(env, jkey, jkey_len, jentry_value, jentry_value_len); + ROCKSDB_NAMESPACE::Status status = wb->Put(nullptr, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto put = [&wb](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Put(key, value); @@ -387,6 +447,7 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -402,6 +463,11 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator0 kvp(env, jkey, jkey_len, jentry_value, jentry_value_len); + ROCKSDB_NAMESPACE::Status status = wb->Put(cf_handle, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto 
put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Put(cf_handle, key, value); @@ -412,6 +478,7 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -450,6 +517,11 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( jbyteArray jentry_value, jint jentry_value_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator0 kvp(env, jkey, jkey_len, jentry_value, jentry_value_len); + ROCKSDB_NAMESPACE::Status status = wb->Merge(nullptr, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto merge = [&wb](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Merge(key, value); @@ -460,6 +532,7 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -475,6 +548,11 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator0 kvp(env, jkey, jkey_len, jentry_value, jentry_value_len); + ROCKSDB_NAMESPACE::Status status = wb->Merge(cf_handle, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto merge = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Merge(cf_handle, key, value); @@ -485,6 +563,7 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -497,12 +576,18 @@ void Java_org_rocksdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, jbyteArray jkey, jint jkey_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); + #if 
JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator0 kvp(env, jkey, jkey_len); + ROCKSDB_NAMESPACE::Status status = wb->Delete(nullptr, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto remove = [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -519,6 +604,11 @@ void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator0 kvp(env, jkey, jkey_len); + ROCKSDB_NAMESPACE::Status status = wb->Delete(cf_handle, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(cf_handle, key); }; @@ -527,6 +617,7 @@ void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -540,6 +631,11 @@ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, jint jkey_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator0 kvp(env, jkey, jkey_len); + ROCKSDB_NAMESPACE::Status status = wb->SingleDelete(nullptr, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto single_delete = [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->SingleDelete(key); }; @@ -549,6 +645,7 @@ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* @@ -566,6 +663,11 @@ void 
Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyOnlyPopulator0 kvp(env, jkey, jkey_len); + ROCKSDB_NAMESPACE::Status status = wb->SingleDelete(cf_handle, kvp); + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + #else auto single_delete = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wb->SingleDelete(cf_handle, key); }; @@ -575,6 +677,7 @@ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } + #endif } /* From 9bb59496953afd89c58002c7011e5d95a1133932 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 09:23:38 +0800 Subject: [PATCH 035/175] rocksdbjni.cc: Missed `Merge` overload of JNIKeyValuePopulator changes --- java/rocksjni/rocksjni.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 834f795442..23f9da3a16 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1487,11 +1487,17 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Merge(default_write_options, cf_handle, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Merge(default_write_options, cf_handle, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } @@ -1515,10 +1521,16 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( 
auto* write_options = reinterpret_cast(jwrite_options_handle); try { + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Merge(*write_options, nullptr, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, db->Merge(*write_options, key.slice(), value.slice())); + #endif } catch (ROCKSDB_NAMESPACE::KVException&) { return; } From 4d2a5fe1ae5626f8114de8899583023851d76bf3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 09:25:13 +0800 Subject: [PATCH 036/175] Build: allow user env JAVAC_ARGS & ENABLE_AUTO_CHECK_LD --- Makefile | 4 ++++ build_tools/build_detect_platform | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a874b0149b..d68024f8d0 100644 --- a/Makefile +++ b/Makefile @@ -25,10 +25,14 @@ USE_RTTI=1 ROCKSDB_USE_IO_URING=0 ROCKSDB_DISABLE_TCMALLOC=1 SKIP_FORMAT_BUCK_CHECKS=1 + +ENABLE_AUTO_CHECK_LD ?= 1 +ifeq (${ENABLE_AUTO_CHECK_LD},1) ifneq ($(shell command -v ld.gold),) LDFLAGS += -fuse-ld=gold #LDFLAGS += -Wl,--icf=all # only reduce size 3.2% endif +endif # end topling specific # Transform parallel LOG output into something more readable. diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 2a46b0209b..bda1c254f6 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -263,7 +263,9 @@ esac PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}" JAVA_LDFLAGS="$PLATFORM_LDFLAGS" JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS" -JAVAC_ARGS="-source 8" +if [ -z "$JAVAC_ARGS" ]; then + JAVAC_ARGS="-source 8" +fi if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then # Cross-compiling; do not try any compilation tests. 
From 6906f1ca8086fdcbb36e83620a13270843ee5675 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 15:58:13 +0800 Subject: [PATCH 037/175] Makefile: Add export USE_FOLLY_LITE="$(USE_FOLLY_LITE)"; --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index d68024f8d0..c29fa8b271 100644 --- a/Makefile +++ b/Makefile @@ -125,6 +125,7 @@ dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ export LIB_MODE="$(LIB_MODE)"; \ export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \ export USE_FOLLY="$(USE_FOLLY)"; \ + export USE_FOLLY_LITE="$(USE_FOLLY_LITE)"; \ "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) ifneq (${.SHELLSTATUS},0) ifneq (${.SHELLSTATUS},) # wsl .SHELLSTATUS is empty From 92cf29c88b32d27ee26b0a3a41e562f4cb4e2d3d Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 15:59:41 +0800 Subject: [PATCH 038/175] perf_context_imp.h: for #if defined(NPERF_CONTEXT) --- monitoring/perf_context_imp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index 7863c08a3e..cfcbf59828 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -36,6 +36,10 @@ extern thread_local PerfContext perf_context_; #define PERF_TIMER_GUARD(metric) #define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock) #define PERF_CPU_TIMER_GUARD(metric, clock) +#define PERF_TIMER_MUTEX_WAIT_GUARD(metric, stats) +#define PERF_TIMER_COND_WAIT_GUARD(metric, stats) +#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ + ticker_type) #define PERF_TIMER_FULL_STATS(metric, ticker, histogram, stats) #define PERF_TIMER_WITH_HISTOGRAM(metric, histogram, stats) #define PERF_TIMER_WITH_TICKER(metric, ticker, stats, clock) From b6d93467ca37534e114735ae1e4a530deabcfb47 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 16:05:39 +0800 Subject: [PATCH 039/175] KeyValuePopulator: made non-copyable --- 
include/rocksdb/write_batch_base.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rocksdb/write_batch_base.h b/include/rocksdb/write_batch_base.h index 32b5709d4c..4bcb0dab71 100644 --- a/include/rocksdb/write_batch_base.h +++ b/include/rocksdb/write_batch_base.h @@ -30,6 +30,8 @@ struct SliceParts; // key_len() and val_len() sizes, respectively. class KeyValuePopulator { public: + KeyValuePopulator(const KeyValuePopulator&) = delete; + KeyValuePopulator& operator=(const KeyValuePopulator&) = delete; KeyValuePopulator() {} // intentionally not init key_len_ and val_len_ KeyValuePopulator(size_t key_len, size_t val_len) : key_len_(key_len), val_len_(val_len) {} From 53bff01338649bcc71849d0b833d1ee0436843c0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 22 Aug 2025 16:34:38 +0800 Subject: [PATCH 040/175] KeyValuePopulator: made non-copyable - fix indent --- java/rocksjni/rocksjni.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 23f9da3a16..0812ddaee6 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1552,11 +1552,11 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { try { - #if JNI_USE_KEY_VALUE_POPULATOR - JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); - ROCKSDB_NAMESPACE::Status s = db->Merge(*write_options, cf_handle, kvp); - ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); - #else + #if JNI_USE_KEY_VALUE_POPULATOR + JNIKeyValuePopulator kvp(env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::Status s = db->Merge(*write_options, cf_handle, kvp); + ROCKSDB_NAMESPACE::KVException::ThrowOnError(env, s); + #else ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( From 
48d5fe6df1fc262bfc182a04a65032b143de033f Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 23 Aug 2025 20:41:29 +0800 Subject: [PATCH 041/175] Makefile: add topling core lib objects into librocksdbjni --- Makefile | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c29fa8b271..88ed34547e 100644 --- a/Makefile +++ b/Makefile @@ -349,6 +349,10 @@ ifeq (${DEBUG_LEVEL}, 2) BUILD_TYPE_SIG := d OBJ_DIR := ${BUILD_ROOT}/dbg endif + +TOPLING_LIB_OBJ_LIST_FILE := ${OBJ_DIR}/shared_lib_obj_list.mk +-include ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} + ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) MAKE_UNIT_TEST ?= 1 endif @@ -2980,13 +2984,16 @@ jl/%.o: %.cc JNI_USE_KEY_VALUE_POPULATOR ?= 1 ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -DJNI_USE_KEY_VALUE_POPULATOR=${JNI_USE_KEY_VALUE_POPULATOR} ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. -I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET} +rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) - $(AM_V_at)$(CXX) $(CXXFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(LDFLAGS) - $(AM_V_at)cp -a ${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared/*${COMPILER}*-${BUILD_TYPE_SIG}.so java/target + $(AM_V_at)$(CXX) $(CXXFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) \ + $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ + $(addprefix ${TOPLING_CORE_DIR}/, $(TOPLING_LIB_OBJ_LIST_VAR)) \ + $(JAVA_LDFLAGS) \ + $(filter-out -L${TOPLING_CORE_DIR}% -lterark-%, $(LDFLAGS)) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target ifeq 
($(STRIP_DEBUG_INFO),1) $(AM_V_at)strip java/target/*.so @@ -3212,6 +3219,11 @@ ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: LDFLAGS = ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: +make -C ${TOPLING_CORE_DIR} ${TOPLING_ZBS_TARGET} +${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: CXXFLAGS = +${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: LDFLAGS = +${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: + +make -C ${TOPLING_CORE_DIR} ${TOPLING_LIB_OBJ_LIST_FILE} + ${STATIC_LIBRARY}: ${BUILD_ROOT}/lib_static/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.a ${BUILD_ROOT}/lib_static/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.a: +make -C ${TOPLING_CORE_DIR} core fsa zbs From 4b429036562b141bbe1294ff17029c674307797d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 23 Aug 2025 22:41:24 +0800 Subject: [PATCH 042/175] Makefile: fix for without libjemalloc --- Makefile | 5 +++++ java/Makefile | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 88ed34547e..6565b9233c 100644 --- a/Makefile +++ b/Makefile @@ -864,6 +864,11 @@ ifndef USE_FOLLY USE_FOLLY=0 endif +ifeq ($(filter -DROCKSDB_JEMALLOC,$(PLATFORM_CXXFLAGS)),) + PLATFORM_CXXFLAGS += -DTOPLING_DISABLE_JEMALLOC + export TOPLING_DISABLE_JEMALLOC := 1 +endif + ifndef GTEST_THROW_ON_FAILURE export GTEST_THROW_ON_FAILURE=1 endif diff --git a/java/Makefile b/java/Makefile index 1d760a713a..a8937205bb 100644 --- a/java/Makefile +++ b/java/Makefile @@ -271,7 +271,7 @@ JAVA_CMD := $(JAVA_HOME)/bin/java else JAVA_CMD := java endif -JAVA_CMD := env LD_PRELOAD=libjemalloc.so:librocksdbjni-linux64.so LD_LIBRARY_PATH=target:${LD_LIBRARY_PATH} ${JAVA_CMD} +JAVA_CMD := env LD_LIBRARY_PATH=target:${LD_LIBRARY_PATH} ${JAVA_CMD} endif ifeq ($(JAVAC_CMD),) From 8557ff31cdec5454651c1a1fbdb1d05132116849 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 23 Aug 2025 22:42:57 +0800 Subject: [PATCH 043/175] Fix TLS, static TLS is fast but less compatible tls-model=initial-exec is fast but can not be loaded 
indirect dynamically. make ... DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 --- Makefile | 4 ++++ port/lang.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6565b9233c..d3dc0bc6de 100644 --- a/Makefile +++ b/Makefile @@ -350,6 +350,10 @@ ifeq (${DEBUG_LEVEL}, 2) OBJ_DIR := ${BUILD_ROOT}/dbg endif +ifeq (${TOPLING_USE_DYNAMIC_TLS},1) + CXXFLAGS += -DTOPLING_USE_DYNAMIC_TLS +endif + TOPLING_LIB_OBJ_LIST_FILE := ${OBJ_DIR}/shared_lib_obj_list.mk -include ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} diff --git a/port/lang.h b/port/lang.h index b8cb075b96..bed73223c5 100644 --- a/port/lang.h +++ b/port/lang.h @@ -96,7 +96,7 @@ constexpr bool kMustFreeHeapAllocations = false; #undef __POPCNT__ #endif -#if defined(__GNUC__) +#if defined(__GNUC__) && !defined(TOPLING_USE_DYNAMIC_TLS) #define ROCKSDB_STATIC_TLS __attribute__((tls_model("initial-exec"))) #define ROCKSDB_RAW_TLS __thread #else From 6b5e1532f55b2ef30c7e4ac0fa2ea0211e1cb691 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 23 Aug 2025 22:43:38 +0800 Subject: [PATCH 044/175] java: print message on load librocksdbjni failed --- java/src/main/java/org/rocksdb/NativeLibraryLoader.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java index f7daf81fa9..25a7c0ad71 100644 --- a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java @@ -73,7 +73,8 @@ public synchronized void loadLibrary(final String tmpDir) throws IOException { //System.err.println("loaded " + jniLibraryName); return; } catch (final UnsatisfiedLinkError ule) { - System.err.println("failed " + jniLibraryName); + System.err.println("failed " + jniLibraryName + ule + " - try compile " + + "with make rocksdbjava DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1"); // ignore - then try static library fallback or from jar 
} From 0c62aa194795e1ebda8b5ae85e713d36445b99ba Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:24:31 +0800 Subject: [PATCH 045/175] Add missing `override` --- env/fs_cat.cc | 2 +- env/fs_cat.h | 6 +++--- java/rocksjni/write_batch.cc | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/env/fs_cat.cc b/env/fs_cat.cc index aa906db0ae..6b39de21ff 100644 --- a/env/fs_cat.cc +++ b/env/fs_cat.cc @@ -570,7 +570,7 @@ struct CatLogger : public Logger { } size_t GetLogFileSize() const override { return m_local->GetLogFileSize(); } - void Flush() { + void Flush() override { m_local->Flush(); // do not: m_remote->Flush(); } diff --git a/env/fs_cat.h b/env/fs_cat.h index e698fa0a06..07a12ba1ae 100644 --- a/env/fs_cat.h +++ b/env/fs_cat.h @@ -41,7 +41,7 @@ class CatFileSystem : public FileSystem { IODebugContext* dbg) override; IOStatus ReopenWritableFile(const std::string& /*fname*/, const FileOptions&, - std::unique_ptr*, IODebugContext*); + std::unique_ptr*, IODebugContext*) override; IOStatus ReuseWritableFile(const std::string& fname, const std::string& old_fname, @@ -113,8 +113,8 @@ class CatFileSystem : public FileSystem { std::string* output_path, IODebugContext* dbg) override; - IOStatus GetTestDirectory(const IOOptions&, std::string* path, IODebugContext*); - IOStatus UnlockFile(FileLock*, const IOOptions&, IODebugContext*); + IOStatus GetTestDirectory(const IOOptions&, std::string* path, IODebugContext*) override; + IOStatus UnlockFile(FileLock*, const IOOptions&, IODebugContext*) override; private: std::shared_ptr m_local, m_remote; diff --git a/java/rocksjni/write_batch.cc b/java/rocksjni/write_batch.cc index d38f9f2185..4725168e92 100644 --- a/java/rocksjni/write_batch.cc +++ b/java/rocksjni/write_batch.cc @@ -158,7 +158,7 @@ struct JniWriteBatch : public WriteBatch { return s; } using WriteBatch::Delete; - Status Delete(CFH* cf, const Slice& k) { + Status Delete(CFH* cf, const Slice& k) override { updateNativeDataSizeFromJava(); 
Status s = WriteBatch::Delete(cf, k); updateJavaAddrSizeCapFromNative(); From b37971187a87d2a1249cd92f036a5471da46833e Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:26:22 +0800 Subject: [PATCH 046/175] Use boost::intrusive_ptr --- db/db_impl/db_impl_open.cc | 2 +- db/db_impl/db_impl_secondary.h | 2 +- db/log_writer.h | 2 +- env/file_system.cc | 4 ++-- include/rocksdb/file_system.h | 18 ++++++++++++++---- include/rocksdb/write_batch.h | 8 ++++---- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 70d85f6f48..476ea032fa 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1206,7 +1206,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, // large sequence numbers). log::Reader reader(immutable_db_options_.info_log, std::move(file_reader), &reporter, true /*checksum*/, wal_number); - std::shared_ptr fmap; + boost::intrusive_ptr fmap; if (immutable_db_options_.memtable_as_log_index) { reader.InitSetMemTableAsLogIndex(*fs_); IOStatus ios = ReadonlyFileMmap::New(&fmap, *fs_, wal_number, fname); diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index 80e090c164..f487d20499 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -41,7 +41,7 @@ class LogReaderContainer { log::FragmentBufferedReader* reader_; log::Reader::Reporter* reporter_; Status* status_; - std::shared_ptr fmap_; + boost::intrusive_ptr fmap_; ~LogReaderContainer() { delete reader_; delete reporter_; diff --git a/db/log_writer.h b/db/log_writer.h index 6c6ab1ab11..afc886ae86 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -128,7 +128,7 @@ class Writer { bool BufferIsEmpty(); private: - std::shared_ptr mmap_reader_; + boost::intrusive_ptr mmap_reader_; std::unique_ptr dest_; uint64_t log_number_; std::shared_ptr log_offset_; diff --git a/env/file_system.cc b/env/file_system.cc index a6a27e59fa..c5f48335dd 100644 --- 
a/env/file_system.cc +++ b/env/file_system.cc @@ -289,7 +289,7 @@ IOStatus FSWritableFile::Appendv(const Slice* parts, size_t num, size_t, ReadonlyFileMmap::ReadonlyFileMmap(PrivateCons) {} ReadonlyFileMmap::~ReadonlyFileMmap() = default; -std::shared_ptr ReadonlyFileMmap::New +boost::intrusive_ptr ReadonlyFileMmap::New (IOStatus* ios, FileSystem& fs, size_t fileno, const std::string& fname, size_t mmap_size) { IODebugContext dbg; @@ -307,7 +307,7 @@ std::shared_ptr ReadonlyFileMmap::New mmap_size = fsize; } fopt.mmap_size = mmap_size; - auto fmap = std::make_shared(PrivateCons()); + boost::intrusive_ptr fmap(new ReadonlyFileMmap(PrivateCons())); *ios = fs.NewRandomAccessFile(fname, fopt, &fmap->file_, &dbg); if (ios->ok()) { auto fp = fmap->file_.get(); diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index b0c2eede1f..01c1c8681a 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -34,6 +34,7 @@ #include "rocksdb/options.h" #include "rocksdb/table.h" #include "rocksdb/thread_status.h" +#include namespace ROCKSDB_NAMESPACE { @@ -1920,23 +1921,32 @@ class FSDirectoryWrapper : public FSDirectory { FSDirectory* target_; }; -class ReadonlyFileMmap : public std::enable_shared_from_this, public Slice { +class ReadonlyFileMmap : public Slice { std::unique_ptr file_; + std::atomic_int32_t ref_count_; + friend void intrusive_ptr_add_ref(ReadonlyFileMmap* p) { + p->ref_count_.fetch_add(1, std::memory_order_relaxed); + } + friend void intrusive_ptr_release(ReadonlyFileMmap* p) { + if (p->ref_count_.fetch_sub(1, std::memory_order_release) == 1) { + delete p; + } + } struct PrivateCons{}; public: ReadonlyFileMmap& operator=(const ReadonlyFileMmap&) = delete; ReadonlyFileMmap(const ReadonlyFileMmap&) = delete; ReadonlyFileMmap(PrivateCons); ~ReadonlyFileMmap(); - static std::shared_ptr + static boost::intrusive_ptr New(IOStatus* s, FileSystem& fs, size_t fileno, const std::string& fname, size_t mmap_size = 0); - static 
IOStatus New(std::shared_ptr* fp, FileSystem& fs, + static IOStatus New(boost::intrusive_ptr* fp, FileSystem& fs, size_t fileno, const std::string& fname, size_t mmap_size = 0) { IOStatus s; *fp = New(&s, fs, fileno, fname, mmap_size); return s; } - static std::pair, IOStatus> + static std::pair, IOStatus> New(FileSystem& fs, size_t fileno, const std::string& fname, size_t mmap_size = 0) { IOStatus ios; return {New(&ios, fs, fileno, fname, mmap_size), ios}; diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 8b7423386f..40e9de596f 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -35,7 +35,7 @@ #include "rocksdb/status.h" #include "rocksdb/write_batch_base.h" #include "fake_atomic.h" -#include +#include namespace ROCKSDB_NAMESPACE { @@ -44,8 +44,8 @@ class ColumnFamilyHandle; struct SavePoints; struct SliceParts; class ReadonlyFileMmap; -// We know ReadonlyFileMmap is single derived enable_shared_from_this -inline auto base_enable_shared_from_this(ReadonlyFileMmap* p) { return p; } +void intrusive_ptr_add_ref(ReadonlyFileMmap*); +void intrusive_ptr_release(ReadonlyFileMmap*); struct KeyValuePassMemTable { Slice value; @@ -479,7 +479,7 @@ class WriteBatch : public WriteBatchBase { size_t GetProtectionBytesPerKey() const; struct WALFileRef { - terark::narrow_shared_ptr file_mmap; + boost::intrusive_ptr file_mmap; uint64_t file_number = UINT64_MAX; uint64_t file_offset = UINT64_MAX; }; From db57b5da584058222d16a567204841df9ea649e7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:27:50 +0800 Subject: [PATCH 047/175] define TOPLINGDB_WITH_FIBER_AIO=0 on android --- db/db_impl/db_impl.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 02856947a1..c7b12e12c1 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -120,6 +120,8 @@ #include #if defined(_MSC_VER) #define TOPLINGDB_WITH_FIBER_AIO 0 +#elif defined(__ANDROID__) + #define 
TOPLINGDB_WITH_FIBER_AIO 0 #else #define TOPLINGDB_WITH_FIBER_AIO 1 #endif From ba40ef8856d6df3becc0971f9fec415a06e1360c Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:33:27 +0800 Subject: [PATCH 048/175] Fix for android --- build_tools/build_detect_platform | 3 +++ env/io_posix.h | 1 + include/rocksdb/preproc.h | 5 +++++ port/port_posix.h | 6 ++++++ table/merging_iterator.cc | 2 +- util/thread_local.cc | 4 ++++ 6 files changed, 20 insertions(+), 1 deletion(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index bda1c254f6..dea99f2d48 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -344,6 +344,7 @@ EOF if ! test $ROCKSDB_DISABLE_ZLIB; then # Test whether zlib library is installed + test "$ZLIB_READY_SKIP_CHECK" = 1 || $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < int main() {} @@ -624,6 +625,7 @@ EOF fi +if [ -z "${W_shorten_64_to_32}" ]; then # TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning. # -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386 if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then @@ -635,6 +637,7 @@ EOF COMMON_FLAGS="$COMMON_FLAGS -Wshorten-64-to-32" fi fi +fi if [ "$PORTABLE" == "" ] || [ "$PORTABLE" == 0 ]; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then diff --git a/env/io_posix.h b/env/io_posix.h index a2d4476e53..e558c7892a 100644 --- a/env/io_posix.h +++ b/env/io_posix.h @@ -30,6 +30,7 @@ // For non linux platform, the following macros are used only as place // holder. 
#if !(defined OS_LINUX) && !(defined OS_FREEBSD) && !(defined CYGWIN) && \ + !(defined OS_ANDROID) && \ !(defined OS_AIX) #define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */ #define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */ diff --git a/include/rocksdb/preproc.h b/include/rocksdb/preproc.h index 3744bd03cf..160dfa5745 100644 --- a/include/rocksdb/preproc.h +++ b/include/rocksdb/preproc.h @@ -570,4 +570,9 @@ decltype(ROCKSDB_PP_CAT2(func_on_exit_,__LINE__))> \ #define __always_inline __forceinline #endif +#if defined(__ANDROID__) + #undef __always_inline + #define __always_inline inline +#endif + // clang-format on diff --git a/port/port_posix.h b/port/port_posix.h index a654d16822..ea0259a5ca 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -89,9 +89,15 @@ namespace ROCKSDB_NAMESPACE { extern const bool kDefaultToAdaptiveMutex; #if PLATFORM_IS_LITTLE_ENDIAN +#if defined(__swap32) +inline uint64_t NativeOfBigEndian64(uint64_t x) { return __swap64(x); } +inline uint32_t NativeOfBigEndian32(uint32_t x) { return __swap32(x); } +inline uint16_t NativeOfBigEndian16(uint16_t x) { return __swap16(x); } +#else inline uint64_t NativeOfBigEndian64(uint64_t x) { return __bswap_64(x); } inline uint32_t NativeOfBigEndian32(uint32_t x) { return __bswap_32(x); } inline uint16_t NativeOfBigEndian16(uint16_t x) { return __bswap_16(x); } +#endif #else inline uint64_t NativeOfBigEndian64(uint64_t x) { return (x); } inline uint32_t NativeOfBigEndian32(uint32_t x) { return (x); } diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index e3326e3588..013e8144a7 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -177,7 +177,7 @@ class MaxHeapItemComparator { #else using UintPrefix = unsigned __int128; FORCE_INLINE UintPrefix bswap_prefix(UintPrefix x) { - return UintPrefix(__bswap_64(uint64_t(x))) << 64 | __bswap_64(uint64_t(x >> 64)); + return UintPrefix(NativeOfBigEndian64(uint64_t(x))) << 64 | 
NativeOfBigEndian64(uint64_t(x >> 64)); } #endif #endif diff --git a/util/thread_local.cc b/util/thread_local.cc index 5518c209a8..a39fc2b667 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -255,6 +255,10 @@ BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { #endif // OS_WIN +#if !defined(__attribute_noinline__) +#define __attribute_noinline__ __attribute__((noinline)) +#endif + void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); } __always_inline From 60c9774331c547e7de0a80ac7145ee2d989bea59 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:36:02 +0800 Subject: [PATCH 049/175] rocksjni.cc: ignored "-Wshift-negative-value" --- java/rocksjni/rocksjni.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 0812ddaee6..9dceb1997c 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1129,6 +1129,9 @@ void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BII( jend_key, jend_key_off, jend_key_len); } +#if defined(__GNUC__) + #pragma GCC diagnostic ignored "-Wshift-negative-value" +#endif // low 3 bits of object ptr are always 0, we use 1 bits here #define JLONG_OF_PTR(ptr) jlong(ptr) #define JLONG_OF_ERROR(err) ((jlong(err) << 3) | 1) From 3ca44494bd4e8ad7e47106fbd387f9665e45c2a6 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:37:20 +0800 Subject: [PATCH 050/175] Makefile: Fix for cross build for android --- Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Makefile b/Makefile index d3dc0bc6de..1318dd5ddb 100644 --- a/Makefile +++ b/Makefile @@ -324,11 +324,13 @@ ifneq ($(origin WITH_BMI2),environment) endif endif +ifndef COMPILER COMPILER := $(shell set -e; tmpfile=`mktemp -u compiler-XXXXXX`; \ ${CXX} ${TOPLING_CORE_DIR}/tools/configure/compiler.cpp -o $${tmpfile}.exe; \ ./$${tmpfile}.exe && rm -f $${tmpfile}*) UNAME_MachineSystem := $(shell uname -m -s | sed 's:[ /]:-:g') MARCH ?= $(shell uname 
-m) +endif ifeq "${MARCH}" "x86_64" WITH_BMI2 ?= $(shell bash ${TOPLING_CORE_DIR}/cpu_has_bmi2.sh) else @@ -551,6 +553,8 @@ else endif endif +WITH_TOPLING_DCOMPACT ?= 1 +ifeq (${WITH_TOPLING_DCOMPACT},1) ifeq (,$(wildcard sideplugin/topling-dcompact/src/dcompact)) dummy := $(shell set -e -x; \ cd sideplugin; \ @@ -574,6 +578,7 @@ ifneq (,$(wildcard sideplugin/topling-dcompact/src/dcompact)) else $(warning NotFound sideplugin/topling-dcompact, this is ok, only topling-dcompact is disabled) endif +endif # WITH_TOPLING_DCOMPACT export LD_LIBRARY_PATH:=${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared:${LD_LIBRARY_PATH} ifeq (${WITH_TOPLING_ROCKS},1) @@ -589,6 +594,7 @@ else endif endif +ifeq (${WITH_TOPLING_DCOMPACT},1) TOPLING_DCOMPACT_USE_ETCD := 0 ifneq (,$(wildcard sideplugin/topling-dcompact/3rdparty/etcd-cpp-apiv3/build/src/libetcd-cpp-api.${PLATFORM_SHARED_EXT})) ifneq (,$(wildcard sideplugin/topling-dcompact/3rdparty/etcd-cpp-apiv3/build/proto/gen/proto)) @@ -619,6 +625,7 @@ endif #ifeq (${TOPLING_DCOMPACT_USE_ETCD},0) # $(warning NotFound etcd-cpp-apiv3, this is ok, only etcd is disabled) #endif +endif # WITH_TOPLING_DCOMPACT #export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 @@ -3278,6 +3285,8 @@ sideplugin/toplingdb-fs/${TOPLINGDB_FS_GIT_VER_SRC}: \ sideplugin/toplingdb-fs/Makefile +make -C sideplugin/toplingdb-fs ${TOPLINGDB_FS_GIT_VER_SRC} endif + +ifeq (${WITH_TOPLING_DCOMPACT},1) ifneq (,$(wildcard sideplugin/topling-dcompact/src/dcompact)) sideplugin/topling-dcompact/${TOPLING_DCOMPACT_GIT_VER_SRC}: \ $(wildcard sideplugin/topling-dcompact/src/dcompact/*.h) \ @@ -3294,6 +3303,7 @@ else cp -a sideplugin/topling-dcompact/tools/dcompact/${ORIG_OBJ_DIR}/dcompact_worker.exe ${OBJ_DIR} endif endif +endif # WITH_TOPLING_DCOMPACT ${OBJ_DIR}/sideplugin/rockside/src/topling/web/civetweb.o: CFLAGS += -DUSE_ZLIB From 92b79cbd0424099fb50c635c1d3b36ec2bf25d36 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:37:52 +0800 Subject: [PATCH 051/175] Update submodule 
rockside: for android --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 30178b735b..8a570a52f8 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 30178b735b215d818e3ee4996abcf916fb0e8fbf +Subproject commit 8a570a52f80a2c7d135e04f189e71dd94dd21b58 From 9d3f5bd666c37580fe7c726538fb9f71bc945713 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 18:39:39 +0800 Subject: [PATCH 052/175] Add android-build.sh --- android-build.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 android-build.sh diff --git a/android-build.sh b/android-build.sh new file mode 100755 index 0000000000..3283cc7310 --- /dev/null +++ b/android-build.sh @@ -0,0 +1,36 @@ +#!/usr/bin/bash + +# you should just change the 2 lines +ANDROID_HOME=${HOME}/osc/android +export ANDROID_NDK_HOME=${ANDROID_HOME}/ndk/26.1.10909125 + +# these lines need not change +CXX_HOME=${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64 +export CXX=${CXX_HOME}/bin/aarch64-linux-android34-clang++ +export CC=${CXX_HOME}/bin/aarch64-linux-android34-clang +export LD=${CXX} +export ENABLE_AUTO_CHECK_LD=0 +export JAVAC_ARGS="-source 17 -target 17" +export BUILD_PREFIX=../build-toplingdb/ +export ROCKSDB_DISABLE_GFLAGS=1 +export CPU="-O2" # fool the make +export WITH_BMI2=na +export WITH_TOPLING_ROCKS=0 +export WITH_TOPLING_DCOMPACT=0 +export DISABLE_JEMALLOC=1 +export EXTRA_CXXFLAGS="-Wno-deprecated-builtins -Wno-shorten-64-to-32 -DBOOST_NO_CXX98_FUNCTION_BASE" +export EXTRA_CXXFLAGS="-Wno-deprecated-builtins -DBOOST_NO_CXX98_FUNCTION_BASE" +export W_shorten_64_to_32=0 +export ARCHFLAG="-arch aarch64" +export MACHINE=aarch64 +export TARGET_OS=OS_ANDROID_CROSSCOMPILE +export COMPILER=clang-17.0 +export UNAME_MachineSystem=android34-aarch64 +export UNAME_System=android34 +export MARCH=aarch64 +export TOPLING_DISABLE_FIBER_AIO=1 +export 
ZLIB_READY_SKIP_CHECK=1 +export IS_CYGWIN=0 + +#make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 clean +make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 rocksdbjava From 4ca7bfe6c78adb7aee0da1c68e14ae4ce9f350ce Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 19:39:41 +0800 Subject: [PATCH 053/175] Makefile: ${TOPLING_LIB_OBJ_LIST_FILE} depend on ${TOPLING_LIB_SRC_LIST_VAR} --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1318dd5ddb..5b81f7d90d 100644 --- a/Makefile +++ b/Makefile @@ -3237,7 +3237,7 @@ ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: CXXFLAGS = ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: LDFLAGS = -${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: +${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE}: $(addprefix ${TOPLING_CORE_DIR}/, ${TOPLING_LIB_SRC_LIST_VAR}) +make -C ${TOPLING_CORE_DIR} ${TOPLING_LIB_OBJ_LIST_FILE} ${STATIC_LIBRARY}: ${BUILD_ROOT}/lib_static/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.a From 01f4178fd1aacdeef90ef54a2dd4d03a7f3aa561 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 20:59:29 +0800 Subject: [PATCH 054/175] Makefile: Add var STRIP_CMD ?= strip --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5b81f7d90d..0a5d5a5b64 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,8 @@ MACHINE ?= $(shell uname -m) ARFLAGS = ${EXTRA_ARFLAGS} rs STRIPFLAGS = -S -x +STRIP_CMD ?= strip + # beg topling specific DISABLE_WARNING_AS_ERROR=1 LIB_MODE=shared @@ -2890,7 +2892,7 @@ rocksdbjavastatic_javalib: $(LIB_OBJECTS) $(COVERAGEFLAGS) \ $(JAVA_COMPRESSIONS) $(JAVA_STATIC_LDFLAGS) cd java/target;if [ "$(DEBUG_LEVEL)" == "0" ]; then \ - strip $(STRIPFLAGS) $(ROCKSDBJNILIB); \ + ${STRIP_CMD} $(STRIPFLAGS) $(ROCKSDBJNILIB); \ fi rocksdbjava_jar: @@ -3012,7 +3014,7 @@ endif $(filter-out 
-L${TOPLING_CORE_DIR}% -lterark-%, $(LDFLAGS)) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target ifeq ($(STRIP_DEBUG_INFO),1) - $(AM_V_at)strip java/target/*.so + $(AM_V_at)${STRIP_CMD} java/target/*.so endif $(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md ifeq (${ROCKSDB_JAR_WITH_DYNAMIC_LIBS},1) From d206b1ffb931a15ed14a8e3e32703a0315a9f966 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 21:00:16 +0800 Subject: [PATCH 055/175] android-build.sh: export STRIP_CMD=${CXX_HOME}/bin/llvm-strip --- android-build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/android-build.sh b/android-build.sh index 3283cc7310..036fe54820 100755 --- a/android-build.sh +++ b/android-build.sh @@ -9,6 +9,7 @@ CXX_HOME=${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64 export CXX=${CXX_HOME}/bin/aarch64-linux-android34-clang++ export CC=${CXX_HOME}/bin/aarch64-linux-android34-clang export LD=${CXX} +export STRIP_CMD=${CXX_HOME}/bin/llvm-strip export ENABLE_AUTO_CHECK_LD=0 export JAVAC_ARGS="-source 17 -target 17" export BUILD_PREFIX=../build-toplingdb/ From 35295c201d762205b2df0a472dccf6f9a63924cd Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 25 Aug 2025 21:03:28 +0800 Subject: [PATCH 056/175] android-build.sh: STRIP_DEBUG_INFO=1 & ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 --- android-build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android-build.sh b/android-build.sh index 036fe54820..443150c1e2 100755 --- a/android-build.sh +++ b/android-build.sh @@ -32,6 +32,8 @@ export MARCH=aarch64 export TOPLING_DISABLE_FIBER_AIO=1 export ZLIB_READY_SKIP_CHECK=1 export IS_CYGWIN=0 +export STRIP_DEBUG_INFO=1 +export ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 #make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 clean make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 rocksdbjava From c533dfd03741f3a6e26e2ccb54addbefd83ae2a3 Mon Sep 17 00:00:00 2001 From: rockeet 
Date: Mon, 25 Aug 2025 23:03:37 +0800 Subject: [PATCH 057/175] Build script for android: bundle snappy,lz4,bz2 zlib is already included in base lib, not need bundle. --- Makefile | 24 ++++++++++++++++++++++-- android-build.sh | 5 +++++ build_tools/build_detect_platform | 4 ++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0a5d5a5b64..6effdfece9 100644 --- a/Makefile +++ b/Makefile @@ -2711,6 +2711,23 @@ ZSTD_SHA256 ?= 98e9c3d949d1b924e28e01eccb7deed865eefebf25c2f21c702e5cd5b63b85e1 ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 +ifneq ($(wildcard libz.a),) + BUNDLED_COMPRESSION_LIBS += libz.a + CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) +endif +ifneq ($(wildcard libbz2.a),) + BUNDLED_COMPRESSION_LIBS += libbz2.a + CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) +endif +ifneq ($(wildcard libsnappy.a),) + BUNDLED_COMPRESSION_LIBS += libsnappy.a + CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build +endif +ifneq ($(wildcard liblz4.a),) + BUNDLED_COMPRESSION_LIBS += liblz4.a + CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib +endif + ifeq ($(PLATFORM), OS_MACOSX) ifeq (,$(findstring librocksdbjni-osx,$(ROCKSDBJNILIB))) ifeq ($(MACHINE),arm64) @@ -2783,7 +2800,7 @@ bzip2-$(BZIP2_VER).tar.gz: libbz2.a: bzip2-$(BZIP2_VER).tar.gz -rm -rf bzip2-$(BZIP2_VER) tar xvzf bzip2-$(BZIP2_VER).tar.gz - cd bzip2-$(BZIP2_VER) && $(MAKE) CFLAGS='-fPIC -O2 -g -D_FILE_OFFSET_BITS=64 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' AR='ar ${EXTRA_ARFLAGS}' libbz2.a + cd bzip2-$(BZIP2_VER) && $(MAKE) CFLAGS='-fPIC -O2 -g -D_FILE_OFFSET_BITS=64 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' AR='${AR} ${EXTRA_ARFLAGS}' libbz2.a CC=${CC} LD=${LD} cp bzip2-$(BZIP2_VER)/libbz2.a . 
snappy-$(SNAPPY_VER).tar.gz: @@ -2812,7 +2829,7 @@ lz4-$(LZ4_VER).tar.gz: liblz4.a: lz4-$(LZ4_VER).tar.gz -rm -rf lz4-$(LZ4_VER) tar xvzf lz4-$(LZ4_VER).tar.gz - cd lz4-$(LZ4_VER)/lib && $(MAKE) CFLAGS='-fPIC -O2 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' all + cd lz4-$(LZ4_VER)/lib && $(MAKE) CFLAGS='-fPIC -O2 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' all CC=${CC} LD=${LD} cp lz4-$(LZ4_VER)/lib/liblz4.a . zstd-$(ZSTD_VER).tar.gz: @@ -3010,6 +3027,9 @@ endif $(AM_V_at)$(CXX) $(CXXFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ $(addprefix ${TOPLING_CORE_DIR}/, $(TOPLING_LIB_OBJ_LIST_VAR)) \ + -Wl,--whole-archive \ + ${BUNDLED_COMPRESSION_LIBS} \ + -Wl,--no-whole-archive \ $(JAVA_LDFLAGS) \ $(filter-out -L${TOPLING_CORE_DIR}% -lterark-%, $(LDFLAGS)) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target diff --git a/android-build.sh b/android-build.sh index 443150c1e2..ef648d78f9 100755 --- a/android-build.sh +++ b/android-build.sh @@ -1,5 +1,7 @@ #!/usr/bin/bash +set -e + # you should just change the 2 lines ANDROID_HOME=${HOME}/osc/android export ANDROID_NDK_HOME=${ANDROID_HOME}/ndk/26.1.10909125 @@ -9,6 +11,7 @@ CXX_HOME=${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64 export CXX=${CXX_HOME}/bin/aarch64-linux-android34-clang++ export CC=${CXX_HOME}/bin/aarch64-linux-android34-clang export LD=${CXX} +export AR=${CXX_HOME}/bin/llvm-ar export STRIP_CMD=${CXX_HOME}/bin/llvm-strip export ENABLE_AUTO_CHECK_LD=0 export JAVAC_ARGS="-source 17 -target 17" @@ -34,6 +37,8 @@ export ZLIB_READY_SKIP_CHECK=1 export IS_CYGWIN=0 export STRIP_DEBUG_INFO=1 export ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 +export JAVA_STATIC_DEPS_CXXFLAGS="-fPIC" #make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 clean +make UPDATE_REPO=0 
DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 libsnappy.a liblz4.a libbz2.a make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 rocksdbjava diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index dea99f2d48..194bfee4d8 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -295,6 +295,7 @@ EOF if ! test $ROCKSDB_DISABLE_SNAPPY; then # Test whether Snappy library is installed # http://code.google.com/p/snappy/ + test -s libsnappy.a || $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < int main() {} @@ -344,6 +345,7 @@ EOF if ! test $ROCKSDB_DISABLE_ZLIB; then # Test whether zlib library is installed + test -s libz.a || test "$ZLIB_READY_SKIP_CHECK" = 1 || $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < @@ -361,6 +363,7 @@ EOF if ! test $ROCKSDB_DISABLE_BZIP; then # Test whether bzip library is installed + test -s libbz2.a || $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < int main() {} @@ -374,6 +377,7 @@ EOF if ! 
test $ROCKSDB_DISABLE_LZ4; then # Test whether lz4 library is installed + test -s liblz4.a || $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < #include From 8509c6e2ff72994cfb34e0157f551ece1846a202 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 26 Aug 2025 14:54:56 +0800 Subject: [PATCH 058/175] Makefile: Fix dependency bugs --- Makefile | 16 ++++++++++------ java/Makefile | 5 +++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 6effdfece9..57c08a1acf 100644 --- a/Makefile +++ b/Makefile @@ -359,7 +359,14 @@ ifeq (${TOPLING_USE_DYNAMIC_TLS},1) endif TOPLING_LIB_OBJ_LIST_FILE := ${OBJ_DIR}/shared_lib_obj_list.mk +ROCKS_DEP_RULES:=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean package analyze tags unity.% checkout_folly, $(MAKECMDGOALS)) +ROCKS_DEP_RULES:=$(filter-out rust-support, $(ROCKS_DEP_RULES)) +ifneq ("$(ROCKS_DEP_RULES)", "") -include ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} + ifneq ($(filter j% rocksdbjava%, $(MAKECMDGOALS)),) + -include java/include/java_header_list.mk + endif +endif ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) MAKE_UNIT_TEST ?= 1 @@ -3049,7 +3056,7 @@ install-jni: rocksdbjava mkdir -p $(INSTALL_LIBDIR) install -C -m 644 java/target/*.so $(INSTALL_LIBDIR) -rocksdbjava-header: +java/include/java_header_list.mk: $(AM_V_GEN)$(MAKE) -C java java_test jclean: @@ -3191,7 +3198,7 @@ endif # If skip dependencies is ON, skip including the dep files ifneq ($(SKIP_DEPENDS), 1) DEPFILES := $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES)) -ifneq ($(filter j% rocksdbjava%, $(MAKECMDGOALS)),) +ifneq (${JAVA_HEADER_LIST_VAR},) DEPFILES += $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, ${ALL_JNI_NATIVE_SOURCES}) endif DEPFILES := $(patsubst %.cpp,$(OBJ_DIR)/%.cpp.d,$(DEPFILES)) @@ -3206,10 +3213,7 @@ endif # The .d file indicates .cc file's dependencies on .h files. 
We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. -ifeq (${MAKE_RESTARTS},) - GEN_ROCKSDB_JAVA_HEADER := rocksdbjava-header -endif -$(OBJ_DIR)/java/%.cc.d: java/%.cc ${GEN_ROCKSDB_JAVA_HEADER} +$(OBJ_DIR)/java/%.cc.d: java/%.cc java/include/java_header_list.mk $(AM_V_at)mkdir -p $(@D) $(AM_V_at)$(CXX) $(CXXFLAGS) \ -Ijava -Ijava/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS)\ diff --git a/java/Makefile b/java/Makefile index a8937205bb..a55505c0b9 100644 --- a/java/Makefile +++ b/java/Makefile @@ -500,6 +500,11 @@ java_test: java resolve_test_deps fi \ done $(AM_V_at)rm -rf $(NATIVE_INCLUDE)-test + $(AM_V_at)echo "JAVA_HEADER_LIST_VAR := $(wildcard ${NATIVE_INCLUDE}/*.h)" > $(NATIVE_INCLUDE)/java_header_list.mk.tmp + $(AM_V_at)cd $(NATIVE_INCLUDE); \ + if ! cmp -s java_header_list.mk.tmp java_header_list.mk; then \ + mv -f java_header_list.mk.tmp java_header_list.mk; \ + else rm -f java_header_list.mk.tmp; fi test: java java_test $(MAKE) run_test From b3a0114ea3e0ede9317c2c01dbb17719335669e1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 27 Aug 2025 21:38:53 +0800 Subject: [PATCH 059/175] c.h: mark ToplingDB api as weak symbol This allows user code to check the dynamic librocksdb.so is ToplingDB or upstream rocksdb: if (side_plugin_repo_create) { // it is ToplingDB // open db by ToplingDB side plugin side_plugin_repo_t* repo = side_plugin_repo_create(); side_plugin_repo_import_auto_file(repo, "someconf.yml", &errmsg); // more ... 
} else { // it is not ToplingDB, should be upstream rocksdb } --- include/rocksdb/c.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 16564ce79f..18d1bcb736 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -54,8 +54,10 @@ #else #define ROCKSDB_LIBRARY_API #endif +#define ROCKSDB_LIBRARY_API_WEAK ROCKSDB_LIBRARY_API #else #define ROCKSDB_LIBRARY_API +#define ROCKSDB_LIBRARY_API_WEAK __attribute__((__weak__)) #endif #ifdef __cplusplus @@ -587,7 +589,7 @@ extern ROCKSDB_LIBRARY_API void rocksdb_batched_multi_get_cf( rocksdb_pinnableslice_t** values, char** errs, const bool sorted_input); #if !defined(ROCKSDB_C_API_IMPLEMENTATION) -extern ROCKSDB_LIBRARY_API void rocksdb_batched_multi_get_cf_fast( +extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_batched_multi_get_cf_fast( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, size_t num_keys, const rocksdb_slice_t* keys_list, @@ -752,11 +754,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_iter_get_error( const rocksdb_iterator_t*, char** errptr); #if !defined(ROCKSDB_C_API_IMPLEMENTATION) -extern ROCKSDB_LIBRARY_API rocksdb_slice_t rocksdb_iter_key_fast( +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_slice_t rocksdb_iter_key_fast( const rocksdb_iterator_t*); -extern ROCKSDB_LIBRARY_API rocksdb_slice_t rocksdb_iter_value_fast( +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_slice_t rocksdb_iter_value_fast( const rocksdb_iterator_t*); -extern ROCKSDB_LIBRARY_API rocksdb_slice_t rocksdb_iter_timestamp_fast( +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_slice_t rocksdb_iter_timestamp_fast( const rocksdb_iterator_t*); #endif @@ -1935,13 +1937,13 @@ extern ROCKSDB_LIBRARY_API rocksdb_readoptions_t* rocksdb_readoptions_create( void); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_destroy( rocksdb_readoptions_t*); -extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_start_pin( 
+extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_start_pin( rocksdb_readoptions_t*); -extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_finish_pin( +extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_finish_pin( rocksdb_readoptions_t*); -extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_async_queue_depth( +extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_set_async_queue_depth( rocksdb_readoptions_t*, size_t); -extern ROCKSDB_LIBRARY_API size_t rocksdb_readoptions_get_async_queue_depth( +extern ROCKSDB_LIBRARY_API_WEAK size_t rocksdb_readoptions_get_async_queue_depth( rocksdb_readoptions_t*); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_verify_checksums( rocksdb_readoptions_t*, unsigned char); @@ -3076,33 +3078,33 @@ rocksdb_wait_for_compact_options_get_timeout( // typedef struct side_plugin_repo_t side_plugin_repo_t; -extern ROCKSDB_LIBRARY_API side_plugin_repo_t* side_plugin_repo_create(void); +extern ROCKSDB_LIBRARY_API_WEAK side_plugin_repo_t* side_plugin_repo_create(void); -extern ROCKSDB_LIBRARY_API void side_plugin_repo_import_auto_file +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_import_auto_file (side_plugin_repo_t*, const char* fname, char** errptr); -extern ROCKSDB_LIBRARY_API rocksdb_t* +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_t* side_plugin_repo_open(side_plugin_repo_t*, rocksdb_column_family_handle_t***, size_t* num_cf, char** errptr); -extern ROCKSDB_LIBRARY_API void side_plugin_repo_start_http(side_plugin_repo_t*, char** errptr); -extern ROCKSDB_LIBRARY_API void side_plugin_repo_close_http(side_plugin_repo_t*); +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_start_http(side_plugin_repo_t*, char** errptr); +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_http(side_plugin_repo_t*); -extern ROCKSDB_LIBRARY_API rocksdb_options_t* +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_options_t* side_plugin_repo_get_db_options(side_plugin_repo_t*, const char* name, char** errptr); -extern 
ROCKSDB_LIBRARY_API void +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_put_db_options(side_plugin_repo_t*, const char* name, rocksdb_options_t*); -extern ROCKSDB_LIBRARY_API rocksdb_options_t* +extern ROCKSDB_LIBRARY_API_WEAK rocksdb_options_t* side_plugin_repo_get_cf_options(side_plugin_repo_t*, const char* name, char** errptr); -extern ROCKSDB_LIBRARY_API void +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_put_cf_options(side_plugin_repo_t*, const char* name, rocksdb_options_t*); -extern ROCKSDB_LIBRARY_API void side_plugin_repo_close_all(side_plugin_repo_t*); +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_all(side_plugin_repo_t*); -extern ROCKSDB_LIBRARY_API const char* rocksdb_get_name(rocksdb_t*); +extern ROCKSDB_LIBRARY_API_WEAK const char* rocksdb_get_name(rocksdb_t*); #ifdef __cplusplus } /* end extern "C" */ From fd02108f7e37baef206b9affd5a934b0948bf427 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 27 Aug 2025 23:10:13 +0800 Subject: [PATCH 060/175] RocksIterator.key()/value(): Mark as final --- java/src/main/java/org/rocksdb/RocksIterator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index f317820175..26ef0be3c9 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -37,7 +37,7 @@ protected RocksIterator(final RocksDB rocksDB, final long nativeHandle) { * * @return key for the current entry. */ - public byte[] key() { + public final byte[] key() { assert(isOwningHandle()); assert(isValid()); long keyPtr = getZeroCopyKeyPtr(); @@ -234,7 +234,7 @@ public int key(final ByteBuffer key) { *

REQUIRES: !AtEnd() && !AtStart()

* @return value for the current entry. */ - public byte[] value() { + public final byte[] value() { assert(isOwningHandle()); fetchValue(); long valueLen = getZeroCopyValueLen(); From 9a333077df55417f84c72437bf664d1bd59a4cf6 Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 29 Aug 2025 01:48:45 +0800 Subject: [PATCH 061/175] Makefile: timing build commands --- Makefile | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 57c08a1acf..d1be1b2519 100644 --- a/Makefile +++ b/Makefile @@ -642,6 +642,13 @@ endif # WITH_TOPLING_DCOMPACT # EXTRA_LIB_SOURCES single file compiling is slow LIB_SOURCES := ${EXTRA_LIB_SOURCES} ${LIB_SOURCES} +ifeq ($(PLATFORM),OS_MACOSX) + # this needs homebrew + TIME_CMD := gtime +else + TIME_CMD := /usr/bin/time +endif + AM_DEFAULT_VERBOSITY ?= 0 AM_V_GEN = $(am__v_GEN_$(V)) @@ -653,26 +660,19 @@ am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) am__v_at_0 = @ am__v_at_1 = -AM_V_CC = $(am__v_CC_$(V)) -am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) -am__v_CC_0 = @echo " CC " $@; -am__v_CC_1 = +AM_V_CC = ${AM_V_at}mkdir -p $(dir $@) && ${TIME_CMD} -f "%e %S" -o >(printf '${OBJ_DIR} CC ${suffix $@} %6.2f %5.2f %s\n' `cat` $<) AM_V_CCLD = $(am__v_CCLD_$(V)) am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY)) ifneq ($(SKIP_LINK), 1) -am__v_CCLD_0 = @echo " CCLD " $@; -am__v_CCLD_1 = +AM_V_CCLD = ${AM_V_at}${TIME_CMD} -f "%e %S" -o >(printf '${OBJ_DIR} LD so %6.2f %5.2f %s\n' `cat` $@) else am__v_CCLD_0 = @echo " !CCLD " $@; true skip am__v_CCLD_1 = true skip endif -AM_V_AR = $(am__v_AR_$(V)) -am__v_AR_ = $(am__v_AR_$(AM_DEFAULT_VERBOSITY)) -am__v_AR_0 = @echo " AR " $@; -am__v_AR_1 = +AM_V_AR = ${AM_V_at}${TIME_CMD} -f "%e %S" -o >(printf '${OBJ_DIR} AR .a %6.2f %5.2f %s\n' `cat` $@) -AM_LINK = $(AM_V_CCLD)$(CXX) -L. 
$(patsubst lib%.a, -l%, $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^)) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +AM_LINK = $(AM_V_CCLD) $(CXX) -L. $(patsubst lib%.a, -l%, $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^)) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) AM_SHARE = $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$@ -L. $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^) $(EXTRA_SHARED_LIB_LIB) $(EXEC_LDFLAGS) $(LDFLAGS) -o $@ ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), plugin/$(plugin)/*.mk) @@ -2378,7 +2378,7 @@ ldb: $(OBJ_DIR)/tools/ldb.o $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) iostats_context_test: $(OBJ_DIR)/monitoring/iostats_context_test.o $(TEST_LIBRARY) $(LIBRARY) - $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) + $(AM_V_CCLD) $(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) persistent_cache_test: $(OBJ_DIR)/utilities/persistent_cache/persistent_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) @@ -3021,17 +3021,17 @@ rocksdbjavastaticnexusbundlejar: rocksdbjavageneratepom # A version of each $(LIBOBJECTS) compiled with -fPIC jl/%.o: %.cc - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) + $(AM_V_CC) $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) JNI_USE_KEY_VALUE_POPULATOR ?= 1 ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -DJNI_USE_KEY_VALUE_POPULATOR=${JNI_USE_KEY_VALUE_POPULATOR} ${ALL_JNI_NATIVE_OBJECTS}: CXXFLAGS += -I./java/. 
-I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -rocksdbjava: $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} +java/target/$(ROCKSDBJNILIB): $(LIB_OBJECTS) $(ALL_JNI_NATIVE_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) - $(AM_V_at)$(CXX) $(CXXFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) \ + $(AM_V_CCLD) $(CXX) $(CXXFLAGS) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ $(addprefix ${TOPLING_CORE_DIR}/, $(TOPLING_LIB_OBJ_LIST_VAR)) \ -Wl,--whole-archive \ @@ -3043,6 +3043,8 @@ endif ifeq ($(STRIP_DEBUG_INFO),1) $(AM_V_at)${STRIP_CMD} java/target/*.so endif + +rocksdbjava: java/target/$(ROCKSDBJNILIB) $(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md ifeq (${ROCKSDB_JAR_WITH_DYNAMIC_LIBS},1) $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) *.so @@ -3168,28 +3170,28 @@ IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBu else ifeq ($(HAVE_POWER8),1) $(OBJ_DIR)/util/crc32c_ppc.o: util/crc32c_ppc.c - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ + $(AM_V_CC) $(CC) $(CFLAGS) -c $< -o $@ $(OBJ_DIR)/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ + $(AM_V_CC) $(CC) $(CFLAGS) -c $< -o $@ endif $(OBJ_DIR)/%.o: %.cc - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) + $(AM_V_CC) $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) $(OBJ_DIR)/%.o: %.cpp - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) + $(AM_V_CC) $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) $(OBJ_DIR)/%.o: %.c - $(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) -c $< -o $@ + $(AM_V_CC) $(CC) $(CFLAGS) -c $< -o $@ $(OBJ_DIR)/%.s: %.cc - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -Wa,-adhln -fverbose-asm -masm=intel -S $< -o $@ $(COVERAGEFLAGS) + 
$(AM_V_CC) $(CXX) $(CXXFLAGS) -Wa,-adhln -fverbose-asm -masm=intel -S $< -o $@ $(COVERAGEFLAGS) $(OBJ_DIR)/%.s: %.cpp - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fverbose-asm -masm=intel -S $< -o $@ $(COVERAGEFLAGS) + $(AM_V_CC) $(CXX) $(CXXFLAGS) -fverbose-asm -masm=intel -S $< -o $@ $(COVERAGEFLAGS) $(OBJ_DIR)/%.s: %.c - $(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) -fverbose-asm -masm=intel -S $< -o $@ + $(AM_V_CC) $(CC) $(CFLAGS) -fverbose-asm -masm=intel -S $< -o $@ endif # --------------------------------------------------------------------------- @@ -3214,18 +3216,17 @@ endif # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. $(OBJ_DIR)/java/%.cc.d: java/%.cc java/include/java_header_list.mk - $(AM_V_at)mkdir -p $(@D) - $(AM_V_at)$(CXX) $(CXXFLAGS) \ + $(AM_V_CC) $(CXX) $(CXXFLAGS) \ -Ijava -Ijava/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS)\ -MM -MT'$@' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' "$<" -o '$@' $(OBJ_DIR)/%.cc.d: %.cc - @mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ + $(AM_V_CC) $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.cc=.o)' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' \ "$<" -o '$@' $(OBJ_DIR)/%.cpp.d: %.cpp - @mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ + $(AM_V_CC) $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.cpp=.o)' -MT'$(<:%.cpp=$(OBJ_DIR)/%.o)' \ "$<" -o '$@' From b8d7ce314b275e7bedc39a76910e909e97e2aeae Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 29 Aug 2025 16:39:51 +0800 Subject: [PATCH 062/175] Update pom and .github/workflows --- .github/workflows/topling-jni.yml | 132 +++++++++++++++++++----------- .gitignore | 3 +- java/jmh/pom.xml | 23 +++--- java/pom.xml.template | 12 +-- 4 files changed, 103 insertions(+), 67 deletions(-) diff --git a/.github/workflows/topling-jni.yml b/.github/workflows/topling-jni.yml index 52f6e3f6ca..e21007a051 
100644 --- a/.github/workflows/topling-jni.yml +++ b/.github/workflows/topling-jni.yml @@ -4,22 +4,28 @@ name: "build topling-jni" on: workflow_dispatch: inputs: - repository_url: - required: true - default: 'topling/toplingdb' - repository_branch: + version_suffix: required: false - default: 'memtable_as_log_index' - test: + default: '-SNAPSHOT' + use_lto: + required: false + description: Link Use LTO + default: '0' + unit_test: required: false type: boolean - description: test SideGetBenchmarks + description: Unit Test default: false deploy_maven: required: false type: boolean description: publish to maven repo default: true + jmh_test: + required: false + type: boolean + description: JMH Test + default: false jobs: build: @@ -28,36 +34,28 @@ jobs: env: GCC_VER: "11.3" # TODO: better get from the 'gcc --version' GITHUB_TOKEN: ${{ github.token }} - REP_URL: ${{ inputs.repository_url }} permissions: contents: read packages: write steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: - repository: ${{ inputs.repository_url }} - ref: ${{ inputs.repository_branch }} fetch-depth: 1 - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: '11' distribution: 'temurin' cache: maven server-id: github # Value of the distributionManagement/repository/id field of the pom.xml settings-path: ${{ github.workspace }} # location for the settings.xml file - #- name: Cache Maven # Replace by setup-java now - # uses: actions/cache@v3 - # with: - # path: ~/.m2/repository - # key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - # restore-keys: ${{ runner.os }}-m2 - - name: Init Env & Compile RocksDB + - name: Install Prerequisites run: | + set -xe cat $GITHUB_WORKSPACE/settings.xml sudo apt-get update -y && sudo apt-get install -y \ libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ @@ -68,62 +66,96 @@ jobs: git submodule update --init --recursive mkdir -p ~/.ssh && mkdir -p /opt/lib ssh-keyscan -t rsa 
github.com >> ~/.ssh/known_hosts - # this step could take a long time? - make -j`nproc` DEBUG_LEVEL=0 shared_lib - sudo make install-shared PREFIX=/opt - ls -l /opt/lib - - name: Compile RocksDBJava + echo nproc = `nproc` + + - name: Run Unit Test + if: ${{ inputs.unit_test }} + run: | + set -xe + echo $JAVA_HOME + make jtest -j`nproc` DEBUG_LEVEL=1 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ + ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + + - name: Compile RocksDBJava Release run: | + set -xe echo $JAVA_HOME - make rocksdbjava -j`nproc` DEBUG_LEVEL=0 STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + env USE_LTO=${{inputs.use_lto}} \ + make rocksdbjava -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ + STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + cd java + # F\?ROCKSDB_JAVA_VERSION for both flink and normal rocksdbjni + sed 's/\${F\?ROCKSDB_JAVA_VERSION}/'"${ROCKSDB_JAVA_VERSION}/" pom.xml.template > pom.xml - - name: Move to Local Maven Repo + - name: Move to Local & Publish rocksjni to GitHub Packages run: | - cd java/target || exit - cp -v rocksdbjni-8.10.2-linux64.jar rocksdbjni-8.10.2-SNAPSHOT-linux64.jar - mvn install:install-file -ntp -Dfile=rocksdbjni-8.10.2-SNAPSHOT-linux64.jar \ - -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=8.10.2-SNAPSHOT -Dpackaging=jar + set -xe + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + cd java/target + mvn install:install-file -ntp -e \ + -DpomFile=../pom.xml \ + -Dfile=rocksdbjni-${ROCKSDB_VERSION}-linux64.jar \ + -Dversion=${ROCKSDB_JAVA_VERSION} # TODO: why 'deploy' doesn't include install step here? if we only use deploy, will lack local jar if ${{ inputs.deploy_maven }}; then # TODO: what's the pom file for it? 
add with '-DpomFile=/xx/pom.xml' mvn deploy:deploy-file -e -s $GITHUB_WORKSPACE/settings.xml \ - -DpomFile=$GITHUB_WORKSPACE/java/pom.xml.template \ - -Durl=https://maven.pkg.github.com/$REP_URL -DrepositoryId=github \ - -Dfile=rocksdbjni-8.10.2-SNAPSHOT-linux64.jar -DgroupId=org.rocksdb \ - -DartifactId=rocksdbjni -Dversion=8.10.2-SNAPSHOT -Dpackaging=jar + -DpomFile=../pom.xml \ + -Durl=https://maven.pkg.github.com/$GITHUB_REPOSITORY -DrepositoryId=github \ + -Dfile=rocksdbjni-${ROCKSDB_VERSION}-linux64.jar \ + -Dversion=${ROCKSDB_JAVA_VERSION} fi # for compile jmh.jar to test the performance - name: Build SideGetBenchmarks with Maven run: | - echo ${{ github.workspace }} && echo $GITHUB_WORKSPACE - pwd && ls -l - (cd java/jmh && ls -l && pwd) || exit - mvn clean package -e -ntp -f $GITHUB_WORKSPACE/java/jmh/pom.xml # -B in non-interactive (Batch) mode + set -xe + echo ${{github.workspace}} && echo $GITHUB_WORKSPACE + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + cd java/jmh && ls -l && pwd + db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml` + mvn clean package -e -ntp -f pom.xml \ + -D db.artifactId=${db_artifactId} \ + -D db.version=${ROCKSDB_JAVA_VERSION} \ - - name: Run SideGetBenchmarks & Check it - if: ${{ inputs.test }} + - name: Run JMH SideGetBenchmarks + if: ${{ inputs.jmh_test }} run: | + set -xe + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} mkdir -p /dev/shm/db_bench_community - cd $GITHUB_WORKSPACE/java/jmh || exit + cd $GITHUB_WORKSPACE/java/jmh ls ../../sideplugin/rockside/src/topling/web cp -v $GITHUB_WORKSPACE/sideplugin/rockside/src/topling/web/{style.css,index.html} /dev/shm/db_bench_community - echo $LD_LIBRARY_PATH - export LD_LIBRARY_PATH=/opt/lib:$LD_LIBRARY_PATH # for libterark-* - echo $LD_LIBRARY_PATH && ls -l /opt/lib + export 
LD_LIBRARY_PATH=$GITHUB_WORKSPACE/java/target:$LD_LIBRARY_PATH # Note: webserver should visit while running - export LD_PRELOAD=libterark-zbs-g++-11.3-r.so:libterark-fsa-g++-11.3-r.so:libjemalloc.so + db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml` java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar \ + -D db.artifactId=${db_artifactId} \ + -D db.version=${ROCKSDB_JAVA_VERSION} \ -p keyCount=1000 -p keySize=128 -p valueSize=32768 \ -p sideConf=$GITHUB_WORKSPACE/sideplugin/rockside/sample-conf/db_bench_community.yaml SideGetBenchmarks - - name: Publish JAR to GitHub Packages + - name: Publish ToplingDB JMH JAR to GitHub Packages if: ${{ inputs.deploy_maven }} run: | - cd $GITHUB_WORKSPACE/java/jmh || exit - ls -l $GITHUB_WORKSPACE && tail -15 pom.xml - mvn deploy -e -f $GITHUB_WORKSPACE/java/jmh/pom.xml -s $GITHUB_WORKSPACE/settings.xml \ - -DaltDeploymentRepository=github::default::https://maven.pkg.github.com/$REP_URL + set -xe + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + cd $GITHUB_WORKSPACE/java/jmh + db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml` + mvn deploy -e -f pom.xml \ + -D db.artifactId=${db_artifactId} \ + -D db.version=${ROCKSDB_JAVA_VERSION} \ + -s $GITHUB_WORKSPACE/settings.xml \ + -DaltDeploymentRepository=github::default::https://maven.pkg.github.com/$GITHUB_REPOSITORY #env: # GITHUB_TOKEN: ${{ github.token }} diff --git a/.gitignore b/.gitignore index 7a36c4dfc9..6b6adc03fd 100644 --- a/.gitignore +++ b/.gitignore @@ -66,7 +66,8 @@ java/out java/target java/test-libs java/*.log -java/include/org_rocksdb_*.h +java/include +java/pom.xml .idea/ *.iml diff --git a/java/jmh/pom.xml b/java/jmh/pom.xml index f9f9474f45..ff8bd727d5 100644 --- a/java/jmh/pom.xml +++ b/java/jmh/pom.xml @@ -4,18 +4,18 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - 
org.rocksdb + com.github.topling rocksdbjni-jmh 1.0-SNAPSHOT - http://rocksdb.org/ + https://github.com/topling/toplingdb rocksdbjni-jmh JMH Benchmarks for RocksDB Java API - Facebook, Inc. - https://www.facebook.com + Topling, Inc. + https://topling.cn @@ -32,9 +32,9 @@ - scm:git:git://github.com/facebook/rocksdb.git - scm:git:git@github.com:facebook/rocksdb.git - http://github.com/facebook/rocksdb/ + scm:git:https://github.com/topling/toplingdb.git + scm:git:git@github.com:topling/toplingdb.git + https://github.com/topling/toplingdb/ @@ -44,13 +44,16 @@ 1.22 benchmarks + + rocksdbjni + 8.10.2-SNAPSHOT - org.rocksdb - rocksdbjni - 8.10.2-SNAPSHOT + com.github.topling + ${db.artifactId} + ${db.version} diff --git a/java/pom.xml.template b/java/pom.xml.template index 9dd9c74f34..7410e0b8b1 100644 --- a/java/pom.xml.template +++ b/java/pom.xml.template @@ -2,7 +2,7 @@ 4.0.0 - org.rocksdb + com.github.topling rocksdbjni ${ROCKSDB_JAVA_VERSION} @@ -10,8 +10,8 @@ RocksDB fat jar that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files for Mac OSX, and a .dll for Windows x64. 
- https://rocksdb.org - 2012 + https://github.com/topling/toplingdb + 2025 @@ -82,7 +82,7 @@ 2.18.1 ${argLine} -ea -Xcheck:jni -Djava.library.path=${project.build.directory} - false + false false ${project.build.directory}/* @@ -146,7 +146,7 @@ 4.7.2.1 spotbugs-exclude.xml - + @@ -175,7 +175,7 @@ - + From 8b7c642f6c737c632423ec492cda476d629d8a88 Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 29 Aug 2025 20:00:47 +0800 Subject: [PATCH 063/175] Add .github/workflows/android-jni.yml --- .github/workflows/android-jni.yml | 104 ++++++++++++++++++++++++++++++ android-build.sh | 6 +- 2 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/android-jni.yml diff --git a/.github/workflows/android-jni.yml b/.github/workflows/android-jni.yml new file mode 100644 index 0000000000..f74aaf3b69 --- /dev/null +++ b/.github/workflows/android-jni.yml @@ -0,0 +1,104 @@ +name: "build android-jni" + +on: + workflow_dispatch: + inputs: + version_suffix: + required: false + default: '-SNAPSHOT' + use_lto: + required: false + description: Link Use LTO + default: '0' + verbose_build_output: + required: false + description: run make with V=... 
+ default: '0' + publish: + required: false + type: boolean + description: publish to github + default: true + +env: + NDK_VERSION: 26.1.10909125 + ANDROID_ABI: arm64-v8a + ANDROID_PLATFORM: android-34 + ANDROID_NDK_ROOT: ${{github.workspace}}/android-ndk + +jobs: + build: + runs-on: ubuntu-24.04 + env: + GITHUB_TOKEN: ${{github.token}} + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven + server-id: github # Value of the distributionManagement/repository/id field of the pom.xml + settings-path: ${{github.workspace}} # location for the settings.xml file + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + + - name: Display NDK information + run: | + set -xe + env | egrep 'ANDROID|NDK' + #find /usr/local/lib/android/sdk -type d + $ANDROID_NDK_HOME/ndk-build --version + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + export ROCKSDB_DISABLE_GFLAGS=1 + make clean # this triggers auto git clone + + - name: Compile RocksDB Java Release + run: | + set -xe + echo $JAVA_HOME + export V=${{inputs.verbose_build_output}} + export USE_LTO=${{inputs.use_lto}} + bash -x android-build.sh + + - name: Generate java/pom.xml from template + run: | + set -xe + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + cd java + # F\?ROCKSDB_JAVA_VERSION for both flink and normal rocksdbjni + sed -e 's/\${F\?ROCKSDB_JAVA_VERSION}/'"${ROCKSDB_JAVA_VERSION}/" \ + -e 's/\(.*\)\(f\?rocksdbjni\)\(<\/artifactId>.*\)/\1\2-'"$ANDROID_PLATFORM"'\3/' \ + pom.xml.template > pom.xml + + - name: Publish rocksjni-android 
jar to GitHub Packages + run: | + ROCKSDB_VERSION=`build_tools/version.sh full` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} + set -xe + cd java/target + mvn deploy:deploy-file -e -s $GITHUB_WORKSPACE/settings.xml \ + -DpomFile=../pom.xml \ + -Durl=https://maven.pkg.github.com/$GITHUB_REPOSITORY -DrepositoryId=github \ + -Dfile=rocksdbjni-${ROCKSDB_VERSION}-linux64.jar \ + -Dversion=${ROCKSDB_JAVA_VERSION} diff --git a/android-build.sh b/android-build.sh index ef648d78f9..8bc7e6b7bb 100755 --- a/android-build.sh +++ b/android-build.sh @@ -3,8 +3,10 @@ set -e # you should just change the 2 lines -ANDROID_HOME=${HOME}/osc/android -export ANDROID_NDK_HOME=${ANDROID_HOME}/ndk/26.1.10909125 +if [ -z "$ANDROID_NDK_HOME" ]; then + ANDROID_HOME=${HOME}/osc/android + export ANDROID_NDK_HOME=${ANDROID_HOME}/ndk/26.1.10909125 +fi # these lines need not change CXX_HOME=${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64 From 903021a743c1b500a7e00942ad246ffb9538fb36 Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 29 Aug 2025 21:18:31 +0800 Subject: [PATCH 064/175] Makefile: CXXFLAGS += $(ARCHFLAG) just for osx And: android-build.sh: comment out ARCHFLAG --- Makefile | 3 +++ android-build.sh | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d1be1b2519..9eab8039be 100644 --- a/Makefile +++ b/Makefile @@ -713,7 +713,10 @@ $(error pkg-config failed) endif endif +# the commit msg says it is for osx, and it fails for android x64-aarch64 build +ifeq ($(PLATFORM),OS_MACOSX) CXXFLAGS += $(ARCHFLAG) +endif ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc+crypto -xc /dev/null 2>&1)) ifneq ($(PLATFORM),OS_MACOSX) diff --git a/android-build.sh b/android-build.sh index 8bc7e6b7bb..45daababa1 100755 --- a/android-build.sh +++ b/android-build.sh @@ -17,7 +17,7 @@ export AR=${CXX_HOME}/bin/llvm-ar export STRIP_CMD=${CXX_HOME}/bin/llvm-strip export ENABLE_AUTO_CHECK_LD=0 export JAVAC_ARGS="-source 17 -target 17" 
-export BUILD_PREFIX=../build-toplingdb/ +#export BUILD_PREFIX=../build-toplingdb/ # default is empty export ROCKSDB_DISABLE_GFLAGS=1 export CPU="-O2" # fool the make export WITH_BMI2=na @@ -27,7 +27,7 @@ export DISABLE_JEMALLOC=1 export EXTRA_CXXFLAGS="-Wno-deprecated-builtins -Wno-shorten-64-to-32 -DBOOST_NO_CXX98_FUNCTION_BASE" export EXTRA_CXXFLAGS="-Wno-deprecated-builtins -DBOOST_NO_CXX98_FUNCTION_BASE" export W_shorten_64_to_32=0 -export ARCHFLAG="-arch aarch64" +#export ARCHFLAG="-arch aarch64" export MACHINE=aarch64 export TARGET_OS=OS_ANDROID_CROSSCOMPILE export COMPILER=clang-17.0 From 0e14f74bb498382195fbfdc660ce95371e3a1e16 Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 30 Aug 2025 11:17:51 +0800 Subject: [PATCH 065/175] fs_cat.cc: fix clang bad warn move/std::move --- env/fs_cat.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/env/fs_cat.cc b/env/fs_cat.cc index 6b39de21ff..4d07273378 100644 --- a/env/fs_cat.cc +++ b/env/fs_cat.cc @@ -26,13 +26,17 @@ IOStatus CopyAcrossFS(FileSystem* dest, FileSystem* src, if (!ios.ok()) { return ios; } - auto src_reader = make_unique(move(srcfile), fname); + // we have using namespace std; + // fuck clang warns on use `move` instead of `std::move` + auto src_reader = make_unique(std::move(srcfile), fname); std::unique_ptr dstfile; ios = dest->NewWritableFile(fname, fo, &dstfile, dbg); if (!ios.ok()) { return ios; } - auto dest_writer = make_unique(move(dstfile), fname, fo); + // we have using namespace std; + // fuck clang warns on use `move` instead of `std::move` + auto dest_writer = make_unique(std::move(dstfile), fname, fo); const size_t bufsize = 1024 * 1024; #if defined(_MSC_VER) char* buffer = (char*)_aligned_malloc(bufsize, 4096); From 0b3015a8d59a357998853fe05b16f5a4f8c67daa Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 30 Aug 2025 16:13:33 +0800 Subject: [PATCH 066/175] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/sideplugin/rockside b/sideplugin/rockside index 8a570a52f8..092f70ee2e 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 8a570a52f80a2c7d135e04f189e71dd94dd21b58 +Subproject commit 092f70ee2ed7c43380f4415386851896d1ec7a89 From 285a8262f3b005798a07ae2f8c746401521afc71 Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 30 Aug 2025 12:09:26 +0800 Subject: [PATCH 067/175] Makefile: depend include all .d Note: this deleted some IBM power8 related depend gen --- Makefile | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 9eab8039be..b51dd91719 100644 --- a/Makefile +++ b/Makefile @@ -3233,25 +3233,19 @@ $(OBJ_DIR)/%.cpp.d: %.cpp -MM -MT'$@' -MT'$(<:.cpp=.o)' -MT'$(<:%.cpp=$(OBJ_DIR)/%.o)' \ "$<" -o '$@' -ifeq ($(HAVE_POWER8),1) DEPFILES_C = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C)) DEPFILES_ASM = $(patsubst %.S, $(OBJ_DIR)/%.S.d, $(LIB_SOURCES_ASM)) $(OBJ_DIR)/%.c.d: %.c - @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ - -MM -MT'$@' -MT'$(<:.c=.o)' "$<" -o '$@' + $(AM_V_CC) $(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) \ + -MM -MT'$@' -MT'$(<:.c=.o)' -MT'$(<:%.c=$(OBJ_DIR)/%.o)' \ + "$<" -o '$@' $(OBJ_DIR)/%.S.d: %.S @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.S=.o)' "$<" -o '$@' -$(DEPFILES_C): %.c.d - -$(DEPFILES_ASM): %.S.d -depend: $(DEPFILES) $(DEPFILES_C) $(DEPFILES_ASM) -else depend: $(DEPFILES) -endif build_subset_tests: $(ROCKSDBTESTS_SUBSET) $(AM_V_GEN)if [ -n "$${ROCKSDBTESTS_SUBSET_TESTS_TO_FILE}" ]; then echo "$(ROCKSDBTESTS_SUBSET)" > "$${ROCKSDBTESTS_SUBSET_TESTS_TO_FILE}"; else echo "$(ROCKSDBTESTS_SUBSET)"; fi From b4cd676f6a3c9ad16ff8b81acc17fc1813043840 Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 30 Aug 2025 15:43:56 +0800 Subject: [PATCH 068/175] Makefile: -flto=auto on link --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b51dd91719..452e34b0d3 100644 
--- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ endif # supported natively in Makefile). ifeq ($(USE_LTO), 1) CXXFLAGS += -flto - LDFLAGS += -flto -fuse-linker-plugin + LDFLAGS += -flto=auto -fuse-linker-plugin endif # `COERCE_CONTEXT_SWITCH=1` will inject spurious wakeup and From cc8bd51f7a733209ed90e0721e31a301b6fbb120 Mon Sep 17 00:00:00 2001 From: leipeng Date: Sat, 30 Aug 2025 11:20:34 +0800 Subject: [PATCH 069/175] change ./github/workflow --- .github/workflows/android-jni.yml | 2 +- .github/workflows/topling-jni.yml | 57 +++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/.github/workflows/android-jni.yml b/.github/workflows/android-jni.yml index f74aaf3b69..bf55b77b3e 100644 --- a/.github/workflows/android-jni.yml +++ b/.github/workflows/android-jni.yml @@ -9,7 +9,7 @@ on: use_lto: required: false description: Link Use LTO - default: '0' + default: '1' verbose_build_output: required: false description: run make with V=... diff --git a/.github/workflows/topling-jni.yml b/.github/workflows/topling-jni.yml index e21007a051..49bbd05cc6 100644 --- a/.github/workflows/topling-jni.yml +++ b/.github/workflows/topling-jni.yml @@ -10,7 +10,7 @@ on: use_lto: required: false description: Link Use LTO - default: '0' + default: '1' unit_test: required: false type: boolean @@ -62,26 +62,58 @@ jobs: libbz2-dev libcurl4-gnutls-dev liburing-dev \ libsnappy-dev libbz2-dev liblz4-dev libzstd-dev - gcc --version + - name: Init submodule & Setup ssh + run: | git submodule update --init --recursive mkdir -p ~/.ssh && mkdir -p /opt/lib ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts - echo nproc = `nproc` - - name: Run Unit Test + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. 
+ # `clean` is just used for triggers auto git clone + make clean + + - name: make depend for Compile Unit Test + if: ${{ inputs.unit_test }} + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # need to explicit set MAKE_UNIT_TEST=1 + echo JAVA_HOME = $JAVA_HOME + set -xe + make depend java/include/java_header_list.mk \ + MAKE_UNIT_TEST=1 \ + -j`nproc` DEBUG_LEVEL=1 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 + + - name: Compile & Run Java Unit Test if: ${{ inputs.unit_test }} run: | + echo JAVA_HOME = $JAVA_HOME set -xe - echo $JAVA_HOME + # Makefile will auto set MAKE_UNIT_TEST=1 for `jtest` make jtest -j`nproc` DEBUG_LEVEL=1 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 + + - name: make depend for Compile RocksDBJava Release + run: | + echo JAVA_HOME = $JAVA_HOME + set -xe + env USE_LTO=${{inputs.use_lto}} \ + make depend java/include/java_header_list.mk \ + -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ - ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 - name: Compile RocksDBJava Release run: | + echo JAVA_HOME = $JAVA_HOME set -xe - echo $JAVA_HOME env USE_LTO=${{inputs.use_lto}} \ make rocksdbjava -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ @@ -94,6 +126,7 @@ jobs: - name: Move to Local & Publish rocksjni to GitHub Packages run: | + echo JAVA_HOME = $JAVA_HOME set -xe ROCKSDB_VERSION=`build_tools/version.sh full` ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} @@ -123,7 +156,7 @@ jobs: db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml` mvn clean package -e -ntp -f pom.xml \ -D db.artifactId=${db_artifactId} \ - -D db.version=${ROCKSDB_JAVA_VERSION} \ + -D db.version=${ROCKSDB_JAVA_VERSION} - name: Run JMH SideGetBenchmarks if: ${{ inputs.jmh_test }} @@ -133,14 +166,12 @@ jobs: 
ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}${{inputs.version_suffix}} mkdir -p /dev/shm/db_bench_community cd $GITHUB_WORKSPACE/java/jmh - ls ../../sideplugin/rockside/src/topling/web cp -v $GITHUB_WORKSPACE/sideplugin/rockside/src/topling/web/{style.css,index.html} /dev/shm/db_bench_community export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/java/target:$LD_LIBRARY_PATH # Note: webserver should visit while running db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml` - java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar \ - -D db.artifactId=${db_artifactId} \ - -D db.version=${ROCKSDB_JAVA_VERSION} \ + java --add-opens java.base/java.nio=ALL-UNNAMED \ + -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar \ -p keyCount=1000 -p keySize=128 -p valueSize=32768 \ -p sideConf=$GITHUB_WORKSPACE/sideplugin/rockside/sample-conf/db_bench_community.yaml SideGetBenchmarks @@ -157,5 +188,3 @@ jobs: -D db.version=${ROCKSDB_JAVA_VERSION} \ -s $GITHUB_WORKSPACE/settings.xml \ -DaltDeploymentRepository=github::default::https://maven.pkg.github.com/$GITHUB_REPOSITORY - #env: - # GITHUB_TOKEN: ${{ github.token }} From 4bef1e1ad662fb05128afa4776c3c87b0bcb76b3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 31 Aug 2025 11:54:26 +0800 Subject: [PATCH 070/175] Add .github/workflows/topling-jni-release.yml --- .github/workflows/topling-jni-release.yml | 88 +++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 .github/workflows/topling-jni-release.yml diff --git a/.github/workflows/topling-jni-release.yml b/.github/workflows/topling-jni-release.yml new file mode 100644 index 0000000000..da2169ce16 --- /dev/null +++ b/.github/workflows/topling-jni-release.yml @@ -0,0 +1,88 @@ +name: Publish to Release + +on: + release: + types: [created] + +jobs: + build-and-publish: + runs-on: ubuntu-latest + permissions: + contents: write + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up 
JDK 11 + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + cache: maven + server-id: github # Value of the distributionManagement/repository/id field of the pom.xml + settings-path: ${{ github.workspace }} # location for the settings.xml file + + - name: Install Prerequisites + run: | + set -xe + cat $GITHUB_WORKSPACE/settings.xml + sudo apt-get update -y && sudo apt-get install -y \ + libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ + libbz2-dev libcurl4-gnutls-dev liburing-dev \ + libsnappy-dev libbz2-dev liblz4-dev libzstd-dev + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. 
+ # `clean` is just used for triggers auto git clone + make clean + + - name: make depend for Compile RocksDBJava Release + run: | + echo JAVA_HOME = $JAVA_HOME + set -xe + env USE_LTO=1 \ + make depend java/include/java_header_list.mk \ + -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ + STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + + - name: Compile RocksDBJava Release + run: | + echo JAVA_HOME = $JAVA_HOME + set -xe + env USE_LTO=1 \ + make rocksdbjava -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ + STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + + - name: Prerelease and Generate Checksums + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: | + set -xe + ROCKSDB_VERSION=`build_tools/version.sh full` + # ex: topling-8.10.2-frocksdb-1.0, part will be ignored + TOPLING_VERSION=`echo ${GITHUB_REF} | sed 's:^refs/tags/topling-'${ROCKSDB_VERSION}'[-_a-z]*\([.0-9]\):\1:'` + ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION} + cd java/target + db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml.template` + TARGET_JAR=${db_artifactId}-${ROCKSDB_JAVA_VERSION}.jar + mv rocksdbjni-${ROCKSDB_VERSION}-linux64.jar ${TARGET_JAR} + shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 + md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 + gh release upload --clobber ${{github.ref_name}} ${TARGET_JAR}* From e6dd406a2e1b4096fd564f1ab195e5ba811a66aa Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 2 Sep 2025 18:29:57 +0800 Subject: [PATCH 071/175] java: Add ColumnFamilyOptions.mergeOperatorName() --- java/src/main/java/org/rocksdb/ColumnFamilyOptions.java | 7 +++++++ .../java/org/rocksdb/ColumnFamilyOptionsInterface.java | 3 +++ 2 files changed, 10 insertions(+) diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index 607a17936e..3457983409 100644 --- 
a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -215,6 +215,7 @@ public ColumnFamilyOptions setMergeOperatorName(final String name) { "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); + mergeOperatorName_ = name; return this; } @@ -222,9 +223,13 @@ public ColumnFamilyOptions setMergeOperatorName(final String name) { public ColumnFamilyOptions setMergeOperator( final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); + mergeOperator_ = mergeOperator; return this; } + @Override public String mergeOperatorName() { return mergeOperatorName_; } + @Override public MergeOperator mergeOperator() { return mergeOperator_; } + @Override public ColumnFamilyOptions setCompactionFilter( final AbstractCompactionFilter> @@ -1557,4 +1562,6 @@ private native void setPrepopulateBlobCache( private CompressionOptions compressionOptions_; private SstPartitionerFactory sstPartitionerFactory_; private ConcurrentTaskLimiter compactionThreadLimiter_; + private MergeOperator mergeOperator_; + private String mergeOperatorName_; } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java index 4776773bd8..dca69717b0 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java @@ -172,6 +172,9 @@ T setComparator( */ T setMergeOperator(MergeOperator mergeOperator); + String mergeOperatorName(); + MergeOperator mergeOperator(); + /** * A single CompactionFilter instance to call into during compaction. 
* Allows an application to modify/delete a key-value during background From 8ed1550bca4d71aaf6cf40a266196f47bc252f7c Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 2 Sep 2025 18:36:05 +0800 Subject: [PATCH 072/175] change groupId from com.github.topling to cn.topling com.github.topling was used for github package, but github packages needs authentication even for read public packages. This requires the user setup settings.xml for access github packages, this is unacceptable, so we revert to use cn.topling as groupId. --- java/jmh/pom.xml | 4 ++-- java/pom.xml.template | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/jmh/pom.xml b/java/jmh/pom.xml index ff8bd727d5..c6f4ccf95c 100644 --- a/java/jmh/pom.xml +++ b/java/jmh/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - com.github.topling + cn.topling rocksdbjni-jmh 1.0-SNAPSHOT @@ -51,7 +51,7 @@ - com.github.topling + cn.topling ${db.artifactId} ${db.version} diff --git a/java/pom.xml.template b/java/pom.xml.template index 7410e0b8b1..f626cc0abb 100644 --- a/java/pom.xml.template +++ b/java/pom.xml.template @@ -2,7 +2,7 @@ 4.0.0 - com.github.topling + cn.topling rocksdbjni ${ROCKSDB_JAVA_VERSION} From 2b3f7ed853c8aa1dc21b66b64e72386032c8f3af Mon Sep 17 00:00:00 2001 From: leipeng Date: Wed, 3 Sep 2025 15:41:09 +0800 Subject: [PATCH 073/175] Options.java: Add missing merge operator members --- java/src/main/java/org/rocksdb/Options.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 29f5e8e0d2..9dc077581b 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -233,15 +233,20 @@ public Options setMergeOperatorName(final String name) { "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); + mergeOperatorName_ = name; 
return this; } @Override public Options setMergeOperator(final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); + mergeOperator_ = mergeOperator; return this; } + @Override public String mergeOperatorName() { return mergeOperatorName_; } + @Override public MergeOperator mergeOperator() { return mergeOperator_; } + @Override public Options setCompactionFilter( final AbstractCompactionFilter> @@ -2583,4 +2588,6 @@ private native void setPrepopulateBlobCache( private WriteBufferManager writeBufferManager_; private SstPartitionerFactory sstPartitionerFactory_; private ConcurrentTaskLimiter compactionThreadLimiter_; + private MergeOperator mergeOperator_; + private String mergeOperatorName_; } From 54aa76b53546a4a0400cf6fffd9ccba3b5606288 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 16:56:18 +0800 Subject: [PATCH 074/175] Makefile: var DISABLE_JEMALLOC & ROCKSDB_DISABLE_JEMALLOC --- Makefile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Makefile b/Makefile index 452e34b0d3..cee3f32432 100644 --- a/Makefile +++ b/Makefile @@ -109,6 +109,20 @@ endif $(info $$DEBUG_LEVEL is $(DEBUG_LEVEL), $$LIB_MODE is $(LIB_MODE)) +ifneq (${DISABLE_JEMALLOC},) +ifneq (${ROCKSDB_DISABLE_JEMALLOC},) + ifneq (${DISABLE_JEMALLOC},${ROCKSDB_DISABLE_JEMALLOC}) + $(error DISABLE_JEMALLOC="${DISABLE_JEMALLOC}" but ROCKSDB_DISABLE_JEMALLOC="${ROCKSDB_DISABLE_JEMALLOC}") + endif +endif +endif +ifeq (${DISABLE_JEMALLOC},1) + ifeq (${ROCKSDB_DISABLE_JEMALLOC},) + export_ROCKSDB_DISABLE_JEMALLOC := export ROCKSDB_DISABLE_JEMALLOC=1; + export ROCKSDB_DISABLE_JEMALLOC = 1 + endif +endif + # Detect what platform we're building on. # Export some common variables that might have been passed as Make variables # instead of environment variables. 
@@ -121,6 +135,7 @@ dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ export PORTABLE="$(PORTABLE)"; \ export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \ export ROCKSDB_USE_IO_URING="$(ROCKSDB_USE_IO_URING)"; \ + ${export_ROCKSDB_DISABLE_JEMALLOC} \ export ROCKSDB_DISABLE_TCMALLOC="$(ROCKSDB_DISABLE_TCMALLOC)"; \ export ROCKSDB_DISABLE_ZSTD=1; \ export USE_CLANG="$(USE_CLANG)"; \ From c90180fcceefa77e5bc0d2b7434fcce4cd56eff4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 18:12:02 +0800 Subject: [PATCH 075/175] Makefile: export ROCKSDB_HOME = $(realpath .) --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index cee3f32432..dfa5f775ff 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ USE_RTTI=1 ROCKSDB_USE_IO_URING=0 ROCKSDB_DISABLE_TCMALLOC=1 SKIP_FORMAT_BUCK_CHECKS=1 +export ROCKSDB_HOME = $(realpath .) ENABLE_AUTO_CHECK_LD ?= 1 ifeq (${ENABLE_AUTO_CHECK_LD},1) From 5f9774b27ea6c1d8de4b359c82926896bbef5c23 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 18:14:11 +0800 Subject: [PATCH 076/175] Makefile: librocksdb*: do not need libterark* --- Makefile | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index dfa5f775ff..e983030671 100644 --- a/Makefile +++ b/Makefile @@ -383,6 +383,7 @@ ifneq ("$(ROCKS_DEP_RULES)", "") -include java/include/java_header_list.mk endif endif +TOPLING_LIB_OBJECTS = $(addprefix ${TOPLING_CORE_DIR}/, ${TOPLING_LIB_OBJ_LIST_VAR}) ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) MAKE_UNIT_TEST ?= 1 @@ -413,8 +414,6 @@ CXXFLAGS += \ -I${TOPLING_CORE_DIR}/boost-include \ -I${TOPLING_CORE_DIR}/3rdparty/zstd -LDFLAGS += -L${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared \ - -lterark-{zbs,fsa,core}-${COMPILER}-${BUILD_TYPE_SIG} TOPLING_ZBS_TARGET := ${BUILD_ROOT}/lib_shared/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.${PLATFORM_SHARED_EXT} GIT_TOPLING_ROCKS ?= 
git@github.com:rockeet/topling-rocks @@ -1366,8 +1365,8 @@ $(SHARED3): $(SHARED4) ln -fs $(SHARED4) $(SHARED3) endif # PLATFORM_SHARED_VERSIONED -$(SHARED4): $(LIB_OBJECTS) - $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(EXTRA_SHARED_LIB_LIB) -Wl,-rpath,'$$ORIGIN' $(LDFLAGS) -o $@ +$(SHARED4): $(LIB_OBJECTS) ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} + $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(TOPLING_LIB_OBJECTS) $(EXTRA_SHARED_LIB_LIB) -Wl,-rpath,'$$ORIGIN' $(LDFLAGS) -o $@ endif # PLATFORM_SHARED_EXT .PHONY: check clean coverage ldb_tests package dbg gen-pc build_size \ @@ -1799,7 +1798,7 @@ package: # --------------------------------------------------------------------------- $(STATIC_LIBRARY): $(LIB_OBJECTS) $(AM_V_AR)rm -f $@ $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) - $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIB_OBJECTS) + $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIB_OBJECTS) $(TOPLING_LIB_OBJECTS) $(STATIC_TEST_LIBRARY): $(TEST_OBJECTS) $(AM_V_AR)rm -f $@ $(SHARED_TEST_LIBRARY) @@ -2625,20 +2624,16 @@ install-headers: gen-pc cp -ar ${TOPLING_CORE_DIR}/boost-include/boost $(DESTDIR)/$(PREFIX)/include install -C -m 644 rocksdb.pc $(INSTALL_LIBDIR)/pkgconfig/rocksdb.pc -install-static: TOPLING_LIB_STATIC_FILES := $(shell echo ${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_static/libterark-{core,fsa,zbs}-${COMPILER}-${BUILD_TYPE_SIG}.a) install-static: $(LIBRARY) static_lib install -d $(INSTALL_LIBDIR) install -C -m 755 $(LIBRARY) $(INSTALL_LIBDIR) - cp -a ${TOPLING_LIB_STATIC_FILES} $(INSTALL_LIBDIR) -install-shared: TOPLING_LIB_SHARED_FILES := $(shell echo ${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared/libterark-{core,fsa,zbs}-${COMPILER}-${BUILD_TYPE_SIG}.${PLATFORM_SHARED_EXT}) install-shared: $(SHARED4) shared_lib install -d $(INSTALL_LIBDIR) install -C -m 755 $(SHARED4) $(INSTALL_LIBDIR) ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED3) ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED2) ln -fs $(SHARED4) 
$(INSTALL_LIBDIR)/$(SHARED1) - cp -a ${TOPLING_LIB_SHARED_FILES} $(INSTALL_LIBDIR) install: install-${LIB_MODE} @@ -2667,7 +2662,6 @@ gen-pc: -echo 'Description: An embeddable persistent key-value store for fast storage' >> rocksdb.pc -echo Version: $(shell ./build_tools/version.sh full) >> rocksdb.pc -echo 'Libs: -L$${libdir} $(EXEC_LDFLAGS) -lrocksdb' >> rocksdb.pc - -echo 'Libs.private: -lterark-zbs-r -lterark-fsa-r -lterark-core-r $(PLATFORM_LDFLAGS)' >> rocksdb.pc -echo 'Cflags: -I$${includedir} $(PLATFORM_CXXFLAGS)' >> rocksdb.pc -echo 'Requires: $(subst ",,$(ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES))' >> rocksdb.pc @@ -3050,14 +3044,14 @@ ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) - $(AM_V_CCLD) $(CXX) $(CXXFLAGS) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ + $(AM_V_CCLD) $(CXX) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ - $(addprefix ${TOPLING_CORE_DIR}/, $(TOPLING_LIB_OBJ_LIST_VAR)) \ + $(TOPLING_LIB_OBJECTS) \ -Wl,--whole-archive \ ${BUNDLED_COMPRESSION_LIBS} \ -Wl,--no-whole-archive \ $(JAVA_LDFLAGS) \ - $(filter-out -L${TOPLING_CORE_DIR}% -lterark-%, $(LDFLAGS)) + $(LDFLAGS) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target ifeq ($(STRIP_DEBUG_INFO),1) $(AM_V_at)${STRIP_CMD} java/target/*.so From 4fef9fb8e16cc9ded411cb59136baece70f8b52a Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 19:28:27 +0800 Subject: [PATCH 077/175] topling-jni-release.yml: upload index.html style.css dcompact_worker --- .github/workflows/topling-jni-release.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/topling-jni-release.yml b/.github/workflows/topling-jni-release.yml index da2169ce16..e4c191a6ca 100644 --- a/.github/workflows/topling-jni-release.yml +++ b/.github/workflows/topling-jni-release.yml @@ -66,10 +66,20 @@ jobs: echo JAVA_HOME = $JAVA_HOME set -xe env 
USE_LTO=1 \ - make rocksdbjava -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ + make rocksdbjava install-dcompact -j`nproc` \ + DEBUG_LEVEL=0 UPDATE_REPO=0 PREFIX=install-here \ DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + - name: Strip and Change dcompact_worker + run: | + MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` + exebin=install-here/bin/dcompact_worker.exe + strip ${exebin} + patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so ${exebin} + gzip ${exebin} + mv ${exebin}.gz java/target/dcompact_worker.gz + - name: Prerelease and Generate Checksums env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} @@ -85,4 +95,4 @@ jobs: mv rocksdbjni-${ROCKSDB_VERSION}-linux64.jar ${TARGET_JAR} shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 - gh release upload --clobber ${{github.ref_name}} ${TARGET_JAR}* + gh release upload --clobber ${{github.ref_name}} ${TARGET_JAR}* index.html style.css dcompact_worker.gz From 5ea21d40037a3033f75690a093ff1d7d0af98718 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 20:53:34 +0800 Subject: [PATCH 078/175] Revert "Options.java: Add missing merge operator members" This reverts commit 2b3f7ed853c8aa1dc21b66b64e72386032c8f3af. 
--- java/src/main/java/org/rocksdb/Options.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 9dc077581b..29f5e8e0d2 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -233,20 +233,15 @@ public Options setMergeOperatorName(final String name) { "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); - mergeOperatorName_ = name; return this; } @Override public Options setMergeOperator(final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); - mergeOperator_ = mergeOperator; return this; } - @Override public String mergeOperatorName() { return mergeOperatorName_; } - @Override public MergeOperator mergeOperator() { return mergeOperator_; } - @Override public Options setCompactionFilter( final AbstractCompactionFilter> @@ -2588,6 +2583,4 @@ private native void setPrepopulateBlobCache( private WriteBufferManager writeBufferManager_; private SstPartitionerFactory sstPartitionerFactory_; private ConcurrentTaskLimiter compactionThreadLimiter_; - private MergeOperator mergeOperator_; - private String mergeOperatorName_; } From 2e30de8e15cef0527f965d68e5a36f6728d870f9 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 20:53:58 +0800 Subject: [PATCH 079/175] Revert "java: Add ColumnFamilyOptions.mergeOperatorName()" This reverts commit e6dd406a2e1b4096fd564f1ab195e5ba811a66aa. 
--- java/src/main/java/org/rocksdb/ColumnFamilyOptions.java | 7 ------- .../java/org/rocksdb/ColumnFamilyOptionsInterface.java | 3 --- 2 files changed, 10 deletions(-) diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index 3457983409..607a17936e 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -215,7 +215,6 @@ public ColumnFamilyOptions setMergeOperatorName(final String name) { "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); - mergeOperatorName_ = name; return this; } @@ -223,13 +222,9 @@ public ColumnFamilyOptions setMergeOperatorName(final String name) { public ColumnFamilyOptions setMergeOperator( final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); - mergeOperator_ = mergeOperator; return this; } - @Override public String mergeOperatorName() { return mergeOperatorName_; } - @Override public MergeOperator mergeOperator() { return mergeOperator_; } - @Override public ColumnFamilyOptions setCompactionFilter( final AbstractCompactionFilter> @@ -1562,6 +1557,4 @@ private native void setPrepopulateBlobCache( private CompressionOptions compressionOptions_; private SstPartitionerFactory sstPartitionerFactory_; private ConcurrentTaskLimiter compactionThreadLimiter_; - private MergeOperator mergeOperator_; - private String mergeOperatorName_; } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java index dca69717b0..4776773bd8 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java @@ -172,9 +172,6 @@ T setComparator( */ T setMergeOperator(MergeOperator mergeOperator); - String mergeOperatorName(); - MergeOperator mergeOperator(); - /** * A single 
CompactionFilter instance to call into during compaction. * Allows an application to modify/delete a key-value during background From e6b19053ac46c127853053e6f1777fd473ee2415 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 4 Sep 2025 20:58:36 +0800 Subject: [PATCH 080/175] update submdoule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 092f70ee2e..9c609d7f21 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 092f70ee2ed7c43380f4415386851896d1ec7a89 +Subproject commit 9c609d7f21ec2e1f51791f3ee60fe059cc7fd6be From f0b95b6e3341d0ff3666a84166470bc4103ea9bc Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 13:06:19 +0800 Subject: [PATCH 081/175] Makefile: Fix change $(AM_V_AR)rm to $(AM_V_at)rm --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index e983030671..8a04afa733 100644 --- a/Makefile +++ b/Makefile @@ -1723,7 +1723,7 @@ unity.cc: Makefile util/build_version.cc.in mv $@-t $@ unity.a: $(OBJ_DIR)/unity.o - $(AM_V_AR)rm -f $@ + $(AM_V_at)rm -f $@ $(AM_V_at)$(AR) $(ARFLAGS) $@ $(OBJ_DIR)/unity.o @@ -1797,31 +1797,31 @@ package: # Unit tests and tools # --------------------------------------------------------------------------- $(STATIC_LIBRARY): $(LIB_OBJECTS) - $(AM_V_AR)rm -f $@ $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) + $(AM_V_at)rm -f $@ $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIB_OBJECTS) $(TOPLING_LIB_OBJECTS) $(STATIC_TEST_LIBRARY): $(TEST_OBJECTS) - $(AM_V_AR)rm -f $@ $(SHARED_TEST_LIBRARY) + $(AM_V_at)rm -f $@ $(SHARED_TEST_LIBRARY) $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ $(STATIC_TOOLS_LIBRARY): $(TOOL_OBJECTS) - $(AM_V_AR)rm -f $@ $(SHARED_TOOLS_LIBRARY) + $(AM_V_at)rm -f $@ $(SHARED_TOOLS_LIBRARY) $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ $(STATIC_STRESS_LIBRARY): $(ANALYZE_OBJECTS) $(STRESS_OBJECTS) $(TESTUTIL) - 
$(AM_V_AR)rm -f $@ $(SHARED_STRESS_LIBRARY) + $(AM_V_at)rm -f $@ $(SHARED_STRESS_LIBRARY) $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ $(SHARED_TEST_LIBRARY): $(TEST_OBJECTS) $(SHARED1) - $(AM_V_AR)rm -f $@ $(STATIC_TEST_LIBRARY) + $(AM_V_at)rm -f $@ $(STATIC_TEST_LIBRARY) $(AM_SHARE) $(SHARED_TOOLS_LIBRARY): $(TOOL_OBJECTS) $(SHARED1) - $(AM_V_AR)rm -f $@ $(STATIC_TOOLS_LIBRARY) + $(AM_V_at)rm -f $@ $(STATIC_TOOLS_LIBRARY) $(AM_SHARE) $(SHARED_STRESS_LIBRARY): $(ANALYZE_OBJECTS) $(STRESS_OBJECTS) $(TESTUTIL) $(SHARED_TOOLS_LIBRARY) $(SHARED1) - $(AM_V_AR)rm -f $@ $(STATIC_STRESS_LIBRARY) + $(AM_V_at)rm -f $@ $(STATIC_STRESS_LIBRARY) $(AM_SHARE) db_bench: $(OBJ_DIR)/tools/db_bench.o $(BENCH_OBJECTS) $(TESTUTIL) $(LIBRARY) From 8500ef7807c14f7c7650dbe88165d9c5cf9affc1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 13:06:48 +0800 Subject: [PATCH 082/175] Makefile: LDFLAGS += ${TOPLING_CORE_LD_LIBS_EXTRA} --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 8a04afa733..2541f7fbbf 100644 --- a/Makefile +++ b/Makefile @@ -384,6 +384,7 @@ ifneq ("$(ROCKS_DEP_RULES)", "") endif endif TOPLING_LIB_OBJECTS = $(addprefix ${TOPLING_CORE_DIR}/, ${TOPLING_LIB_OBJ_LIST_VAR}) +LDFLAGS += ${TOPLING_CORE_LD_LIBS_EXTRA} ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) MAKE_UNIT_TEST ?= 1 From 3885db3d201fda6f798c3e78aa7ee1cae6dbedc7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 13:30:53 +0800 Subject: [PATCH 083/175] Makefile: ROCKS_DEP_RULES:=$(filter-out watch-log, $(ROCKS_DEP_RULES)) --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 2541f7fbbf..b27057bd30 100644 --- a/Makefile +++ b/Makefile @@ -377,6 +377,7 @@ endif TOPLING_LIB_OBJ_LIST_FILE := ${OBJ_DIR}/shared_lib_obj_list.mk ROCKS_DEP_RULES:=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean package analyze tags unity.% 
checkout_folly, $(MAKECMDGOALS)) ROCKS_DEP_RULES:=$(filter-out rust-support, $(ROCKS_DEP_RULES)) +ROCKS_DEP_RULES:=$(filter-out watch-log, $(ROCKS_DEP_RULES)) ifneq ("$(ROCKS_DEP_RULES)", "") -include ${TOPLING_CORE_DIR}/${TOPLING_LIB_OBJ_LIST_FILE} ifneq ($(filter j% rocksdbjava%, $(MAKECMDGOALS)),) @@ -3355,6 +3356,7 @@ rust-support: $(filter-out util/build_version.cc, ${LIB_SOURCES}) $(OBJ_DIR)/uti #If so, include the dependencies; if not, do not include the dependency files ROCKS_DEP_RULES:=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean package analyze tags unity.% checkout_folly, $(MAKECMDGOALS)) ROCKS_DEP_RULES:=$(filter-out rust-support, $(ROCKS_DEP_RULES)) +ROCKS_DEP_RULES:=$(filter-out watch-log, $(ROCKS_DEP_RULES)) ifneq ("$(ROCKS_DEP_RULES)", "") -include $(DEPFILES) endif From 0a1b1e8d87b8e082a9716c6e155f2cd1f13914df Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 14:58:50 +0800 Subject: [PATCH 084/175] Makefile: export ROCKSDBJNILIB & LD_PRELOAD in java make --- Makefile | 1 + java/Makefile | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/Makefile b/Makefile index b27057bd30..baddf8dff0 100644 --- a/Makefile +++ b/Makefile @@ -2792,6 +2792,7 @@ ifeq ($(PLATFORM), OS_OPENBSD) ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar endif export SHA256_CMD +export ROCKSDBJNILIB zlib-$(ZLIB_VER).tar.gz: curl --fail --output zlib-$(ZLIB_VER).tar.gz --location ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz diff --git a/java/Makefile b/java/Makefile index a55505c0b9..7d0ec89365 100644 --- a/java/Makefile +++ b/java/Makefile @@ -271,6 +271,15 @@ JAVA_CMD := $(JAVA_HOME)/bin/java else JAVA_CMD := java endif +ifneq (${DISABLE_JEMALLOC},1) +LD_PRELOAD := libjemalloc.so +endif +ifeq ($(TOPLING_USE_DYNAMIC_TLS),1) +LD_PRELOAD := ${LD_PRELOAD}:${ROCKSDBJNILIB} +endif +ifneq ($(LD_PRELOAD),) +JAVA_CMD := LD_PRELOAD=${LD_PRELOAD} ${JAVA_CMD} +endif JAVA_CMD := env 
LD_LIBRARY_PATH=target:${LD_LIBRARY_PATH} ${JAVA_CMD} endif From ea131f2b1d408eb6878200a07df0d0f0def7d57b Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 13:19:52 +0800 Subject: [PATCH 085/175] Add Iterator::CountKeysInRange(beg, end, fixed_user_key_len=0) --- db/arena_wrapped_db_iter.cc | 10 ++++++++++ db/arena_wrapped_db_iter.h | 3 +++ db/db_iter.cc | 26 ++++++++++++++++++++++++++ db/db_iter.h | 1 + include/rocksdb/iterator.h | 6 ++++++ 5 files changed, 46 insertions(+) diff --git a/db/arena_wrapped_db_iter.cc b/db/arena_wrapped_db_iter.cc index a7bb5e9187..fb1bb87d82 100644 --- a/db/arena_wrapped_db_iter.cc +++ b/db/arena_wrapped_db_iter.cc @@ -65,6 +65,16 @@ Slice Iterator::SeekForPrevWithKey(const Slice& target) { return Slice(nullptr, 0); } +size_t Iterator::CountKeysInRange(const Slice& beg, const Slice& end, size_t) { + size_t count = 0; + Seek(beg); + while (Valid()) { + ++count; + Next(); + } + return count; +} + ArenaWrappedDBIter::ArenaWrappedDBIter() { // do nothing } diff --git a/db/arena_wrapped_db_iter.h b/db/arena_wrapped_db_iter.h index 691287d3b4..ed8d45dba3 100644 --- a/db/arena_wrapped_db_iter.h +++ b/db/arena_wrapped_db_iter.h @@ -73,6 +73,9 @@ class ArenaWrappedDBIter final : public Iterator { void Prev() override { db_iter_->Prev(); } Slice NextWithKey() override { return db_iter_->NextWithKey(); } Slice PrevWithKey() override { return db_iter_->PrevWithKey(); } + size_t CountKeysInRange(const Slice& beg, const Slice& end, size_t fixed_user_key_len) override { + return db_iter_->CountKeysInRange(beg, end, fixed_user_key_len); + } ROCKSDB_FLATTEN Slice key() const override { return db_iter_->key(); } diff --git a/db/db_iter.cc b/db/db_iter.cc index 37ec39b43f..da79a064e9 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -2094,6 +2094,32 @@ void DBIter::SeekToLast() { } } +size_t DBIter::CountKeysInRange(const Slice& beg, const Slice& end, + size_t fixed_user_key_len) { + if (fixed_user_key_len > 255) { + fixed_user_key_len = 0; 
// ignore invalid arg to disable optimization + } + if (beg.size() != fixed_user_key_len || + end.size() != fixed_user_key_len) { + fixed_user_key_len = 0; // ignore invalid arg to disable optimization + } + auto old_fixed_user_key_len = this->fixed_user_key_len_; + const Slice* old_upper_bound = this->iterate_upper_bound_; + this->fixed_user_key_len_ = fixed_user_key_len; + this->iterate_upper_bound_ = &end; + this->SetFuncPtr(); + Slice cur_key = this->SeekWithKey(beg); + size_t count = 0; + while (cur_key.data() != nullptr) { + ++count; + cur_key = this->NextWithKey(); + } + this->fixed_user_key_len_ = old_fixed_user_key_len; + this->iterate_upper_bound_ = old_upper_bound; + this->SetFuncPtr(); + return count; +} + Iterator* NewDBIterator(Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, const MutableCFOptions& mutable_cf_options, diff --git a/db/db_iter.h b/db/db_iter.h index 56dcff7212..91545db4b3 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -235,6 +235,7 @@ class DBIter final : public Iterator { void Prev() final override; Slice NextWithKey() final override; Slice PrevWithKey() final override; + size_t CountKeysInRange(const Slice& beg, const Slice& end, size_t fixed_user_key_len) final override; // 'target' does not contain timestamp, even if user timestamp feature is // enabled. 
void Seek(const Slice& target) final override; diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 3bbaf30f5a..5fdd851c25 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -158,6 +158,12 @@ class Iterator : public Cleanable { Slice SeekToLastWithKey(); Slice SeekWithKey(const Slice& target); Slice SeekForPrevWithKey(const Slice& target); + + // if fixed_user_key_len > 0, user keys are assumed to be of the given length + // - this will trigger aggressive optimization for non-transactional db iter + // - if fixed_user_key_len is not equal to beg/end user key length, it is ignored + // if fixed_user_key_len == 0, user keys are of variable length, no optimizations + virtual size_t CountKeysInRange(const Slice& beg, const Slice& end, size_t fixed_user_key_len = 0); }; // Return an empty iterator (yields nothing). From dd56e00b11549b5234c01880d5b587881946d177 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 6 Sep 2025 14:57:39 +0800 Subject: [PATCH 086/175] java: Add RocksIterator::countKeysInRange() & unit test --- java/rocksjni/iterator.cc | 22 ++++++++++++ .../main/java/org/rocksdb/RocksIterator.java | 8 +++++ .../java/org/rocksdb/RocksIteratorTest.java | 35 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index 7a5828a978..6709f3b712 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -435,3 +435,25 @@ JNIEXPORT void JNICALL Java_org_rocksdb_RocksIterator_nativeRefreshForDatabaseGC } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } + +#define AllocaSliceFromJByteArray(slice, jbytearr) \ + ROCKSDB_NAMESPACE::Slice slice; \ + slice.size_ = env->GetArrayLength(jbytearr); \ + slice.data_ = (const char*)alloca(slice.size_); \ + env->GetByteArrayRegion(jbytearr, 0, slice.size_, (jbyte*)slice.data_); \ + if (env->ExceptionCheck()) { return -1; } + +/* + * Class: org_rocksdb_RocksIterator + * Method: countKeysInRange0 + * 
Signature: (J[B[BI)J + */ +JNIEXPORT jlong JNICALL Java_org_rocksdb_RocksIterator_countKeysInRange0 +(JNIEnv* env, jobject, jlong jiter, jbyteArray jbeg_key, jbyteArray jend_key, jint fixed_user_key_len) +{ + auto zc_it = reinterpret_cast(jiter); + auto iter = zc_it->iter; + AllocaSliceFromJByteArray(beg_key, jbeg_key); + AllocaSliceFromJByteArray(end_key, jend_key); + return iter->CountKeysInRange(beg_key, end_key, fixed_user_key_len); +} diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index 26ef0be3c9..1f42b20d66 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -400,6 +400,14 @@ public final boolean isDefaultEagerFetchValue() { prev0(nativeHandle_ | eagerFetchValue_); } + public long countKeysInRange(byte[] beg, byte[] end) { + return countKeysInRange0(nativeHandle_, beg, end, 0); + } + public long countKeysInRange(byte[] beg, byte[] end, int fixedUserKeyLen) { + return countKeysInRange0(nativeHandle_, beg, end, fixedUserKeyLen); + } + private native long countKeysInRange0(long handle, byte[] beg, byte[] end, int fixedUserKeyLen); + // iter position is kept and native key/value ptr may be updated public final void refreshForDatabaseGC() throws RocksDBException { nativeRefreshForDatabaseGC(nativeHandle_); diff --git a/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/java/src/test/java/org/rocksdb/RocksIteratorTest.java index a330e242f8..8700099289 100644 --- a/java/src/test/java/org/rocksdb/RocksIteratorTest.java +++ b/java/src/test/java/org/rocksdb/RocksIteratorTest.java @@ -232,6 +232,41 @@ public void rocksIteratorSimple() throws RocksDBException { } } + @Test + public void rocksIteratorCount() throws RocksDBException { + try (final Options options = + new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + 
db.put("key1-001".getBytes(), "value11".getBytes()); + db.put("key1-002".getBytes(), "value12".getBytes()); + db.put("key1-003".getBytes(), "value13".getBytes()); + db.put("key2-001".getBytes(), "value21".getBytes()); + db.put("key2-002".getBytes(), "value22".getBytes()); + db.put("key2-003".getBytes(), "value23".getBytes()); + + try (final RocksIterator iterator = db.newIterator()) { + assertThat(iterator.countKeysInRange("key1-000".getBytes(), "key2-000".getBytes(), 8)).isEqualTo(3); + assertThat(iterator.isValid()).isFalse(); + assertThat(iterator.countKeysInRange("key1-000".getBytes(), "key3-000".getBytes(), 8)).isEqualTo(6); + assertThat(iterator.isValid()).isFalse(); + assertThat(iterator.countKeysInRange("key1-002".getBytes(), "key2-002".getBytes(), 8)).isEqualTo(3); + assertThat(iterator.isValid()).isFalse(); + iterator.status(); + assertThat(iterator.countKeysInRange("key1-000".getBytes(), "key2-000".getBytes())).isEqualTo(3); + assertThat(iterator.isValid()).isFalse(); + assertThat(iterator.countKeysInRange("key1-000".getBytes(), "key3-000".getBytes())).isEqualTo(6); + assertThat(iterator.isValid()).isFalse(); + assertThat(iterator.countKeysInRange("key1-002".getBytes(), "key2-002".getBytes())).isEqualTo(3); + assertThat(iterator.isValid()).isFalse(); + iterator.status(); + assertThat(iterator.countKeysInRange(new byte[0], new byte[0])).isEqualTo(0); + assertThat(iterator.countKeysInRange(new byte[0], new byte[]{-1})).isEqualTo(6); + assertThat(iterator.countKeysInRange(new byte[]{-1}, new byte[0])).isEqualTo(0); + iterator.status(); + } + } + } + @Test public void rocksIterator() throws RocksDBException { try (final Options options = From ece4d10588c4ccde31be5d0c618808d93a4458fe Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 7 Sep 2025 22:13:25 +0800 Subject: [PATCH 087/175] rockisde Impl_OpenDB_tpl: params.name = dbname: skip check `!dbname.empty()` --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sideplugin/rockside b/sideplugin/rockside index 9c609d7f21..01e0f02b8f 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 9c609d7f21ec2e1f51791f3ee60fe059cc7fd6be +Subproject commit 01e0f02b8f2eabb261a6d6a9414b478d7bb968bd From 05bef372ad1fa11d5237ba66a3d9cee382ddda66 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 9 Sep 2025 12:05:21 +0800 Subject: [PATCH 088/175] rockside: Add template class DcompactSerDeFunc --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 01e0f02b8f..5dfddde8e1 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 01e0f02b8f2eabb261a6d6a9414b478d7bb968bd +Subproject commit 5dfddde8e1e448aea5a08f0e691155a32d8d8494 From b14bb736c74a484f4b3adebc753b6c9c59d48e20 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 18 Sep 2025 14:50:28 +0800 Subject: [PATCH 089/175] build_detect_platform: Fix for cygwin --- build_tools/build_detect_platform | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 194bfee4d8..198222bb0c 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -235,7 +235,8 @@ EOF PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" # PORT_FILES=port/dragonfly/dragonfly_specific.cc ;; - Cygwin) + Cygwin*) + CYGWIN*) PLATFORM=CYGWIN PLATFORM_SHARED_CFLAGS="" PLATFORM_CXXFLAGS="-std=gnu++17" From 6ffa51643be92e81adecc22cfbb323bed84d1c03 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 19 Sep 2025 15:27:16 +0800 Subject: [PATCH 090/175] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 5dfddde8e1..691cba3cb7 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 5dfddde8e1e448aea5a08f0e691155a32d8d8494 +Subproject 
commit 691cba3cb703243059df53cbe0521fd97257b665 From 2c17922829d18d1ff6a443819abfef47afab0b74 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 16:34:38 +0800 Subject: [PATCH 091/175] topling-jni-release.yml: bundle compression libs and set ROCKSDB_DISABLE_GFLAGS=1 --- .github/workflows/topling-jni-release.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/topling-jni-release.yml b/.github/workflows/topling-jni-release.yml index e4c191a6ca..3a7226670c 100644 --- a/.github/workflows/topling-jni-release.yml +++ b/.github/workflows/topling-jni-release.yml @@ -31,9 +31,7 @@ jobs: set -xe cat $GITHUB_WORKSPACE/settings.xml sudo apt-get update -y && sudo apt-get install -y \ - libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ - libbz2-dev libcurl4-gnutls-dev liburing-dev \ - libsnappy-dev libbz2-dev liblz4-dev libzstd-dev + libaio-dev zlib1g-dev libcurl4-gnutls-dev liburing-dev - name: Init submodule & Setup ssh run: | @@ -49,13 +47,22 @@ jobs: # there is no dedicated target for this purpose, so we use `clean` # as the target, it need not any `clean` at this point. 
# `clean` is just used for triggers auto git clone + env USE_LTO=1 \ + ROCKSDB_DISABLE_GFLAGS=1 \ make clean + - name: make bundled compression libs + run: | + env USE_LTO=1 \ + ROCKSDB_DISABLE_GFLAGS=1 \ + make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 libsnappy.a liblz4.a libbz2.a + - name: make depend for Compile RocksDBJava Release run: | echo JAVA_HOME = $JAVA_HOME set -xe env USE_LTO=1 \ + ROCKSDB_DISABLE_GFLAGS=1 \ make depend java/include/java_header_list.mk \ -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 \ DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ @@ -66,6 +73,7 @@ jobs: echo JAVA_HOME = $JAVA_HOME set -xe env USE_LTO=1 \ + ROCKSDB_DISABLE_GFLAGS=1 \ make rocksdbjava install-dcompact -j`nproc` \ DEBUG_LEVEL=0 UPDATE_REPO=0 PREFIX=install-here \ DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ From 1629f14bb3ecccbc19983d213e8616de02990393 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 16:51:11 +0800 Subject: [PATCH 092/175] Fix build_detect_platform --- build_tools/build_detect_platform | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 198222bb0c..718d59b35a 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -235,8 +235,7 @@ EOF PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" # PORT_FILES=port/dragonfly/dragonfly_specific.cc ;; - Cygwin*) - CYGWIN*) + Cygwin*|CYGWIN*) PLATFORM=CYGWIN PLATFORM_SHARED_CFLAGS="" PLATFORM_CXXFLAGS="-std=gnu++17" From 2747584064e93684bbf54cd8691c1df9b2433f99 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 17:33:00 +0800 Subject: [PATCH 093/175] Makefile: remove -lxxx on bundled compression lib xxx --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index baddf8dff0..a7d61d868d 100644 --- a/Makefile +++ b/Makefile @@ -2736,18 +2736,26 @@ CURL_SSL_OPTS ?= --tlsv1 ifneq ($(wildcard libz.a),) 
BUNDLED_COMPRESSION_LIBS += libz.a CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) + PLATFORM_LDFLAGS=$(filter-out -lz,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lz,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libbz2.a),) BUNDLED_COMPRESSION_LIBS += libbz2.a CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) + PLATFORM_LDFLAGS=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lbz2,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libsnappy.a),) BUNDLED_COMPRESSION_LIBS += libsnappy.a CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build + PLATFORM_LDFLAGS=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard liblz4.a),) BUNDLED_COMPRESSION_LIBS += liblz4.a CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib + PLATFORM_LDFLAGS=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -llz4,$(JAVA_LDFLAGS)) endif ifeq ($(PLATFORM), OS_MACOSX) From ac00a9f63ff60d50ed3b2b509cf34f5bcf20a67d Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 17:52:05 +0800 Subject: [PATCH 094/175] Makefile: remove redundant code, keep prefering bundled compression lib --- Makefile | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index a7d61d868d..4a83f744a9 100644 --- a/Makefile +++ b/Makefile @@ -2734,28 +2734,16 @@ ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifneq ($(wildcard libz.a),) - BUNDLED_COMPRESSION_LIBS += libz.a - CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) - PLATFORM_LDFLAGS=$(filter-out -lz,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lz,$(JAVA_LDFLAGS)) + CXXFLAGS += -I./zlib-$(ZLIB_VER) endif ifneq ($(wildcard libbz2.a),) - BUNDLED_COMPRESSION_LIBS += libbz2.a - CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) - PLATFORM_LDFLAGS=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lbz2,$(JAVA_LDFLAGS)) + CXXFLAGS += -I./bzip2-$(BZIP2_VER) endif ifneq ($(wildcard 
libsnappy.a),) - BUNDLED_COMPRESSION_LIBS += libsnappy.a - CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build - PLATFORM_LDFLAGS=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) + CXXFLAGS += -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build endif ifneq ($(wildcard liblz4.a),) - BUNDLED_COMPRESSION_LIBS += liblz4.a - CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib - PLATFORM_LDFLAGS=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -llz4,$(JAVA_LDFLAGS)) + CXXFLAGS += -I./lz4-$(LZ4_VER)/lib endif ifeq ($(PLATFORM), OS_MACOSX) @@ -3058,9 +3046,6 @@ endif $(AM_V_CCLD) $(CXX) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ $(TOPLING_LIB_OBJECTS) \ - -Wl,--whole-archive \ - ${BUNDLED_COMPRESSION_LIBS} \ - -Wl,--no-whole-archive \ $(JAVA_LDFLAGS) \ $(LDFLAGS) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target From cb616c282171f9cc43404d48985cb285eecf9a93 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 18:15:26 +0800 Subject: [PATCH 095/175] Revert "Makefile: remove redundant code, keep prefering bundled compression lib" This reverts commit ac00a9f63ff60d50ed3b2b509cf34f5bcf20a67d. because shared lib will be searched first, if there are shared lib in system lib dir, it will be used. we prefer bundled compression lib if it exists. 
--- Makefile | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 4a83f744a9..a7d61d868d 100644 --- a/Makefile +++ b/Makefile @@ -2734,16 +2734,28 @@ ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifneq ($(wildcard libz.a),) - CXXFLAGS += -I./zlib-$(ZLIB_VER) + BUNDLED_COMPRESSION_LIBS += libz.a + CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) + PLATFORM_LDFLAGS=$(filter-out -lz,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lz,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libbz2.a),) - CXXFLAGS += -I./bzip2-$(BZIP2_VER) + BUNDLED_COMPRESSION_LIBS += libbz2.a + CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) + PLATFORM_LDFLAGS=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lbz2,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libsnappy.a),) - CXXFLAGS += -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build + BUNDLED_COMPRESSION_LIBS += libsnappy.a + CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build + PLATFORM_LDFLAGS=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard liblz4.a),) - CXXFLAGS += -I./lz4-$(LZ4_VER)/lib + BUNDLED_COMPRESSION_LIBS += liblz4.a + CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib + PLATFORM_LDFLAGS=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS=$(filter-out -llz4,$(JAVA_LDFLAGS)) endif ifeq ($(PLATFORM), OS_MACOSX) @@ -3046,6 +3058,9 @@ endif $(AM_V_CCLD) $(CXX) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ $(TOPLING_LIB_OBJECTS) \ + -Wl,--whole-archive \ + ${BUNDLED_COMPRESSION_LIBS} \ + -Wl,--no-whole-archive \ $(JAVA_LDFLAGS) \ $(LDFLAGS) $(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target From 3cc609a93a53ac8d1ba426884cd9316917bf84e2 Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 18:24:50 +0800 Subject: [PATCH 096/175] Makefile: fix recursive 
var, use := --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index a7d61d868d..199d5346f7 100644 --- a/Makefile +++ b/Makefile @@ -2736,26 +2736,26 @@ CURL_SSL_OPTS ?= --tlsv1 ifneq ($(wildcard libz.a),) BUNDLED_COMPRESSION_LIBS += libz.a CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) - PLATFORM_LDFLAGS=$(filter-out -lz,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lz,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lz,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS:=$(filter-out -lz,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libbz2.a),) BUNDLED_COMPRESSION_LIBS += libbz2.a CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) - PLATFORM_LDFLAGS=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lbz2,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS:=$(filter-out -lbz2,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard libsnappy.a),) BUNDLED_COMPRESSION_LIBS += libsnappy.a CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build - PLATFORM_LDFLAGS=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS:=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) endif ifneq ($(wildcard liblz4.a),) BUNDLED_COMPRESSION_LIBS += liblz4.a CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib - PLATFORM_LDFLAGS=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS=$(filter-out -llz4,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) + JAVA_LDFLAGS:=$(filter-out -llz4,$(JAVA_LDFLAGS)) endif ifeq ($(PLATFORM), OS_MACOSX) From 5351bb2402334213404bd5107bc8bf83f3e4b37e Mon Sep 17 00:00:00 2001 From: leipeng Date: Thu, 25 Sep 2025 18:32:23 +0800 Subject: [PATCH 097/175] Makefile: remove BUNDLED_COMPRESSION_LIBS --- Makefile | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 
199d5346f7..f020ae389c 100644 --- a/Makefile +++ b/Makefile @@ -2734,28 +2734,24 @@ ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifneq ($(wildcard libz.a),) - BUNDLED_COMPRESSION_LIBS += libz.a CXXFLAGS += -DZLIB -I./zlib-$(ZLIB_VER) - PLATFORM_LDFLAGS:=$(filter-out -lz,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS:=$(filter-out -lz,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lz,$(PLATFORM_LDFLAGS)) libz.a + JAVA_LDFLAGS:=$(filter-out -lz,$(JAVA_LDFLAGS)) libz.a endif ifneq ($(wildcard libbz2.a),) - BUNDLED_COMPRESSION_LIBS += libbz2.a CXXFLAGS += -DBZIP2 -I./bzip2-$(BZIP2_VER) - PLATFORM_LDFLAGS:=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS:=$(filter-out -lbz2,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lbz2,$(PLATFORM_LDFLAGS)) libbz2.a + JAVA_LDFLAGS:=$(filter-out -lbz2,$(JAVA_LDFLAGS)) libbz2.a endif ifneq ($(wildcard libsnappy.a),) - BUNDLED_COMPRESSION_LIBS += libsnappy.a CXXFLAGS += -DSNAPPY -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build - PLATFORM_LDFLAGS:=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS:=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -lsnappy,$(PLATFORM_LDFLAGS)) libsnappy.a + JAVA_LDFLAGS:=$(filter-out -lsnappy,$(JAVA_LDFLAGS)) libsnappy.a endif ifneq ($(wildcard liblz4.a),) - BUNDLED_COMPRESSION_LIBS += liblz4.a CXXFLAGS += -DLZ4 -I./lz4-$(LZ4_VER)/lib - PLATFORM_LDFLAGS:=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) - JAVA_LDFLAGS:=$(filter-out -llz4,$(JAVA_LDFLAGS)) + PLATFORM_LDFLAGS:=$(filter-out -llz4,$(PLATFORM_LDFLAGS)) liblz4.a + JAVA_LDFLAGS:=$(filter-out -llz4,$(JAVA_LDFLAGS)) liblz4.a endif ifeq ($(PLATFORM), OS_MACOSX) @@ -3058,9 +3054,6 @@ endif $(AM_V_CCLD) $(CXX) -shared -fPIC -o java/target/$(ROCKSDBJNILIB) \ $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) \ $(TOPLING_LIB_OBJECTS) \ - -Wl,--whole-archive \ - ${BUNDLED_COMPRESSION_LIBS} \ - -Wl,--no-whole-archive \ $(JAVA_LDFLAGS) \ $(LDFLAGS) $(AM_V_at)cp -a 
sideplugin/rockside/src/topling/web/{style.css,index.html} java/target From 36d4ec92660be91a4a867cde9793fbe91cf6118a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 28 Sep 2025 23:44:08 +0800 Subject: [PATCH 098/175] workflow: replace libcurl4-gnutls-dev to libcurl4-openssl-dev --- .github/workflows/topling-jni-release.yml | 2 +- .github/workflows/topling-jni.yml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/topling-jni-release.yml b/.github/workflows/topling-jni-release.yml index 3a7226670c..2f7094f31f 100644 --- a/.github/workflows/topling-jni-release.yml +++ b/.github/workflows/topling-jni-release.yml @@ -31,7 +31,7 @@ jobs: set -xe cat $GITHUB_WORKSPACE/settings.xml sudo apt-get update -y && sudo apt-get install -y \ - libaio-dev zlib1g-dev libcurl4-gnutls-dev liburing-dev + libaio-dev zlib1g-dev libcurl4-openssl-dev liburing-dev - name: Init submodule & Setup ssh run: | diff --git a/.github/workflows/topling-jni.yml b/.github/workflows/topling-jni.yml index 49bbd05cc6..f3f8566194 100644 --- a/.github/workflows/topling-jni.yml +++ b/.github/workflows/topling-jni.yml @@ -59,7 +59,7 @@ jobs: cat $GITHUB_WORKSPACE/settings.xml sudo apt-get update -y && sudo apt-get install -y \ libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ - libbz2-dev libcurl4-gnutls-dev liburing-dev \ + libbz2-dev libcurl4-openssl-dev liburing-dev \ libsnappy-dev libbz2-dev liblz4-dev libzstd-dev - name: Init submodule & Setup ssh @@ -124,6 +124,9 @@ jobs: # F\?ROCKSDB_JAVA_VERSION for both flink and normal rocksdbjni sed 's/\${F\?ROCKSDB_JAVA_VERSION}/'"${ROCKSDB_JAVA_VERSION}/" pom.xml.template > pom.xml + - name: Show librocksdbjni dependency + run: ldd java/target/*.so + - name: Move to Local & Publish rocksjni to GitHub Packages run: | echo JAVA_HOME = $JAVA_HOME From 12049dba6a23c169a23a6debc626ace04102dd3b Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 2 Oct 2025 21:30:05 +0800 Subject: [PATCH 099/175] Fix clang warnings --- 
db/db_memtable_test.cc | 2 +- db/db_test_util.h | 6 +++--- db/write_batch.cc | 4 ++++ include/rocksdb/file_system.h | 2 +- util/crc32c.cc | 2 +- utilities/transactions/lock/point/point_lock_tracker.cc | 6 +++--- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/db/db_memtable_test.cc b/db/db_memtable_test.cc index dd161f042e..2942379729 100644 --- a/db/db_memtable_test.cc +++ b/db/db_memtable_test.cc @@ -96,7 +96,7 @@ class MockMemTableRepFactory : public MemTableRepFactory { const MutableCFOptions& mcfo, const MemTableRep::KeyComparator& cmp, Allocator* allocator, const SliceTransform* transform, Logger* logger, - uint32_t column_family_id) { + uint32_t column_family_id) override { last_column_family_id_ = column_family_id; if (g_cspp_fac) { auto ucmp = cmp.icomparator()->user_comparator(); diff --git a/db/db_test_util.h b/db/db_test_util.h index 2dea33965e..46b53ee26b 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -275,7 +275,7 @@ class SpecialEnv : public EnvWrapper { } intptr_t FileDescriptor() const final { return base_->FileDescriptor(); } - void SetFileSize(uint64_t fsize) { base_->SetFileSize(fsize); } + void SetFileSize(uint64_t fsize) final { base_->SetFileSize(fsize); } private: SpecialEnv* env_; @@ -353,7 +353,7 @@ class SpecialEnv : public EnvWrapper { } uint64_t GetFileSize() final { return base_->GetFileSize(); } intptr_t FileDescriptor() const final { return base_->FileDescriptor(); } - void SetFileSize(uint64_t fsize) { base_->SetFileSize(fsize); } + void SetFileSize(uint64_t fsize) final { base_->SetFileSize(fsize); } private: SpecialEnv* env_; @@ -384,7 +384,7 @@ class SpecialEnv : public EnvWrapper { return base_->Allocate(offset, len); } intptr_t FileDescriptor() const final { return base_->FileDescriptor(); } - void SetFileSize(uint64_t fsize) { base_->SetFileSize(fsize); } + void SetFileSize(uint64_t fsize) final { base_->SetFileSize(fsize); } private: SpecialEnv* env_; diff --git a/db/write_batch.cc 
b/db/write_batch.cc index 4765b46ac4..165571a24c 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -74,6 +74,10 @@ #include #include +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + namespace ROCKSDB_NAMESPACE { // anon namespace for file-local types diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 01c1c8681a..7953ce48c2 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -1951,7 +1951,7 @@ class ReadonlyFileMmap : public Slice { IOStatus ios; return {New(&ios, fs, fileno, fname, mmap_size), ios}; } - uint32_t fileno; + uint64_t fileno; std::shared_ptr tail_pos; }; diff --git a/util/crc32c.cc b/util/crc32c.cc index 5ae7b6fc0e..3a82774cc8 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -1224,7 +1224,7 @@ static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2) { // Use hardware crc32c to do reduction from 64 -> 32 bytes const auto res2 = _mm_cvtsi128_si64(res1); - const auto res3 = _mm_crc32_u32(0, res2); + const auto res3 = _mm_crc32_u32(0, uint32_t(res2)); const auto res4 = _mm_extract_epi32(res1, 1); return res3 ^ res4; #else diff --git a/utilities/transactions/lock/point/point_lock_tracker.cc b/utilities/transactions/lock/point/point_lock_tracker.cc index 4b84cecc1c..dc1092bda0 100644 --- a/utilities/transactions/lock/point/point_lock_tracker.cc +++ b/utilities/transactions/lock/point/point_lock_tracker.cc @@ -62,8 +62,8 @@ struct PointLockTrackerDelta : public LockTracker { using CFMap = terark::VectorIndexMap; CFMap cf_delta_vec_; const PointLockTracker* base_tracker_; - class CF_Iter; - class Key_Iter; + struct CF_Iter; + struct Key_Iter; PointLockTrackerDelta(const PointLockTracker*); PointLockTrackerDelta(const PointLockTrackerDelta&) = delete; PointLockTrackerDelta& operator=(const PointLockTrackerDelta&) = delete; @@ -357,7 +357,7 @@ PointLockStatus PointLockTracker::GetPointLockStatus( status.locked = true; status.exclusive = 
key_info.exclusive; status.seq = key_info.seq; - status.iter = idx; + status.iter = uint32_t(idx); } status.hint = hint; } From 202921c97a6a943a66de65338d5604b4daf615f3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 3 Oct 2025 19:33:43 +0800 Subject: [PATCH 100/175] submodule rockside: json.h: Remove spaces in `operator "" _json` --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 691cba3cb7..b7adef2777 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 691cba3cb703243059df53cbe0521fd97257b665 +Subproject commit b7adef2777da4461898649d5f04d25b68dd6f941 From e7417c80ab06dd800866180f5d19ef5908fe908a Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 7 Oct 2025 15:23:49 +0800 Subject: [PATCH 101/175] Use TOPLING_USE_BOUND_PMF and ExtractFuncPtr --- db/db_iter.cc | 10 ++-- db/db_iter.h | 2 +- db/version_set.cc | 7 ++- table/iterator_wrapper.h | 48 ++++++++----------- .../write_batch_with_index_internal.cc | 44 +++++++---------- .../write_batch_with_index_internal.h | 3 +- 6 files changed, 44 insertions(+), 70 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index da79a064e9..067107b58b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -112,10 +112,6 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options, user_comparator_.user_comparator()->timestamp_size()); enable_perf_timer_ = perf_level >= PerfLevel::kEnableTimeExceptForMutex; fixed_user_key_len_ = read_options.fixed_user_key_len; -#if defined(_MSC_VER) || defined(__clang__) -#else - #pragma GCC diagnostic ignored "-Wpmf-conversions" -#endif SetFuncPtr(); } @@ -418,7 +414,7 @@ bool DBIter::SetValueAndColumnsFromMergeResult(const Status& merge_status, // more entry for the prefix can be found. 
__always_inline bool DBIter::FindNextUserEntry(bool skipping_saved_key, const Slice* prefix) { -#if defined(_MSC_VER) || defined(__clang__) +#if !TOPLING_USE_BOUND_PMF return (this->*m_find_next_entry)(skipping_saved_key, prefix); #else return m_find_next_entry(this, skipping_saved_key, prefix); @@ -578,10 +574,10 @@ bool DBIter::FindNextUserEntryPerf(bool skipping_saved_key, const Slice* prefix) (skipping_saved_key, prefix); } void DBIter::SetFuncPtr() { -#if defined(_MSC_VER) || defined(__clang__) +#if !TOPLING_USE_BOUND_PMF #define BOUND_PMF(func) func #else - #define BOUND_PMF(func) (FindNextUserEntryFN)(this->*func) + #define BOUND_PMF(func) ExtractFuncPtr(this, func) #endif #define SetFindNext(FuncName, CmpNoTS) \ if (false) {} \ diff --git a/db/db_iter.h b/db/db_iter.h index 91545db4b3..3a88484c12 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -426,7 +426,7 @@ class DBIter final : public Iterator { template bool FindNextUserEntryPerf(bool skipping_saved_key, const Slice* prefix); void SetFuncPtr(); -#if defined(_MSC_VER) || defined(__clang__) +#if !TOPLING_USE_BOUND_PMF typedef bool (DBIter::*FindNextUserEntryFN)(bool, const Slice*); #else typedef bool (*FindNextUserEntryFN)(DBIter*, bool, const Slice*); diff --git a/db/version_set.cc b/db/version_set.cc index 301a95ddbe..0318d7ab7a 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -237,17 +237,16 @@ Status OverlapWithIterator(const Comparator* ucmp, // in a smaller level, later levels are irrelevant (unless we // are MergeInProgress). 
class FilePicker { -#if defined(_MSC_VER) || defined(__clang__) +#if !TOPLING_USE_BOUND_PMF typedef FdWithKeyRange* (FilePicker::*GetNextFileFN)(); #define Set_m_get_next_file(Cmp) \ m_get_next_file = &FilePicker::GetNextFileTmpl #else typedef FdWithKeyRange* (*GetNextFileFN)(FilePicker*); - #pragma GCC diagnostic ignored "-Wpmf-conversions" #define Set_m_get_next_file(Cmp) \ do { \ auto func = &FilePicker::GetNextFileTmpl; \ - m_get_next_file = (GetNextFileFN)(this->*func); \ + m_get_next_file = ExtractFuncPtr(this, func); \ } while (0) #endif GetNextFileFN m_get_next_file; @@ -309,7 +308,7 @@ class FilePicker { __always_inline FdWithKeyRange* GetNextFile() { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return (this->*m_get_next_file)(); #else return m_get_next_file(this); diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 8ae86031f0..2156628c76 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -14,10 +14,9 @@ #include "table/internal_iterator.h" #include "test_util/sync_point.h" -#if defined(_MSC_VER) || defined(__clang__) -#else - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wpmf-conversions" +#include +#if TOPLING_USE_BOUND_PMF +using terark::ExtractFuncPtr; #endif namespace ROCKSDB_NAMESPACE { @@ -51,12 +50,11 @@ class IteratorWrapperBase { if (iter_ == nullptr) { result_.is_valid = false; } else { - #if defined(_MSC_VER) || defined(__clang__) - #else - next_and_get_result_ = (NextAndGetResultFN) - (_iter->*(&InternalIteratorBase::NextAndGetResult)); - prepare_and_get_value_ = (PrepareAndGetValueFN) - (_iter->*(&InternalIteratorBase::PrepareAndGetValue)); + #if TOPLING_USE_BOUND_PMF + next_and_get_result_ = ExtractFuncPtr + (_iter, &InternalIteratorBase::NextAndGetResult); + prepare_and_get_value_ = ExtractFuncPtr + (_iter, &InternalIteratorBase::PrepareAndGetValue); #endif Update(); } @@ -138,7 +136,7 @@ class IteratorWrapperBase { } */ //return result_.value_prepared = 
iter_->PrepareAndGetValue(v); - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return iter_->PrepareAndGetValue(v); // do minimal work #else return prepare_and_get_value_(iter_, v); @@ -149,7 +147,7 @@ class IteratorWrapperBase { #endif bool Next() { assert(iter_); -#if defined(_MSC_VER) || defined(__clang__) +#if !TOPLING_USE_BOUND_PMF const bool is_valid = iter_->NextAndGetResult(&result_); #else const bool is_valid = next_and_get_result_(iter_, &result_); @@ -258,8 +256,7 @@ class IteratorWrapperBase { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED mutable bool status_checked_after_invalid_ = true; #endif -#if defined(_MSC_VER) || defined(__clang__) -#else +#if TOPLING_USE_BOUND_PMF typedef bool (*NextAndGetResultFN)(InternalIteratorBase*, IterateResult*); typedef bool (*PrepareAndGetValueFN)(InternalIteratorBase*, TValue*); NextAndGetResultFN next_and_get_result_ = nullptr; @@ -278,12 +275,11 @@ class ThinIteratorWrapperBase { auto old_iter = iter_; iter_ = i; if (i) { - #if defined(_MSC_VER) || defined(__clang__) - #else - next_and_get_result_ = (NextAndGetResultFN) - (i->*(&InternalIteratorBase::NextAndGetResult)); - prepare_and_get_value_ = (PrepareAndGetValueFN) - (i->*(&InternalIteratorBase::PrepareAndGetValue)); + #if TOPLING_USE_BOUND_PMF + next_and_get_result_ = ExtractFuncPtr + (i, &InternalIteratorBase::NextAndGetResult); + prepare_and_get_value_ = ExtractFuncPtr + (i, &InternalIteratorBase::PrepareAndGetValue); #endif } return old_iter; @@ -309,7 +305,7 @@ class ThinIteratorWrapperBase { bool PrepareValue() { assert(Valid()); return iter_->PrepareValue(); } bool PrepareAndGetValue(TValue* v) { assert(Valid()); - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return iter_->PrepareAndGetValue(v); #else return prepare_and_get_value_(iter_, v); @@ -318,7 +314,7 @@ class ThinIteratorWrapperBase { void Next() { assert(Valid()); iter_->Next(); } bool NextAndGetResult(IterateResult* r) { assert(iter_); - #if 
defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return iter_->NextAndGetResult(r); #else return next_and_get_result_(iter_, r); @@ -357,8 +353,7 @@ class ThinIteratorWrapperBase { } private: InternalIteratorBase* iter_; -#if defined(_MSC_VER) || defined(__clang__) -#else +#if TOPLING_USE_BOUND_PMF typedef bool (*NextAndGetResultFN)(InternalIteratorBase*, IterateResult*); typedef bool (*PrepareAndGetValueFN)(InternalIteratorBase*, TValue*); NextAndGetResultFN next_and_get_result_ = nullptr; @@ -375,8 +370,3 @@ template extern InternalIteratorBase* NewEmptyInternalIterator(Arena* arena); } // namespace ROCKSDB_NAMESPACE - -#if defined(_MSC_VER) || defined(__clang__) -#else - #pragma GCC diagnostic pop -#endif diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index e1fc5d65e8..acb05dd3c8 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -47,21 +47,19 @@ BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, delta_valid_ = false; delta_status_code_ = Status::kOk; opt_cmp_type_ = comparator->opt_cmp_type(); - #if defined(_MSC_VER) || defined(__clang__) - #else - #pragma GCC diagnostic ignored "-Wpmf-conversions" - base_iter_valid_ = (BaseIterValidFN )(base_iterator->*(&Iterator::Valid)); - base_iter_next_ = (BaseIterScanFN )(base_iterator->*(&Iterator::Next)); - base_iter_get_key_ = (BaseIterGetSliceFN)(base_iterator->*(&Iterator::key)); - base_iter_get_value_ = (BaseIterGetSliceFN)(base_iterator->*(&Iterator::value)); - delta_iter_next_key_ = (DeltaIterScanKeyFN)(delta_iterator->*(&WBWIIterator::NextKey)); - delta_iter_user_key_ = (DeltaIterUserKeyFN)(delta_iterator->*(&InternalIterator::user_key)); + #if TOPLING_USE_BOUND_PMF + base_iter_valid_ = ExtractFuncPtr( base_iterator, &Iterator::Valid); + base_iter_next_ = 
ExtractFuncPtr( base_iterator, &Iterator::Next); + base_iter_get_key_ = ExtractFuncPtr( base_iterator, &Iterator::key); + base_iter_get_value_ = ExtractFuncPtr( base_iterator, &Iterator::value); + delta_iter_next_key_ = ExtractFuncPtr(delta_iterator, &WBWIIterator::NextKey); + delta_iter_user_key_ = ExtractFuncPtr(delta_iterator, &InternalIterator::user_key); #endif } __always_inline bool BaseDeltaIterator::UpdateDeltaKey(bool is_valid) { if (LIKELY(is_valid)) { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF this->delta_key = delta_iterator_->user_key(); #else this->delta_key = delta_iter_user_key_(delta_iterator_.get()); @@ -153,7 +151,7 @@ void BaseDeltaIterator::Next() { } if (DeltaValid() && BaseValid()) { if (0 == comparator_->CompareWithoutTimestamp( - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF delta_key, /*a_has_ts=*/false, base_iterator_->key(), #else @@ -215,7 +213,7 @@ void BaseDeltaIterator::Prev() { } Slice BaseDeltaIterator::key() const { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return current_at_base_ ? 
base_iterator_->key() : delta_key; #else @@ -326,22 +324,14 @@ inline bool BaseDeltaIterator::AdvanceIter(WBWIIterator* i, bool forward) { } inline bool BaseDeltaIterator::AdvanceIterImpl(WBWIIterator* i, bool forward) { if (forward) { - #if defined(_MSC_VER) || defined(__clang__) - return i->NextKey(); - #else - return delta_iter_next_key_(i); - #endif + return TOPLING_IF_BOUND_PMF_CALL(delta_iter_next_key_, NextKey, i); } else { return i->PrevKey(); } } inline void BaseDeltaIterator::AdvanceIter(Iterator* i, bool forward) { if (forward) { - #if defined(_MSC_VER) || defined(__clang__) - i->Next(); - #else - base_iter_next_(i); - #endif + TOPLING_IF_BOUND_PMF(base_iter_next_(i), i->Next()); } else { i->Prev(); } @@ -350,7 +340,7 @@ inline void BaseDeltaIterator::AdvanceIter(Iterator* i, bool forward) { inline void BaseDeltaIterator::AdvanceDelta(bool const_forward) { assert(const_forward == forward_); if (const_forward) { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF delta_valid_ = delta_iterator_->NextKey(); #else delta_valid_ = delta_iter_next_key_(delta_iterator_.get()); @@ -364,7 +354,7 @@ inline void BaseDeltaIterator::AdvanceDelta(bool const_forward) { inline void BaseDeltaIterator::AdvanceBase(bool const_forward) { assert(const_forward == forward_); if (const_forward) { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF base_iterator_->Next(); #else base_iter_next_(base_iterator_.get()); @@ -492,7 +482,7 @@ void BaseDeltaIterator::SetValueAndColumnsFromDelta() { } inline bool BaseDeltaIterator::BaseValid() const { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF return base_iterator_->Valid(); #else return base_iter_valid_(base_iterator_.get()); @@ -547,7 +537,7 @@ void BaseDeltaIterator::UpdateCurrentTpl(bool const_forward, CmpNoTS cmp) { return; } equal_keys_ = false; - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF if 
(UNLIKELY(!base_iterator_->Valid())) #else if (UNLIKELY(!base_iter_valid_(base_iterator_))) @@ -580,7 +570,7 @@ void BaseDeltaIterator::UpdateCurrentTpl(bool const_forward, CmpNoTS cmp) { SetValueAndColumnsFromBase(); return; } else { - #if defined(_MSC_VER) || defined(__clang__) + #if !TOPLING_USE_BOUND_PMF int compare = const_forward ? cmp.compare(delta_key, base_iterator_->key()) : cmp.compare(base_iterator_->key(), delta_key) diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index 9e60511c41..f4cc349421 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -127,8 +127,7 @@ class BaseDeltaIterator final : public Iterator { std::unique_ptr base_iterator_; std::unique_ptr delta_iterator_; Slice delta_key; - #if defined(_MSC_VER) || defined(__clang__) - #else + #if TOPLING_USE_BOUND_PMF typedef bool (*BaseIterValidFN)(const Iterator*); typedef void (*BaseIterScanFN)(Iterator*); // Prev/Next typedef Slice (*BaseIterGetSliceFN)(const Iterator*); // key/value From 91a8692a40e2ad3a2ce5652bcb2a6d529e74743d Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 7 Oct 2025 18:42:32 +0800 Subject: [PATCH 102/175] db_bench: Add option wkey_file and rkey_file Read keys from a file instead of auto generate --- tools/db_bench_tool.cc | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index bcaa516173..0a5d6e4746 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1181,6 +1181,33 @@ DEFINE_bool(io_uring_enabled, true, "If true, enable the use of IO uring if the platform supports it"); extern "C" bool RocksDbIOUringEnable() { return FLAGS_io_uring_enabled; } DEFINE_string(json, "", "json config file."); +DEFINE_string(wkey_file, "", "read text key file instead of generating for 
write."); +DEFINE_string(rkey_file, "", "read text key file instead of generating for read."); +struct TextInputFile : std::ifstream { + const std::string m_fname; + std::string m_line; + TextInputFile(const std::string& fname) : m_fname(fname) { + if (!fname.empty()) { + this->open(fname.c_str()); + if (!this->is_open()) { + fprintf(stderr, "Could not open keyfile %s\n", fname.c_str()); + exit(1); + } + } + } + const std::string& ReadLine() { + if (!std::getline(*this, m_line)) { + this->clear(); + this->seekg(0, std::ios::beg); // Rewind and start over. + if (!std::getline(*this, m_line)) { + fprintf(stderr, "Could not read from file %s\n", + m_fname.c_str()); + exit(1); + } + } + return m_line; + } +}; DEFINE_bool(adaptive_readahead, false, "carry forward internal auto readahead size from one file to next " @@ -5163,6 +5190,7 @@ class Benchmark { void DoWrite(ThreadState* thread, WriteMode write_mode) { const int test_duration = write_mode == RANDOM ? FLAGS_duration : 0; const int64_t num_ops = writes_ == 0 ? num_ : writes_; + TextInputFile key_file(FLAGS_wkey_file); size_t num_key_gens = 1; if (db_.db == nullptr) { @@ -5432,7 +5460,11 @@ class Benchmark { } else { rand_num = key_gens[id]->Next(); } + if (key_file.is_open()) { + key = Slice(key_file.ReadLine()); + } else { GenerateKeyFromInt(rand_num, FLAGS_num, &key); + } Slice val; if (kNumDispAndPersEntries > 0) { random_value = rnd_disposable_entry.RandomString( @@ -6176,6 +6208,7 @@ class Benchmark { } void ReadRandom(ThreadState* thread) { + TextInputFile key_file(FLAGS_rkey_file.c_str()); int64_t read = 0; int64_t found = 0; int64_t bytes = 0; @@ -6206,6 +6239,9 @@ class Benchmark { // We use same key_rand as seed for key and column family so that we can // deterministically find the cfh corresponding to a particular key, as it // is done in DoWrite method. 
+ if (key_file.is_open()) { + key = Slice(key_file.ReadLine()); + } else { if (entries_per_batch_ > 1 && FLAGS_multiread_stride) { if (++num_keys == entries_per_batch_) { num_keys = 0; @@ -6221,6 +6257,7 @@ class Benchmark { key_rand = GetRandomKey(&thread->rand); } GenerateKeyFromInt(key_rand, FLAGS_num, &key); + } read++; std::string* ts_ptr = nullptr; #if defined(TOPLINGDB_WITH_TIMESTAMP) @@ -6276,6 +6313,12 @@ class Benchmark { fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str()); abort(); } + else if (key_file.is_open()) { + static bool print_not_found = terark::getEnvBool("PRINT_NOT_FOUND"); + if (print_not_found) { + fprintf(stderr, "NotFound: key = %s\n", key.ToString().c_str()); + } + } if (limiter != nullptr && read % 256 == 255) { thread->shared->read_rate_limiter->Request( From 311334fe98b5a34b52863c0d226451e796344482 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Oct 2025 19:27:23 +0800 Subject: [PATCH 103/175] io_posix.cc: Fix clang warning -Wshorten-64-to-32 --- env/io_posix.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/io_posix.cc b/env/io_posix.cc index c1f4eae1d7..196033f189 100644 --- a/env/io_posix.cc +++ b/env/io_posix.cc @@ -1344,7 +1344,7 @@ IOStatus PosixWritableFile::Appendv(const Slice* parts, size_t num, static_assert(offsetof(struct iovec, iov_len) == offsetof(Slice, size_)); ssize_t done; while (true) { - done = writev(fd_, pvec, num); + done = writev(fd_, pvec, int(num)); if (done < 0) { if (errno == EINTR) { continue; From 95dd4d00847211dafcdbde1eda4557264eebe3d6 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Oct 2025 19:56:52 +0800 Subject: [PATCH 104/175] java/Makefile: bugfix checking TOPLING_USE_DYNAMIC_TLS --- java/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/Makefile b/java/Makefile index 7d0ec89365..d000ee870a 100644 --- a/java/Makefile +++ b/java/Makefile @@ -274,7 +274,7 @@ endif ifneq (${DISABLE_JEMALLOC},1) LD_PRELOAD := libjemalloc.so 
endif -ifeq ($(TOPLING_USE_DYNAMIC_TLS),1) +ifneq ($(TOPLING_USE_DYNAMIC_TLS),1) LD_PRELOAD := ${LD_PRELOAD}:${ROCKSDBJNILIB} endif ifneq ($(LD_PRELOAD),) From 96a5a164f11158fc9cfe56c3ee92a4cd5071360a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Oct 2025 19:29:45 +0800 Subject: [PATCH 105/175] Makefile: remove setting LD_LIBRARY_PATH & quiet make clean --- Makefile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index f020ae389c..8f81791167 100644 --- a/Makefile +++ b/Makefile @@ -606,7 +606,6 @@ else endif endif # WITH_TOPLING_DCOMPACT -export LD_LIBRARY_PATH:=${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared:${LD_LIBRARY_PATH} ifeq (${WITH_TOPLING_ROCKS},1) ifneq (,$(wildcard sideplugin/topling-rocks)) CXXFLAGS += -Isideplugin/topling-rocks/src @@ -1746,14 +1745,14 @@ clean-not-downloaded: clean-ext-libraries-bin clean-rocks clean-not-downloaded-r clean-rocks: # Not practical to exactly match all versions/variants in naming (e.g. debug or not) - rm -f ${LIBNAME}*.so* ${LIBNAME}*.a - rm -f $(BENCHMARKS) $(TOOLS) $(TESTS) $(PARALLEL_TEST) $(MICROBENCHS) - rm -rf $(CLEAN_FILES) ios-x86 ios-arm scan_build_report + $(AM_V_at)rm -f ${LIBNAME}*.so* ${LIBNAME}*.a + $(AM_V_at)rm -f $(BENCHMARKS) $(TOOLS) $(TESTS) $(PARALLEL_TEST) $(MICROBENCHS) + $(AM_V_at)rm -rf $(CLEAN_FILES) ios-x86 ios-arm scan_build_report rm -rf ${OBJ_DIR} rm -rf sideplugin/topling-dcompact/tools/dcompact/build +$(MAKE) -C ${TOPLING_CORE_DIR} clean - $(FIND) . -name "*.[oda]" -exec rm -f {} \; - $(FIND) . -type f \( -name "*.gcda" -o -name "*.gcno" \) -exec rm -f {} \; + $(AM_V_at)$(FIND) . -name "*.[oda]" -exec rm -f {} \; + $(AM_V_at)$(FIND) . 
-type f \( -name "*.gcda" -o -name "*.gcno" \) -exec rm -f {} \; clean-rocksjava: clean-rocks rm -rf jl jls From b2da6959786c75dd5be0d422265361a4fea3e8db Mon Sep 17 00:00:00 2001 From: leipeng Date: Mon, 13 Oct 2025 02:32:38 +0800 Subject: [PATCH 106/175] Fix for centos7: std::string _GLIBCXX_USE_CXX11_ABI == 0 --- db/db_iter.cc | 2 +- include/rocksdb/c.h | 2 +- util/string_util.cc | 55 --------------------------------------------- util/string_util.h | 4 ---- 4 files changed, 2 insertions(+), 61 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 067107b58b..5bb11b15b2 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -157,7 +157,7 @@ bool DBIter::ParseKey(ParsedInternalKey* ikey) { #endif } -#if defined(__GLIBCXX__) +#if defined(__GLIBCXX__) && (!defined(_GLIBCXX_USE_CXX11_ABI) || _GLIBCXX_USE_CXX11_ABI) void string_clear_no_touch_memory(std::string*); template struct string_clear_thief { diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 18d1bcb736..263b40491a 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -150,7 +150,7 @@ struct rocksdb_slice_t { size_t size; }; typedef struct rocksdb_slice_t rocksdb_slice_t; -#if defined(__GLIBCXX__) && _GLIBCXX_USE_CXX11_ABI +#if defined(__GLIBCXX__) && (!defined(_GLIBCXX_USE_CXX11_ABI) || _GLIBCXX_USE_CXX11_ABI) struct rocksdb_pinnableslice_t { const char* data; size_t size; diff --git a/util/string_util.cc b/util/string_util.cc index 78c7e745d6..b47140cbdc 100644 --- a/util/string_util.cc +++ b/util/string_util.cc @@ -542,59 +542,4 @@ std::string errnoStr(int err) { return result; } -#if __cpp_lib_string_resize_and_overwrite >= 202110L - void stdstr_resize_no_init(std::string& str, std::size_t sz) { - str.resize_and_overwrite(sz, [](char*, size_t sz) { return sz; }); - } -#elif (defined(__clang_major__) && __clang_major__ <= 11) || \ - (defined(_MSC_VER) && _MSC_VER<=1920) - // old clang has bug in global friend function. discard it. - // old msvc don't support visit private, discard it. 
- void stdstr_resize_no_init(std::string& str, std::size_t sz) { str.resize(sz); } -#else - -#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) - template - class string_thief { - public: - friend void string_set_length_hacker(std::string& bank, std::size_t sz) { - (bank.*p)(sz); - } - }; -#elif defined(_MSVC_STL_VERSION) - template - class string_thief { - public: - friend void string_set_length_hacker(std::string& bank, std::size_t sz) { - (bank.*p)._Myval2._Mysize = sz; - } - }; -#endif - -#if defined(__GLIBCXX__) // libstdc++ - template class string_thief; -#elif defined(_LIBCPP_VERSION) - template class string_thief; -#elif defined(_MSVC_STL_VERSION) - template class string_thief; -#endif - -#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) || defined(_MSVC_STL_VERSION) - void string_set_length_hacker(std::string& bank, std::size_t sz); -#endif - - inline void stdstr_resize_no_init(std::string& str, std::size_t sz) { -#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) || defined(_MSVC_STL_VERSION) - str.reserve(sz); - string_set_length_hacker(str, sz); - str[sz] = '\0'; -#else - str.resize(sz); -#endif - } -#endif - } // namespace ROCKSDB_NAMESPACE diff --git a/util/string_util.h b/util/string_util.h index 0b77a6250c..999081ebba 100644 --- a/util/string_util.h +++ b/util/string_util.h @@ -182,8 +182,4 @@ extern const std::string kNullptrString; // the argument err extern std::string errnoStr(int err); -// Our hacker function, resize a std::string without initializing memory -// (but still has '\0' in the end of string) -void stdstr_resize_no_init(std::string& str, std::size_t sz); - } // namespace ROCKSDB_NAMESPACE From 51dd519b0a332595462e03decd81bb4cc33a1361 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 16 Oct 2025 16:24:16 +0800 Subject: [PATCH 107/175] _mm_maskz_loadu_epi8: use _bzhi_u32(-1, len) to generate mask --- db/version_edit.h | 2 +- table/merging_iterator.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/db/version_edit.h b/db/version_edit.h index 56d9ab4545..3901cb3903 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -404,7 +404,7 @@ inline uint64_t HostPrefixCache(const Slice& ikey) { #if defined(__AVX512VL__) && defined(__AVX512BW__) //#pragma message "__AVX512VL__ && __AVX512BW__, use _mm_maskz_loadu_epi8" // load 128 bits, keep low 64 bits, discard high 64 bits - auto mask = uint16_t(~(-1 << (ikey.size_ - 8))); + auto mask = _bzhi_u32(-1, ikey.size_ - 8); auto m128 = _mm_maskz_loadu_epi8(mask, ikey.data_); data = (uint64_t)_mm_extract_epi64(m128, 0); #else diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 013e8144a7..0ec3b1596b 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -219,7 +219,7 @@ FORCE_INLINE UintPrefix HostPrefixCacheUK(const Slice& uk) { } else { #if defined(__AVX512VL__) && defined(__AVX512BW__) #pragma message "__AVX512VL__ && __AVX512BW__, use _mm_maskz_loadu_epi8" - auto mask = uint16_t(~(-1 << uk.size_)); + auto mask = _bzhi_u32(-1, uk.size_); return bswap_prefix((UintPrefix)_mm_maskz_loadu_epi8(mask, uk.data_)); #else return bswap_prefix(LoadPrefixZeroSuffixDynaLen(uk.data_, uk.size_)); @@ -235,7 +235,7 @@ FORCE_INLINE UintPrefix HostPrefixCacheIK(const Slice& ik) { return bswap_prefix(unaligned_load(ik.data_)); } else { #if defined(__AVX512VL__) && defined(__AVX512BW__) - auto mask = uint16_t(~(-1 << (ik.size_ - 8))); + auto mask = _bzhi_u32(-1, ik.size_ - 8); return bswap_prefix((UintPrefix)_mm_maskz_loadu_epi8(mask, ik.data_)); #else if (LIKELY(8 + 8 == ik.size_)) { From 6b9defc5346dbb50f8b7934bafc60415e8c94556 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 16 Oct 2025 18:51:22 +0800 Subject: [PATCH 108/175] TableCache::Get: use bound pmf for skip check row_cache --- db/table_cache.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++++- db/table_cache.h | 35 ++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/db/table_cache.cc 
b/db/table_cache.cc index c864ba9688..d82ca92a49 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -84,6 +84,10 @@ TableCache::TableCache(const ImmutableOptions& ioptions, // If the same cache is shared by multiple instances, we need to // disambiguate its entries. PutVarint64(&row_cache_id_, ioptions_.row_cache->NewId()); + m_get = terark::ExtractFuncPtr(this, &TableCache::GetWithRowCache); + } + else { + m_get = terark::ExtractFuncPtr(this, &TableCache::GetNoneRowCache); } } @@ -436,7 +440,7 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, return found; } -Status TableCache::Get( +Status TableCache::GetWithRowCache( const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, const Slice& k, GetContext* get_context, @@ -527,6 +531,52 @@ Status TableCache::Get( return s; } +Status TableCache::GetNoneRowCache( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin) { + TableReader* t = file_meta.fd.table_reader; + TypedHandle* handle = nullptr; + if (UNLIKELY(t == nullptr)) { + Status s = FindTable(options, file_options_, internal_comparator, file_meta, + &handle, block_protection_bytes_per_key, prefix_extractor, + options.read_tier == kBlockCacheTier /* no_io */, + file_read_hist, skip_filters, level, + true /* prefetch_index_and_filter_in_cache */, + max_file_size_for_l0_meta_pin, file_meta.temperature); + if (s.ok()) { + t = cache_.Value(handle); + } else { + return s; + } + } + auto* max_covering_tombstone_seq = get_context->max_covering_tombstone_seq(); + if (UNLIKELY(max_covering_tombstone_seq && !options.ignore_range_deletions)) { + std::unique_ptr range_del_iter( + 
t->NewRangeTombstoneIterator(options)); + if (range_del_iter != nullptr) { + auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); + if (seq > *max_covering_tombstone_seq) { + *max_covering_tombstone_seq = seq; + if (get_context->NeedTimestamp()) { + get_context->SetTimestampFromRangeTombstone(range_del_iter->timestamp()); + } + } + } + } + if (LIKELY(handle == nullptr)) { // optimize for compiler tail call + return t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + } else { + Status s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + cache_.Release(handle); + return s; + } +} + void TableCache::UpdateRangeTombstoneSeqnums( const ReadOptions& options, TableReader* t, MultiGetContext::Range& table_range) { diff --git a/db/table_cache.h b/db/table_cache.h index 14fdd6e7a6..f20e863e2d 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -112,6 +112,7 @@ class TableCache { // recorded // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" + inline Status Get( const ReadOptions& options, const InternalKeyComparator& internal_comparator, @@ -119,8 +120,40 @@ class TableCache { uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor = nullptr, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, - int level = -1, size_t max_file_size_for_l0_meta_pin = 0); + int level = -1, size_t max_file_size_for_l0_meta_pin = 0) { + return m_get(this, options, internal_comparator, file_meta, k, get_context, + block_protection_bytes_per_key, prefix_extractor, file_read_hist, + skip_filters, level, max_file_size_for_l0_meta_pin); + } + +private: + Status GetWithRowCache( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& 
prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, + int level, size_t max_file_size_for_l0_meta_pin); + Status GetNoneRowCache( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, + int level, size_t max_file_size_for_l0_meta_pin); + + Status (*m_get)(TableCache*, + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, + int level, size_t max_file_size_for_l0_meta_pin); +public: // Return the range delete tombstone iterator of the file specified by // `file_meta`. Status GetRangeTombstoneIterator( From 037c737ab1574268eaaf7be1ca81ae309081c54d Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 16 Oct 2025 18:55:04 +0800 Subject: [PATCH 109/175] version_edit.h: HostPrefixCache fix clang -Wshorten-64-to-32 --- db/version_edit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/version_edit.h b/db/version_edit.h index 3901cb3903..ce35758d41 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -404,7 +404,7 @@ inline uint64_t HostPrefixCache(const Slice& ikey) { #if defined(__AVX512VL__) && defined(__AVX512BW__) //#pragma message "__AVX512VL__ && __AVX512BW__, use _mm_maskz_loadu_epi8" // load 128 bits, keep low 64 bits, discard high 64 bits - auto mask = _bzhi_u32(-1, ikey.size_ - 8); + auto mask = _bzhi_u32(-1, uint32_t(ikey.size_ - 8)); auto m128 = _mm_maskz_loadu_epi8(mask, ikey.data_); data = (uint64_t)_mm_extract_epi64(m128, 0); #else From 82df62bce879989c55cf0056dbe153fbd33bfd38 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 16 Oct 2025 18:56:17 
+0800 Subject: [PATCH 110/175] db_iter.cc: SetFindNext: FixLen: optimize for avx512 --- db/db_iter.cc | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/db/db_iter.cc b/db/db_iter.cc index 5bb11b15b2..d04d10984a 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -526,6 +526,22 @@ struct BytewiseCmpNoTS { else return SliceBytewiseLess(x, y); } + #if defined(__AVX512VL__) && defined(__AVX512BW__) + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<64>) const { + // return x < y; + ROCKSDB_ASSERT_EQ(x.size(), y.size()); + ROCKSDB_ASSERT_LE(x.size(), 64); + __mmask64 msk = _bzhi_u64(-1, x.size()); + __m512i xxx = _mm512_maskz_loadu_epi8(msk, x.data()); + __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); + __mmask64 cmp = _mm512_cmpneq_epi8_mask(xxx, yyy); + if (cmp == 0) // all zero means all eq, any one means not eq + return false; + auto pos = _tzcnt_u64(cmp); + return (uint8_t)x[pos] < (uint8_t)y[pos]; + } + #endif int compare(const Slice& x, const Slice& y) const { return x.compare(y); } }; @@ -547,6 +563,22 @@ struct RevBytewiseCmpNoTS { else return SliceBytewiseLess(y, x); } + #if defined(__AVX512VL__) && defined(__AVX512BW__) + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<64>) const { + // return y < x; + ROCKSDB_ASSERT_EQ(x.size(), y.size()); + ROCKSDB_ASSERT_LE(x.size(), 64); + __mmask64 msk = _bzhi_u64(-1, x.size()); + __m512i xxx = _mm512_maskz_loadu_epi8(msk, x.data()); + __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); + __mmask64 cmp = _mm512_cmpneq_epi8_mask(xxx, yyy); + if (cmp == 0) // all zero means all eq, any one means not eq + return false; + auto pos = _tzcnt_u64(cmp); + return (uint8_t)y[pos] < (uint8_t)y[pos]; + } + #endif int compare(const Slice& x, const Slice& y) const { return y.compare(x); } }; @@ -579,6 +611,12 @@ void DBIter::SetFuncPtr() { #else #define BOUND_PMF(func) ExtractFuncPtr(this, func) #endif + #if defined(__AVX512VL__) && 
defined(__AVX512BW__) + #define SetFindNext(FuncName, CmpNoTS) \ + if (fixed_user_key_len_ != 0 && fixed_user_key_len_ <= 64) \ + SetFindNext3(FuncName, 64, CmpNoTS); \ + else SetFindNext3(FuncName, 0, CmpNoTS) + #else #define SetFindNext(FuncName, CmpNoTS) \ if (false) {} \ else if ( 8 == fixed_user_key_len_) SetFindNext3(FuncName, 8, CmpNoTS); \ @@ -589,6 +627,7 @@ void DBIter::SetFuncPtr() { else if (28 == fixed_user_key_len_) SetFindNext3(FuncName, 28, CmpNoTS); \ else if (32 == fixed_user_key_len_) SetFindNext3(FuncName, 32, CmpNoTS); \ else SetFindNext3(FuncName, 0, CmpNoTS) + #endif #define SetFindNext3(FuncName, FixLen, CmpNoTS) \ if (read_callback_) \ SetFindNext4(FuncName, kTrue , FixLen, CmpNoTS); \ From 78776e1d2a486f488c1b3eb531b12bcb1a21a022 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 17 Oct 2025 00:32:09 +0800 Subject: [PATCH 111/175] Makefile: remove -fno-builtin-memcmp --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 8f81791167..f8315786b0 100644 --- a/Makefile +++ b/Makefile @@ -152,6 +152,8 @@ ifneq (${.SHELLSTATUS},0) endif # this file is generated by the previous line to set build flags and sources include make_config.mk +PLATFORM_CCFLAGS := $(filter-out -fno-builtin-memcmp, ${PLATFORM_CCFLAGS}) +PLATFORM_CXXFLAGS := $(filter-out -fno-builtin-memcmp, ${PLATFORM_CXXFLAGS}) # defined in make_config.mk ROCKSDB_FULL_VERSION := ${ROCKSDB_MAJOR}.${ROCKSDB_MINOR}.${ROCKSDB_PATCH} From 41cf4226b44b023444ef956cee0bf0bb79e326c3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 17 Oct 2025 16:08:10 +0800 Subject: [PATCH 112/175] DBImpl::GetImpl() better inline --- db/db_impl/db_impl.cc | 18 +----------------- db/db_impl/db_impl.h | 15 ++++++++++----- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index c7b12e12c1..9bb9fe5f1b 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2118,13 +2118,6 @@ Status DBImpl::Get(const ReadOptions& 
read_options, return Get(read_options, column_family, key, value, /*timestamp=*/nullptr); } -Status DBImpl::GetImpl(const ReadOptions& read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value) { - return GetImpl(read_options, column_family, key, value, - /*timestamp=*/nullptr); -} - Status DBImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) { @@ -2148,22 +2141,13 @@ Status DBImpl::Get(const ReadOptions& _read_options, const ReadOptions& read_options(_read_options); #endif - Status s = GetImpl(read_options, column_family, key, value, timestamp); - return s; -} - -Status DBImpl::GetImpl(const ReadOptions& read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value, std::string* timestamp) { GetImplOptions get_impl_options; get_impl_options.column_family = column_family; get_impl_options.value = value; #if defined(TOPLINGDB_WITH_TIMESTAMP) get_impl_options.timestamp = timestamp; #endif - - Status s = GetImpl(read_options, key, get_impl_options); - return s; + return GetImpl(read_options, key, get_impl_options); } Status DBImpl::GetEntity(const ReadOptions& _read_options, diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 134cca14f8..97bc9c3e2e 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -642,13 +642,18 @@ class DBImpl : public DB { int* number_of_operands = nullptr; }; + __always_inline Status GetImpl(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value); - - Status GetImpl(const ReadOptions& read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value, std::string* timestamp); + PinnableSlice* value, std::string* timestamp = nullptr) { + GetImplOptions get_impl_options; + get_impl_options.column_family = column_family; + get_impl_options.value = value; + #if defined(TOPLINGDB_WITH_TIMESTAMP) + 
get_impl_options.timestamp = timestamp; + #endif + return GetImpl(read_options, key, get_impl_options); + } // Function that Get and KeyMayExist call with no_io true or false // Note: 'value_found' from KeyMayExist propagates here From 9a5f69f0c0f9f7e0028dff2ef2add31c10a284eb Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 17 Oct 2025 22:48:17 +0800 Subject: [PATCH 113/175] Version::Get() early static dispatch & templatize FilePicker --- db/version_set.cc | 64 ++++++++++++++++------------------------------- db/version_set.h | 32 ++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 0318d7ab7a..4a1836df70 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -236,29 +236,13 @@ Status OverlapWithIterator(const Comparator* ucmp, // levels. Therefore we are guaranteed that if we find data // in a smaller level, later levels are irrelevant (unless we // are MergeInProgress). +template class FilePicker { -#if !TOPLING_USE_BOUND_PMF - typedef FdWithKeyRange* (FilePicker::*GetNextFileFN)(); - #define Set_m_get_next_file(Cmp) \ - m_get_next_file = &FilePicker::GetNextFileTmpl -#else - typedef FdWithKeyRange* (*GetNextFileFN)(FilePicker*); - #define Set_m_get_next_file(Cmp) \ - do { \ - auto func = &FilePicker::GetNextFileTmpl; \ - m_get_next_file = ExtractFuncPtr(this, func); \ - } while (0) -#endif - GetNextFileFN m_get_next_file; - typedef int (*FindFileInRangeFN)(const InternalKeyComparator*, - const LevelFilesBrief& file_level, Slice key, - size_t left, size_t right); - FindFileInRangeFN m_find_file_in_range; __always_inline int FindFileInRange(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key, size_t left, size_t right) { - return m_find_file_in_range(&icmp, file_level, key, left, right); + return (int)FindFileInRangeTmpl(IKCmp{&icmp}, file_level, key, left, right); } public: FilePicker(const Slice& user_key, const Slice& ikey, @@ -279,18 +263,6 @@ 
class FilePicker { file_indexer_(file_indexer), user_comparator_(user_comparator), internal_comparator_(internal_comparator) { - if (IsForwardBytewiseComparator(user_comparator)) { - Set_m_get_next_file(ForwardBytewiseCompareUserKeyNoTS); - m_find_file_in_range = &FindFileInRangeInst; - } - else if (IsReverseBytewiseComparator(user_comparator)) { - Set_m_get_next_file(ReverseBytewiseCompareUserKeyNoTS); - m_find_file_in_range = &FindFileInRangeInst; - } - else { - Set_m_get_next_file(VirtualFunctionCompareUserKeyNoTS); - m_find_file_in_range = &FindFileInRangeInst; - } // Setup member variables to search first level. search_ended_ = !PrepareNextLevel(); if (!search_ended_) { @@ -306,17 +278,8 @@ class FilePicker { int GetCurrentLevel() const { return curr_level_; } - __always_inline FdWithKeyRange* GetNextFile() { - #if !TOPLING_USE_BOUND_PMF - return (this->*m_get_next_file)(); - #else - return m_get_next_file(this); - #endif - } - template - FdWithKeyRange* GetNextFileTmpl() { - Compare cmp{user_comparator_}; + UKCmp cmp{user_comparator_}; while (!search_ended_) { // Loops over different levels. while (curr_index_in_curr_level_ < curr_file_level_->num_files) { // Loops over all files in current level. 
@@ -2498,6 +2461,22 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, FSSupportedOps::kAsyncIO)) { use_async_io_ = true; } + if (cfd_) { + using terark::ExtractFuncPtr; + using GetFP = decltype(m_get); + if (IsForwardBytewiseComparator(user_comparator())) { + m_get = ExtractFuncPtr(this, &Version::GetInst + ); + } + else if (IsReverseBytewiseComparator(user_comparator())) { + m_get = ExtractFuncPtr(this, &Version::GetInst + ); + } + else { + m_get = ExtractFuncPtr(this, &Version::GetInst + ); + } + } } Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key, @@ -2629,8 +2608,9 @@ void Version::MultiGetBlob( } } +template ROCKSDB_FLATTEN -void Version::Get(const ReadOptions& read_options, const LookupKey& k, +void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, @@ -2676,7 +2656,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, pinned_iters_mgr->StartPinning(); } - FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_, + FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_, user_comparator(), internal_comparator()); diff --git a/db/version_set.h b/db/version_set.h index d506fa8563..647ae00fbf 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -894,14 +894,42 @@ class Version { // merge_context.operands_list and don't merge the operands // REQUIRES: lock is not held // REQUIRES: pinned_iters_mgr != nullptr - void Get(const ReadOptions&, const LookupKey& key, PinnableSlice* value, + void Get(const ReadOptions& ro, const LookupKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, PinnedIteratorsManager* pinned_iters_mgr, bool* value_found = 
nullptr, bool* key_exists = nullptr, SequenceNumber* seq = nullptr, ReadCallback* callback = nullptr, - bool* is_blob = nullptr, bool do_merge = true); + bool* is_blob = nullptr, bool do_merge = true) + { + return m_get(this, ro, key, value, columns, timestamp, status, + merge_context, max_covering_tombstone_seq, pinned_iters_mgr, + value_found, key_exists, seq, callback, is_blob, do_merge); + } + +private: + template + void GetInst(const ReadOptions&, const LookupKey& key, PinnableSlice* value, + PinnableWideColumns* columns, std::string* timestamp, Status* status, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + PinnedIteratorsManager* pinned_iters_mgr, + bool* value_found, bool* key_exists, + SequenceNumber* seq, ReadCallback* callback, + bool* is_blob, bool do_merge); + + void (*m_get)(Version*, + const ReadOptions&, const LookupKey& key, PinnableSlice* value, + PinnableWideColumns* columns, std::string* timestamp, Status* status, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + PinnedIteratorsManager* pinned_iters_mgr, + bool* value_found, bool* key_exists, + SequenceNumber* seq, ReadCallback* callback, + bool* is_blob, bool do_merge); + +public: void MultiGet(const ReadOptions&, MultiGetRange* range, ReadCallback* callback = nullptr); From 823b4e51a2e0722cd80ae574009f509c4475d5cf Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 17 Oct 2025 23:19:08 +0800 Subject: [PATCH 114/175] GetContext: TOPLINGDB_WITH_TIMESTAMP for ukey_with_ts_found_ --- table/get_context.cc | 2 ++ table/get_context.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/table/get_context.cc b/table/get_context.cc index 7dd216b2e5..c2a4dc887b 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -373,9 +373,11 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, if (kNotFound == state_) { state_ = kFound; if (do_merge_) { + #if defined(TOPLINGDB_WITH_TIMESTAMP) if (type == kTypeBlobIndex && 
ucmp_->timestamp_size() != 0) { ukey_with_ts_found_.PinSelf(parsed_key.user_key); } + #endif if (LIKELY(pinnable_val_ != nullptr)) { Slice value_to_use = value; diff --git a/table/get_context.h b/table/get_context.h index aaa13a6a14..8579d08982 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -199,11 +199,15 @@ class GetContext { bool has_callback() const { return callback_ != nullptr; } const Slice& ukey_to_get_blob_value() const { + #if defined(TOPLINGDB_WITH_TIMESTAMP) if (!ukey_with_ts_found_.empty()) { return ukey_with_ts_found_; } else { return user_key_; } + #else + return user_key_; + #endif } uint64_t get_tracing_get_id() const { return tracing_get_id_; } @@ -238,7 +242,9 @@ class GetContext { // When a blob index is found with the user key containing timestamp, // this copies the corresponding user key on record in the sst file // and is later used for blob verification. +#if defined(TOPLINGDB_WITH_TIMESTAMP) PinnableSlice ukey_with_ts_found_; +#endif PinnableSlice* pinnable_val_; PinnableWideColumns* columns_; std::string* timestamp_; From 1b5408e5b71998d3e1538ac237650cc023843d7c Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:07:46 +0800 Subject: [PATCH 115/175] db_iter.cc: RevBytewiseCmpNoTS reuse call BytewiseCmpNoTS --- db/db_iter.cc | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index d04d10984a..d34ccb70d5 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -558,27 +558,8 @@ struct RevBytewiseCmpNoTS { __always_inline bool operator()(const Slice& x, const Slice& y, Const) const { // return y < x; - if constexpr (FixLen) - return RawBytewiseLess(y.data_, x.data_); - else - return SliceBytewiseLess(y, x); + return BytewiseCmpNoTS(nullptr)(y, x, Const()); } - #if defined(__AVX512VL__) && defined(__AVX512BW__) - __always_inline - bool operator()(const Slice& x, const Slice& y, Const<64>) const { - // return y < x; - ROCKSDB_ASSERT_EQ(x.size(), y.size()); 
- ROCKSDB_ASSERT_LE(x.size(), 64); - __mmask64 msk = _bzhi_u64(-1, x.size()); - __m512i xxx = _mm512_maskz_loadu_epi8(msk, x.data()); - __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); - __mmask64 cmp = _mm512_cmpneq_epi8_mask(xxx, yyy); - if (cmp == 0) // all zero means all eq, any one means not eq - return false; - auto pos = _tzcnt_u64(cmp); - return (uint8_t)y[pos] < (uint8_t)y[pos]; - } - #endif int compare(const Slice& x, const Slice& y) const { return y.compare(x); } }; From d7b9d416e887ccc87c2861a99b3268b743c17abd Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:08:53 +0800 Subject: [PATCH 116/175] db_iter.cc: BytewiseCmpNoTS improve avx512 --- db/db_iter.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index d34ccb70d5..8e059d7081 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -535,11 +535,13 @@ struct BytewiseCmpNoTS { __mmask64 msk = _bzhi_u64(-1, x.size()); __m512i xxx = _mm512_maskz_loadu_epi8(msk, x.data()); __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); - __mmask64 cmp = _mm512_cmpneq_epi8_mask(xxx, yyy); - if (cmp == 0) // all zero means all eq, any one means not eq - return false; - auto pos = _tzcnt_u64(cmp); - return (uint8_t)x[pos] < (uint8_t)y[pos]; + __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); + __mmask64 lt = _mm512_cmplt_epi8_mask (xxx, yyy); + auto pos = _tzcnt_u64(neq); // pos = 64 when neq is 0(should return false) + ROCKSDB_ASSUME(pos <= 64); // gcc does not know this, tell it(clang knows) + //return (lt >> pos & 1) != 0; // maybe pos == 64 so this is wrong + return (_bextr_u64(-1, pos, 1) & lt) != 0; + // _bextr_u64(-1, pos, 1) == 0 when pos is 64 } #endif int compare(const Slice& x, const Slice& y) const { return x.compare(y); } From 98027363b44063b2d5cc5e0ae1647c7ffa162ebb Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:10:51 +0800 Subject: [PATCH 117/175] db_iter.cc: BytewiseCmpNoTS::operator() Add Const 4,8,12,16 
overload compiler(gcc) loop unroll is not graceful, newer gcc even has regressions, do it manully --- db/db_iter.cc | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/db/db_iter.cc b/db/db_iter.cc index 8e059d7081..5882a37f6d 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -526,6 +526,38 @@ struct BytewiseCmpNoTS { else return SliceBytewiseLess(x, y); } + // compiler(gcc) loop unroll is not graceful, newer gcc even has regressions, + // do it manully + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<4>) const { + // return x < y; + return NativeOfBigEndian32(unaligned_load(x.data_)) + < NativeOfBigEndian32(unaligned_load(y.data_)); + } + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<8>) const { + // return x < y; + return NativeOfBigEndian64(unaligned_load(x.data_)) + < NativeOfBigEndian64(unaligned_load(y.data_)); + } + #if defined(__GNUC__) && __GNUC__ >= 11 + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<12>) const { + // return x < y; + uint64_t x0 = NativeOfBigEndian64(unaligned_load(x.data_)); + uint64_t y0 = NativeOfBigEndian64(unaligned_load(y.data_)); + uint32_t x1 = NativeOfBigEndian32(unaligned_load(x.data_ + 8)); + uint32_t y1 = NativeOfBigEndian32(unaligned_load(y.data_ + 8)); + return ((unsigned __int128)x0 << 64 | x1) + < ((unsigned __int128)y0 << 64 | y1); + } + __always_inline + bool operator()(const Slice& x, const Slice& y, Const<16>) const { + // return x < y; + return __builtin_bswap128(unaligned_load(x.data_)) + < __builtin_bswap128(unaligned_load(y.data_)); + } + #endif #if defined(__AVX512VL__) && defined(__AVX512BW__) __always_inline bool operator()(const Slice& x, const Slice& y, Const<64>) const { From 87ae51ef5972b40cef096aaafdaf52f9f069662a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:45:22 +0800 Subject: [PATCH 118/175] db_iter.cc: SetFuncPtr() FixLen 8,12,16 on avx512 --- db/db_iter.cc | 6 +++++- 1 file 
changed, 5 insertions(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 5882a37f6d..0689806349 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -628,7 +628,11 @@ void DBIter::SetFuncPtr() { #endif #if defined(__AVX512VL__) && defined(__AVX512BW__) #define SetFindNext(FuncName, CmpNoTS) \ - if (fixed_user_key_len_ != 0 && fixed_user_key_len_ <= 64) \ + if (false) {} \ + else if ( 8 == fixed_user_key_len_) SetFindNext3(FuncName, 8, CmpNoTS); \ + else if (12 == fixed_user_key_len_) SetFindNext3(FuncName, 12, CmpNoTS); \ + else if (16 == fixed_user_key_len_) SetFindNext3(FuncName, 16, CmpNoTS); \ + else if (fixed_user_key_len_ != 0 && fixed_user_key_len_ <= 64) \ SetFindNext3(FuncName, 64, CmpNoTS); \ else SetFindNext3(FuncName, 0, CmpNoTS) #else From 803395a32491729cf6fe396922c036ff13ed4360 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:52:02 +0800 Subject: [PATCH 119/175] db_iter.cc: saved_key_.SetUserKey for flame graph flame graph will show SetUserKey<8> on FixLen == 8 --- db/db_iter.cc | 9 ++++++--- db/db_iter.h | 7 +++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 0689806349..282469253f 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -787,7 +787,8 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, return true; } else { FixLen != 0 ? 
// to propagate const FixLen - saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : + //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : + saved_key_.SetUserKey(ikey_.user_key.data_) : // for flame saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); @@ -800,7 +801,8 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, if (timestamp_lb_) { saved_key_.SetInternalKey(ikey_); } else if (FixLen != 0) { // to propagate const FixLen - saved_key_.SetUserKey(ikey_.user_key.data_, FixLen); + //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen); + saved_key_.SetUserKey(ikey_.user_key.data_); // for flame } else { saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || @@ -849,7 +851,8 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, return false; } FixLen != 0 ? // to propagate const FixLen - saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : + //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : + saved_key_.SetUserKey(ikey_.user_key.data_) : // for flame saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); diff --git a/db/db_iter.h b/db/db_iter.h index 3a88484c12..4fe4a11511 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -447,6 +447,13 @@ class DBIter final : public Iterator { // do not write last 8 bytes(seq + value_type) }); } + template + void SetUserKey(const char* uk) { + key.risk_assign_local(UserKeyLen + 8, [=](char* buf, size_t) { + memcpy(buf, uk, UserKeyLen); + // do not write last 8 bytes(seq + value_type) + }); + } void SetUserKey(const char* uk, size_t uk_len) { key.risk_assign_local(uk_len + 8, [=](char* buf, size_t) { memcpy(buf, uk, uk_len); From d5afd8ac47c1579d3b24ad0be44d2badf402240a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:54:30 +0800 Subject: [PATCH 120/175] dbformat.h: Add TOPLINGDB_USE_MANUAL_MEMCMP --- db/dbformat.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 
2 deletions(-) diff --git a/db/dbformat.h b/db/dbformat.h index db4c61a6c6..1967251fb8 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -1113,7 +1113,12 @@ __always_inline uint64_t GetUnalignedU64(const void* ptr) noexcept { return x; } +#if !defined(TOPLINGDB_USE_MANUAL_MEMCMP) + #define TOPLINGDB_USE_MANUAL_MEMCMP 1 +#endif + __always_inline bool SliceBytewiseLess(const Slice& x, const Slice& y) { +#if TOPLINGDB_USE_MANUAL_MEMCMP auto px = (const unsigned char*)x.data(); size_t nx = x.size(); auto py = (const unsigned char*)y.data(); size_t ny = y.size(); size_t i = 0, n = std::min(nx, ny); @@ -1137,11 +1142,14 @@ __always_inline bool SliceBytewiseLess(const Slice& x, const Slice& y) { return ux < uy; } return nx < ny; +#else + return x < y; +#endif } struct BytewiseCompareInternalKey { __always_inline bool operator()(Slice x, Slice y) const noexcept { - #if 0 // when unaligned load is slow + #if !TOPLINGDB_USE_MANUAL_MEMCMP size_t n = std::min(x.size_, y.size_) - 8; int cmp = memcmp(x.data_, y.data_, n); if (0 != cmp) return cmp < 0; @@ -1220,7 +1228,7 @@ struct VirtualFunctionLessUserKey { const Comparator* cmp; }; -#if 0 +#if !TOPLINGDB_USE_MANUAL_MEMCMP __always_inline int BytewiseCompare(Slice x, Slice y) noexcept { size_t n = std::min(x.size_, y.size_); int cmp = memcmp(x.data_, y.data_, n); From 495c4b3e5d3f9051e04d784b5089140e7553d5d3 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:57:07 +0800 Subject: [PATCH 121/175] TableCache: always inline GetNoneRowCache This also remove m_get for bound pmf GetNoneRowCache or GetWithRowCache --- db/table_cache.cc | 50 ---------------------------------- db/table_cache.h | 68 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 57 insertions(+), 61 deletions(-) diff --git a/db/table_cache.cc b/db/table_cache.cc index d82ca92a49..0dd427049e 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -84,10 +84,6 @@ TableCache::TableCache(const ImmutableOptions& ioptions, // If the same cache 
is shared by multiple instances, we need to // disambiguate its entries. PutVarint64(&row_cache_id_, ioptions_.row_cache->NewId()); - m_get = terark::ExtractFuncPtr(this, &TableCache::GetWithRowCache); - } - else { - m_get = terark::ExtractFuncPtr(this, &TableCache::GetNoneRowCache); } } @@ -531,52 +527,6 @@ Status TableCache::GetWithRowCache( return s; } -Status TableCache::GetNoneRowCache( - const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, - uint8_t block_protection_bytes_per_key, - const std::shared_ptr& prefix_extractor, - HistogramImpl* file_read_hist, bool skip_filters, int level, - size_t max_file_size_for_l0_meta_pin) { - TableReader* t = file_meta.fd.table_reader; - TypedHandle* handle = nullptr; - if (UNLIKELY(t == nullptr)) { - Status s = FindTable(options, file_options_, internal_comparator, file_meta, - &handle, block_protection_bytes_per_key, prefix_extractor, - options.read_tier == kBlockCacheTier /* no_io */, - file_read_hist, skip_filters, level, - true /* prefetch_index_and_filter_in_cache */, - max_file_size_for_l0_meta_pin, file_meta.temperature); - if (s.ok()) { - t = cache_.Value(handle); - } else { - return s; - } - } - auto* max_covering_tombstone_seq = get_context->max_covering_tombstone_seq(); - if (UNLIKELY(max_covering_tombstone_seq && !options.ignore_range_deletions)) { - std::unique_ptr range_del_iter( - t->NewRangeTombstoneIterator(options)); - if (range_del_iter != nullptr) { - auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); - if (seq > *max_covering_tombstone_seq) { - *max_covering_tombstone_seq = seq; - if (get_context->NeedTimestamp()) { - get_context->SetTimestampFromRangeTombstone(range_del_iter->timestamp()); - } - } - } - } - if (LIKELY(handle == nullptr)) { // optimize for compiler tail call - return t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); - } else { - Status s = 
t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); - cache_.Release(handle); - return s; - } -} - void TableCache::UpdateRangeTombstoneSeqnums( const ReadOptions& options, TableReader* t, MultiGetContext::Range& table_range) { diff --git a/db/table_cache.h b/db/table_cache.h index f20e863e2d..ee85f7ba4e 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -112,7 +112,7 @@ class TableCache { // recorded // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" - inline + __always_inline Status Get( const ReadOptions& options, const InternalKeyComparator& internal_comparator, @@ -121,7 +121,13 @@ class TableCache { const std::shared_ptr& prefix_extractor = nullptr, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, int level = -1, size_t max_file_size_for_l0_meta_pin = 0) { - return m_get(this, options, internal_comparator, file_meta, k, get_context, + if (LIKELY(!ioptions_.row_cache)) { + // important: inline expantion at calling place(Version::Get) + return GetNoneRowCache(options, internal_comparator, file_meta, k, get_context, + block_protection_bytes_per_key, prefix_extractor, file_read_hist, + skip_filters, level, max_file_size_for_l0_meta_pin); + } + return GetWithRowCache(options, internal_comparator, file_meta, k, get_context, block_protection_bytes_per_key, prefix_extractor, file_read_hist, skip_filters, level, max_file_size_for_l0_meta_pin); } @@ -144,15 +150,6 @@ class TableCache { HistogramImpl* file_read_hist, bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin); - Status (*m_get)(TableCache*, - const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, - uint8_t block_protection_bytes_per_key, - const std::shared_ptr& prefix_extractor, - HistogramImpl* file_read_hist, bool skip_filters, - int level, size_t 
max_file_size_for_l0_meta_pin); - public: // Return the range delete tombstone iterator of the file specified by // `file_meta`. @@ -327,4 +324,53 @@ class TableCache { std::string db_session_id_; }; +// important: inline expantion at calling place(Version::Get) +__always_inline +Status TableCache::GetNoneRowCache( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin) +{ + TableReader* t = file_meta.fd.table_reader; + TypedHandle* handle = nullptr; + if (UNLIKELY(t == nullptr)) { + Status s = FindTable(options, file_options_, internal_comparator, file_meta, + &handle, block_protection_bytes_per_key, prefix_extractor, + options.read_tier == kBlockCacheTier /* no_io */, + file_read_hist, skip_filters, level, + true /* prefetch_index_and_filter_in_cache */, + max_file_size_for_l0_meta_pin, file_meta.temperature); + if (s.ok()) { + t = cache_.Value(handle); + } else { + return s; + } + } + auto* max_covering_tombstone_seq = get_context->max_covering_tombstone_seq(); + if (UNLIKELY(max_covering_tombstone_seq && !options.ignore_range_deletions)) { + std::unique_ptr range_del_iter( + t->NewRangeTombstoneIterator(options)); + if (range_del_iter != nullptr) { + auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); + if (seq > *max_covering_tombstone_seq) { + *max_covering_tombstone_seq = seq; + if (get_context->NeedTimestamp()) { + get_context->SetTimestampFromRangeTombstone(range_del_iter->timestamp()); + } + } + } + } + if (LIKELY(handle == nullptr)) { // optimize for compiler tail call + return t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + } else { + Status s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + 
cache_.Release(handle); + return s; + } +} + } // namespace ROCKSDB_NAMESPACE \ No newline at end of file From 4326784b4158c831ce56e724fa70560bc5cf5920 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:57:45 +0800 Subject: [PATCH 122/175] db_impl.cc: flatten DBImpl::Get --- db/db_impl/db_impl.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 9bb9fe5f1b..097163c60d 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2118,6 +2118,7 @@ Status DBImpl::Get(const ReadOptions& read_options, return Get(read_options, column_family, key, value, /*timestamp=*/nullptr); } +ROCKSDB_FLATTEN Status DBImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) { From 57df7f01d9829b6b34a87aa354ce45b58187c778 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 19:58:57 +0800 Subject: [PATCH 123/175] ROCKSDB_ASSUME(cond) = [[assume(cond)]] for gcc13+ --- include/rocksdb/preproc.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/rocksdb/preproc.h b/include/rocksdb/preproc.h index 160dfa5745..e49229edd1 100644 --- a/include/rocksdb/preproc.h +++ b/include/rocksdb/preproc.h @@ -483,7 +483,11 @@ #elif defined(__clang__) #define ROCKSDB_ASSUME(cond) __builtin_assume(cond) #elif defined(__GNUC__) +#if __GNUC__ >= 13 +#define ROCKSDB_ASSUME(cond) [[assume(cond)]] +#else #define ROCKSDB_ASSUME(cond) ((cond) ? 
static_cast(0) : __builtin_unreachable()) +#endif #else #define ROCKSDB_ASSUME(cond) static_cast(!!(cond)) #endif From f8e32494964af3a82751466640fcf91623e1a440 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 18 Oct 2025 20:00:21 +0800 Subject: [PATCH 124/175] db_bench_tool.cc: readseq & nextwithkey: wrap on end of iter --- tools/db_bench_tool.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 0a5d6e4746..871e0deee2 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -5970,7 +5970,12 @@ class Benchmark { int64_t bytes = 0; const auto limiter = thread->shared->read_rate_limiter.get(); const bool omit_value = FLAGS_scan_omit_value; - for (iter->SeekToFirst(); i < reads_ && iter->Valid(); iter->Next()) { + for (iter->SeekToFirst(); i < reads_; iter->Next()) { + if (UNLIKELY(!iter->Valid())) { // wrap if does not reach reads_ + iter->SeekToFirst(); + if (!iter->Valid()) + continue; // safe keep loop even on empty db + } if (omit_value) { bytes += omit_key ? 
key_size : iter->key().size(); } else { @@ -6018,7 +6023,12 @@ class Benchmark { int64_t i = 0, bytes = 0; const auto limiter = thread->shared->read_rate_limiter.get(); const bool omit_value = FLAGS_scan_omit_value; - for (Slice key = iter->SeekToFirstWithKey(); i < reads_ && key.data(); ) { + for (Slice key = iter->SeekToFirstWithKey(); i < reads_; ) { + if (UNLIKELY(!key.data())) { // end of iter, wrap if does not reach reads_ + key = iter->SeekToFirstWithKey(); + if (!key.data()) + continue; // safe keep loop even on empty db + } bytes += key.size(); if (!omit_value) { bytes += iter->value().size(); From 142b94bae5b21fe00899a06eb022e587de7b0dcd Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 08:24:57 +0800 Subject: [PATCH 125/175] db_bench_tool.cc: Fix ScanNextWithKey: fixed_user_key_len --- tools/db_bench_tool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 871e0deee2..5b69c83d13 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -6018,7 +6018,7 @@ class Benchmark { options.adaptive_readahead = FLAGS_adaptive_readahead; options.async_io = FLAGS_async_io; options.auto_readahead_size = FLAGS_auto_readahead_size; - options.fixed_user_key_len = FLAGS_scan_omit_key ? FLAGS_scan_omit_key : 0; + options.fixed_user_key_len = FLAGS_scan_omit_key ? 
FLAGS_key_size : 0; Iterator* iter = db->NewIterator(options); int64_t i = 0, bytes = 0; const auto limiter = thread->shared->read_rate_limiter.get(); From 5e7951533f5a8ec2f96e7854beb100867ea3e6fb Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 00:03:58 +0800 Subject: [PATCH 126/175] CMakeLists.txt: Fix PORTABLE MATCHES Regex Regex should be ^(Reg)$ --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7258fc1e99..2ddc181203 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -272,7 +272,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") endif(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") set(PORTABLE 0 CACHE STRING "Minimum CPU arch to support, or 0 = current CPU, 1 = baseline CPU") -if(PORTABLE MATCHES "1|ON|YES|TRUE|Y") +if(PORTABLE MATCHES "^(1|ON|YES|TRUE|Y)$") # Usually nothing to do; compiler default is typically the most general if(NOT MSVC) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x") @@ -282,7 +282,7 @@ if(PORTABLE MATCHES "1|ON|YES|TRUE|Y") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=loongarch64") endif() endif() -elseif(PORTABLE MATCHES "0|OFF|NO|FALSE|N") +elseif(PORTABLE MATCHES "^(0|OFF|NO|FALSE|N)$") if(MSVC) # NOTE: No auto-detection of current CPU, but instead assume some useful # level of optimization is supported From 819d43761c7382bcce5c701380ab644f37b87d40 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 00:05:24 +0800 Subject: [PATCH 127/175] MemoryEqual & SliceEqual: add avx512 & move from slice.h to dbformat.h --- db/dbformat.h | 64 +++++++++++++++++++++++++++++++++++++++++ include/rocksdb/slice.h | 24 ---------------- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/db/dbformat.h b/db/dbformat.h index 1967251fb8..9158cb9b7f 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -1117,6 +1117,70 @@ __always_inline uint64_t GetUnalignedU64(const void* ptr) noexcept { #define TOPLINGDB_USE_MANUAL_MEMCMP 1 #endif +__always_inline 
bool MemoryEqual(const void* vx, const void* vy, size_t n) { + auto px = (const unsigned char*)vx; + auto py = (const unsigned char*)vy; + #if defined(__AVX512VL__) && defined(__AVX512BW__) + #if 0 + { + // n is small value in most cases, and pointers are not likely aligned + // to 64, so it is better to do not handle such case + size_t j = size_t(vx) & 63; + if (j && (size_t(vy) & 63) == j) { + j = std::min(64 - j, n); + __mmask64 msk = _bzhi_u64(-1, j); + __m512i xxx = _mm512_maskz_loadu_epi8(msk, px); + __m512i yyy = _mm512_maskz_loadu_epi8(msk, py); + __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); + if (0 != neq) + return false; + n -= j; + px += j; + py += j; + } + } + #endif + // px and py maybe aligned, it should be faster if aligned + for (; UNLIKELY(n >= 64); n -= 64, px += 64, py += 64) { + __m512i xxx = _mm512_loadu_epi8(px); + __m512i yyy = _mm512_loadu_epi8(py); + __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); + if (0 != neq) + return false; + } + // n is unlikely aligned to 64, skip check n has no harm + // if (n) // do not check + { + __mmask64 msk = _bzhi_u64(-1, n); + __m512i xxx = _mm512_maskz_loadu_epi8(msk, px); + __m512i yyy = _mm512_maskz_loadu_epi8(msk, py); + __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); + return 0 == neq; + } + // return true; + #else + size_t i = 0; + for (; i + 8 <= n; i += 8) { + if (*(const uint64_t*)(px + i) != *(const uint64_t*)(py + i)) + return false; + } + if (n % sizeof(uint64_t) >= 4) { + if (*(const uint32_t*)(px + i) != *(const uint32_t*)(py + i)) + return false; + else + i += 4; + } + for (; i < n; i++) { + if (px[i] != py[i]) + return false; + } + return true; + #endif +} +__always_inline bool SliceEqual(const Slice& x, const Slice& y) { + return x.size() == y.size() && MemoryEqual(x.data(), y.data(), x.size()); +} + __always_inline bool SliceBytewiseLess(const Slice& x, const Slice& y) { #if TOPLINGDB_USE_MANUAL_MEMCMP auto px = (const unsigned char*)x.data(); size_t nx = x.size(); diff --git 
a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 269c147f54..589f8d1597 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -283,30 +283,6 @@ struct SliceParts { int num_parts; }; -__always_inline bool MemoryEqual(const void* vx, const void* vy, size_t n) { - auto px = (const unsigned char*)vx; - auto py = (const unsigned char*)vy; - size_t i = 0; - for (; i + 8 <= n; i += 8) { - if (*(const uint64_t*)(px + i) != *(const uint64_t*)(py + i)) - return false; - } - if (n % sizeof(uint64_t) >= 4) { - if (*(const uint32_t*)(px + i) != *(const uint32_t*)(py + i)) - return false; - else - i += 4; - } - for (; i < n; i++) { - if (px[i] != py[i]) - return false; - } - return true; -} -__always_inline bool SliceEqual(const Slice& x, const Slice& y) { - return x.size() == y.size() && MemoryEqual(x.data(), y.data(), x.size()); -} - inline bool operator==(const Slice& x, const Slice& y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); From 01b32da3568d977e62b435c6c3913fd8d0dbb52a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 07:37:32 +0800 Subject: [PATCH 128/175] DBIter::SetUserKey to SetUK: Fix for avx512 FixLen==64 Unit Tests were not run yet --- db/db_iter.cc | 40 ++++++++++++++++++++++++++++++++++------ db/db_iter.h | 13 +------------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 282469253f..17dc570cb6 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -612,6 +612,37 @@ struct VirtualCmpNoTS { const Comparator* cmp; }; +template +__always_inline +void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { + auto uk_ptr = uk_slice.data(); + auto uk_len = uk_slice.size(); + if constexpr (UserKeyLen == 0) { + key.risk_assign_local(uk_len + 8, [=](char* buf, size_t) { + memcpy(buf, uk_ptr, uk_len); // runtime memcpy + // do not write last 8 bytes(seq + value_type) + }); + } else if constexpr (UserKeyLen == 64) { + #if defined(__AVX512VL__) && 
defined(__AVX512BW__) + ROCKSDB_ASSERT_LE(uk_len, UserKeyLen); + key.risk_assign_local(uk_len + 8, [=](char* buf, size_t) { + __mmask64 mask = _bzhi_u64(-1, uk_len); + __m512i r512 = _mm512_maskz_loadu_epi8(mask, uk_ptr); + _mm512_mask_storeu_epi8(buf, mask, r512); + // do not write last 8 bytes(seq + value_type) + }); + #else + static_assert(false, "UserKeyLen == 64 should not on non-avx512"); + #endif + } else { + ROCKSDB_ASSERT_EQ(uk_len, UserKeyLen); + key.risk_assign_local(UserKeyLen + 8, [=](char* buf, size_t) { + memcpy(buf, uk_ptr, UserKeyLen); // fixed copy + // do not write last 8 bytes(seq + value_type) + }); + } +} + using TriBool = DBIter::TriBool; template @@ -787,8 +818,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, return true; } else { FixLen != 0 ? // to propagate const FixLen - //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : - saved_key_.SetUserKey(ikey_.user_key.data_) : // for flame + saved_key_.SetUK(ikey_.user_key) : saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); @@ -801,8 +831,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, if (timestamp_lb_) { saved_key_.SetInternalKey(ikey_); } else if (FixLen != 0) { // to propagate const FixLen - //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen); - saved_key_.SetUserKey(ikey_.user_key.data_); // for flame + saved_key_.SetUK(ikey_.user_key); } else { saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || @@ -851,8 +880,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, return false; } FixLen != 0 ? 
// to propagate const FixLen - //saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : - saved_key_.SetUserKey(ikey_.user_key.data_) : // for flame + saved_key_.SetUK(ikey_.user_key) : saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); diff --git a/db/db_iter.h b/db/db_iter.h index 4fe4a11511..07652db408 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -448,18 +448,7 @@ class DBIter final : public Iterator { }); } template - void SetUserKey(const char* uk) { - key.risk_assign_local(UserKeyLen + 8, [=](char* buf, size_t) { - memcpy(buf, uk, UserKeyLen); - // do not write last 8 bytes(seq + value_type) - }); - } - void SetUserKey(const char* uk, size_t uk_len) { - key.risk_assign_local(uk_len + 8, [=](char* buf, size_t) { - memcpy(buf, uk, uk_len); - // do not write last 8 bytes(seq + value_type) - }); - } + void SetUK(const Slice& uk); void SetInternalKey(const ParsedInternalKey& ikey) { SetInternalKey(ikey.user_key, ikey.sequence, ikey.type); } From 2f9862402fa8f67af07d70137f9ff5a1e09f9253 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 08:20:46 +0800 Subject: [PATCH 129/175] DBIter::Next/NextWithKey() Safe improve: remove ClearSavedValue() This ClearSavedValue() was introduced in: https://github.com/facebook/rocksdb/pull/10934 --- db/db_iter.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 17dc570cb6..ba98425f4b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -228,7 +228,9 @@ void DBIter::Next() { local_stats_.next_count_++; if (ok) { - ClearSavedValue(); + // see: https://github.com/facebook/rocksdb/pull/10934 + // I think this ClearSavedValue() is not needed, remove it passes UT + // ClearSavedValue(); if (prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); @@ -288,7 +290,9 @@ Slice DBIter::NextWithKey() { local_stats_.next_count_++; if (LIKELY(ok)) { - ClearSavedValue(); + // see: 
https://github.com/facebook/rocksdb/pull/10934 + // I think this ClearSavedValue() is not needed, remove it passes UT + // ClearSavedValue(); if (prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); From 48e6be8aae92518cc8c78547dc39c1288642a89b Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 08:05:55 +0800 Subject: [PATCH 130/175] DBIter::ResetValueAndColumns() Safe improve: value_.clear() -> .size_ = 0 skip set value_.data_, improves a little --- db/db_iter.cc | 10 ++++------ db/db_iter.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index ba98425f4b..9fba41b0f4 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -202,8 +202,8 @@ void DBIter::Next() { ReleaseTempPinnedData(); ResetBlobValue(); ResetValueAndColumns(); - local_stats_.skip_count_ += num_internal_keys_skipped_; - local_stats_.skip_count_--; + local_stats_.next_count_++; + local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; num_internal_keys_skipped_ = 0; bool ok = true; if (UNLIKELY(direction_ == kReverse)) { @@ -226,7 +226,6 @@ void DBIter::Next() { ok = iter_.Valid(); } - local_stats_.next_count_++; if (ok) { // see: https://github.com/facebook/rocksdb/pull/10934 // I think this ClearSavedValue() is not needed, remove it passes UT @@ -264,8 +263,8 @@ Slice DBIter::NextWithKey() { ReleaseTempPinnedData(); ResetBlobValue(); ResetValueAndColumns(); - local_stats_.skip_count_ += num_internal_keys_skipped_; - local_stats_.skip_count_--; + local_stats_.next_count_++; + local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; num_internal_keys_skipped_ = 0; bool ok = true; if (UNLIKELY(direction_ == kReverse)) { @@ -288,7 +287,6 @@ Slice DBIter::NextWithKey() { ok = iter_.Valid(); } - local_stats_.next_count_++; if (LIKELY(ok)) { // see: https://github.com/facebook/rocksdb/pull/10934 // I think this ClearSavedValue() is not needed, remove it passes UT diff --git a/db/db_iter.h b/db/db_iter.h index 07652db408..7b493cce8d 
100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -373,7 +373,7 @@ class DBIter final : public Iterator { ValueType result_type); void ResetValueAndColumns() { - value_.clear(); + value_.size_ = 0; // clear without reset .data_ = "" #if defined(TOPLINGDB_WITH_WIDE_COLUMNS) wide_columns_.clear(); #endif From 3d0c0dd658b7be1d95e7f7a1522d195100388ad5 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 08:24:09 +0800 Subject: [PATCH 131/175] DBIter::Next/NextWithKey() Aggressive improve: remove ResetValueAndColumns() All Unit Tests passed --- db/db_iter.cc | 10 ++++++---- db/db_iter.h | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 9fba41b0f4..ce195fc2d1 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -201,7 +201,7 @@ void DBIter::Next() { // Release temporarily pinned blocks from last operation ReleaseTempPinnedData(); ResetBlobValue(); - ResetValueAndColumns(); +//ResetValueAndColumns(); local_stats_.next_count_++; local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; num_internal_keys_skipped_ = 0; @@ -262,7 +262,7 @@ Slice DBIter::NextWithKey() { // Release temporarily pinned blocks from last operation ReleaseTempPinnedData(); ResetBlobValue(); - ResetValueAndColumns(); +//ResetValueAndColumns(); local_stats_.next_count_++; local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; num_internal_keys_skipped_ = 0; @@ -357,8 +357,10 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key, bool DBIter::SetValueAndColumnsFromEntity(Slice slice) { #if defined(TOPLINGDB_WITH_WIDE_COLUMNS) - assert(value_.empty()); - assert(wide_columns_.empty()); + //assert(value_.empty()); + //assert(wide_columns_.empty()); + value_.clear(); + wide_columns_.clear(); const Status s = WideColumnSerialization::Deserialize(slice, wide_columns_); diff --git a/db/db_iter.h b/db/db_iter.h index 7b493cce8d..12af8aa475 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -358,11 +358,12 @@ class DBIter final : public 
Iterator { } void SetValueAndColumnsFromPlain(const Slice& slice) { - assert(value_.empty()); + //assert(value_.empty()); value_ = slice; #if defined(TOPLINGDB_WITH_WIDE_COLUMNS) - assert(wide_columns_.empty()); + //assert(wide_columns_.empty()); + wide_columns_.clear(); wide_columns_.emplace_back(kDefaultWideColumnName, slice); #endif } From 934f24e7ee7e7c696c6acdd5b7527c4c28aeac50 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct 2025 10:22:37 +0800 Subject: [PATCH 132/175] DBIter::FastIterKey::SetUK: use key.assign() on var keylen And change FastIterKey::key cap to 72 for larger than 64 for avx512 Now SetUK<0> for var keylen is not used and avx512 is still in progress, so the bug will not be triggered. --- db/db_iter.cc | 3 ++- db/db_iter.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index ce195fc2d1..8cbae95a44 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -619,10 +619,11 @@ struct VirtualCmpNoTS { template __always_inline void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { + static_assert(UserKeyLen < sizeof(key)); auto uk_ptr = uk_slice.data(); auto uk_len = uk_slice.size(); if constexpr (UserKeyLen == 0) { - key.risk_assign_local(uk_len + 8, [=](char* buf, size_t) { + key.assign(uk_len + 8, [=](char* buf, size_t) { memcpy(buf, uk_ptr, uk_len); // runtime memcpy // do not write last 8 bytes(seq + value_type) }); diff --git a/db/db_iter.h b/db/db_iter.h index 12af8aa475..f6fd25093a 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -440,7 +440,7 @@ class DBIter final : public Iterator { #else #define ROCKSDB_TEST_PinnedDataIterator 0 struct FastIterKey { - terark::minimal_sso<64, false> key; + terark::minimal_sso<72, false> key; // avx512 max is 64, 72 > 64 void Clear() { key.clear(); } void SetUserKey(const Slice& uk, bool copy = true) { key.assign(uk.size_ + 8, [=](char* buf, size_t len) { From 97032c91093ceb966b20f75878067733c8fb29f7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 19 Oct
2025 10:24:25 +0800 Subject: [PATCH 133/175] DBIter::NextWithKey() improve by manual inline this.key() --- db/db_iter.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 8cbae95a44..dfcc633b3b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -300,11 +300,15 @@ Slice DBIter::NextWithKey() { FindNextUserEntry(true /* skipping the current user key */, nullptr); } if (LIKELY(valid_)) { + Slice ukey_and_ts = saved_key_.GetUserKey(); local_stats_.next_found_count_++; - local_stats_.bytes_read_ += saved_key_.Size(); + local_stats_.bytes_read_ += ukey_and_ts.size(); if (is_value_prepared_) local_stats_.bytes_read_ += value_.size_; - return this->key(); + if (timestamp_lb_) + return saved_key_.GetInternalKey(); + else + return Slice(ukey_and_ts.data(), ukey_and_ts.size() - timestamp_size_); } } else { is_key_seqnum_zero_ = false; From a34c98695038e2e2661a76b88a6c105fea2c91c5 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 20 Oct 2025 20:26:33 +0800 Subject: [PATCH 134/175] DBIter::SetFuncPtr() destroy saved_key_ This will avoid memory leak in risk_assign_local when sso saved_key_ is heap allocated. 
--- db/db_iter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/db/db_iter.cc b/db/db_iter.cc index dfcc633b3b..73ae01b50b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -661,6 +661,7 @@ bool DBIter::FindNextUserEntryPerf(bool skipping_saved_key, const Slice* prefix) (skipping_saved_key, prefix); } void DBIter::SetFuncPtr() { + saved_key_.key.destroy(); // == faster clear + shrink_to_fit #if !TOPLING_USE_BOUND_PMF #define BOUND_PMF(func) func #else From bb0314cf6224dbc9775fd15b025ac574cf0cd294 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Oct 2025 11:21:25 +0800 Subject: [PATCH 135/175] DBIter::Next() Add missed likely --- db/db_iter.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 73ae01b50b..09df94b7a8 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -213,7 +213,7 @@ void DBIter::Next() { } else { ok = iter_.Valid(); } - } else if (!current_entry_is_merged_) { + } else if (LIKELY(!current_entry_is_merged_)) { // If the current value is not a merge, the iter position is the // current key, which is already returned. We can safely issue a // Next() without checking the current key. 
@@ -226,7 +226,7 @@ void DBIter::Next() { ok = iter_.Valid(); } - if (ok) { + if (LIKELY(ok)) { // see: https://github.com/facebook/rocksdb/pull/10934 // I think this ClearSavedValue() is not needed, remove it passes UT // ClearSavedValue(); From 2d8289b489ca8d0e93810acb69f2a4f6f7986acf Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Oct 2025 11:33:24 +0800 Subject: [PATCH 136/175] DBIter::SetFuncPtr() tidy align macro SetFindNext3 body --- db/db_iter.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 09df94b7a8..2473a1ebd8 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -689,9 +689,9 @@ void DBIter::SetFuncPtr() { else SetFindNext3(FuncName, 0, CmpNoTS) #endif #define SetFindNext3(FuncName, FixLen, CmpNoTS) \ - if (read_callback_) \ - SetFindNext4(FuncName, kTrue , FixLen, CmpNoTS); \ - else SetFindNext4(FuncName, kFalse, FixLen, CmpNoTS) + if (read_callback_) \ + SetFindNext4(FuncName, kTrue , FixLen, CmpNoTS);\ + else SetFindNext4(FuncName, kFalse , FixLen, CmpNoTS) #define SetFindNext4(FuncName, MayHasCallback, FixLen, CmpNoTS) \ do { \ auto func = prefix_same_as_start_ \ From 036f3bb4efc9fef8d4883f019eb1d7964240e706 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Oct 2025 11:22:46 +0800 Subject: [PATCH 137/175] DBIter::Next() & NextWithKey() remove ResetBlobValue() on FixLen optimization's fast path --- db/db_iter.cc | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 2473a1ebd8..9b3a1539c8 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -200,7 +200,7 @@ void DBIter::Next() { // Release temporarily pinned blocks from last operation ReleaseTempPinnedData(); - ResetBlobValue(); +//ResetBlobValue(); // Moved to FindNextUserEntryInternalTmpl //ResetValueAndColumns(); local_stats_.next_count_++; local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; @@ -245,6 +245,7 @@ void DBIter::Next() { local_stats_.bytes_read_ += 
value_.size_; } } else { + ResetBlobValue(); // unlikely path is_key_seqnum_zero_ = false; valid_ = false; } @@ -261,7 +262,7 @@ Slice DBIter::NextWithKey() { // Release temporarily pinned blocks from last operation ReleaseTempPinnedData(); - ResetBlobValue(); +//ResetBlobValue(); // Moved to FindNextUserEntryInternalTmpl //ResetValueAndColumns(); local_stats_.next_count_++; local_stats_.skip_count_ += num_internal_keys_skipped_ - 1; @@ -314,6 +315,7 @@ Slice DBIter::NextWithKey() { is_key_seqnum_zero_ = false; valid_ = false; } + ResetBlobValue(); // unlikely path return Slice(nullptr, 0); } @@ -693,6 +695,10 @@ void DBIter::SetFuncPtr() { SetFindNext4(FuncName, kTrue , FixLen, CmpNoTS);\ else SetFindNext4(FuncName, kFalse , FixLen, CmpNoTS) #define SetFindNext4(FuncName, MayHasCallback, FixLen, CmpNoTS) \ + if (expose_blob_index_) \ + SetFindNext5(FuncName, MayHasCallback, 0, CmpNoTS);\ + else SetFindNext5(FuncName, MayHasCallback, FixLen, CmpNoTS) + #define SetFindNext5(FuncName, MayHasCallback, FixLen, CmpNoTS) \ do { \ auto func = prefix_same_as_start_ \ ? iterate_upper_bound_ \ @@ -734,6 +740,14 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, assert(direction_ == kForward); current_entry_is_merged_ = false; + if (FixLen == 0) { + ResetBlobValue(); + } else { + // This is assert, not verify, just for debug build + ROCKSDB_ASSERT_F(!expose_blob_index_, + "FixLen optimization does not support legacy Stacked BlobDB"); + } + // How many times in a row we have skipped an entry with user key less than // or equal to saved_key_. We could skip these entries either because // sequence numbers were too high or because skipping_saved_key = true. 
From 5556b17b3d3cee9b0398bb52fa1533c59878b26c Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Oct 2025 18:06:58 +0800 Subject: [PATCH 138/175] Add RetryNextAndGetResult for LevelIterator: direct call file_iter With RetryNextAndGetResult(), we can directly call file_iter.NextAndGetResult(); if it fails (is_valid==false), we call LevelIterator.RetryNextAndGetResult(). Default RetryNextAndGetResult() does nothing and returns false Default IteratorWrapper.work_iter_ is same as iter_, to stay compatible with other InternalIterator(s) LevelIterator::RetryNextAndGetResult() and PrepareScan() are overridden to switch file_iter_ This change reduces DBIter::Next() & NextWithKey() by about 2.5 nanoseconds, now db_bench shows: if DB is already compacted, each NextWithKey() only uses about 23 nanoseconds on Xeon 2682 v4 -- with db_bench `-omit_key` and `-omit_value` which enable ReadOptions.fixed_user_key_len optimization and do not call iter->value() --- db/version_set.cc | 37 +++++++++++++++++++++++++++++++++++++ table/internal_iterator.h | 7 +++++++ table/iterator_wrapper.h | 15 +++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 4a1836df70..eac92a65d4 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1194,6 +1194,25 @@ class LevelIterator final : public InternalIterator { void SkipEmptyFileBackward(); void SetFileIterator(InternalIterator* iter); void InitFileIterator(size_t new_file_index); + bool RetryNextAndGetResult(IterateResult*) override; + void PrepareScan(IteratorWrapper* iw) override { + assert(iw != nullptr); + my_wrapper_ = iw; + UpdateScanFunc(iw); + } + void UpdateScanFunc(IteratorWrapper* iw) { + if (to_return_sentinel_ || file_iter_.iter() == nullptr) { + iw->work_iter_ = this; + iw->next_and_get_result_ = ForgeFuncPtr(this, + &LevelIterator::NextAndGetResult); + } else { + iw->work_iter_ = file_iter_.iter(); + iw->next_and_get_result_ = ForgeFuncPtr(file_iter_.iter(),
&InternalIterator::NextAndGetResult); + } + retry_already_goes_invalid_ = false; + } + IteratorWrapper* my_wrapper_ = nullptr; const Slice& file_smallest_key(size_t file_index) const { assert(file_index < flevel_->num_files); @@ -1355,6 +1374,7 @@ class LevelIterator final : public InternalIterator { bool prefix_exhausted_ = false; // Whether next/prev key is a sentinel key. bool to_return_sentinel_ = false; + bool retry_already_goes_invalid_ = false; // Sets flags for if we should return the sentinel key next. // The condition for returning sentinel is reaching the end of current @@ -1572,6 +1592,17 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) { bool is_valid = !to_return_sentinel_ && file_iter_.NextAndGetResult(result); result->is_valid = is_valid; if (UNLIKELY(!is_valid)) { + retry_already_goes_invalid_ = false; + return RetryNextAndGetResult(result); + } else { + return true; + } +} + +bool LevelIterator::RetryNextAndGetResult(IterateResult* result) { + ROCKSDB_ASSERT_EQ(result->is_valid, false); + bool is_valid = false; + if (!retry_already_goes_invalid_) { if (to_return_sentinel_) { ClearSentinel(); } else if (range_tombstone_iter_) { @@ -1582,6 +1613,9 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) { is_next_read_sequential_ = false; is_valid = Valid(); result->is_valid = is_valid; + if (my_wrapper_) { + UpdateScanFunc(my_wrapper_); + } if (is_valid) { // This could be set in TrySetDeleteRangeSentinel() or // SkipEmptyFileForward() above. 
@@ -1598,6 +1632,9 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) { result->value_prepared = !allow_unprepared_value_; } } + else { + retry_already_goes_invalid_ = true; + } } return is_valid; } diff --git a/table/internal_iterator.h b/table/internal_iterator.h index 4b3cf45d65..6dd737221c 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -68,6 +68,8 @@ struct IterateResult { }; static_assert(sizeof(IterateResult) == 16); +template class IteratorWrapperBase; + template class InternalIteratorBase : public Cleanable { public: @@ -153,6 +155,11 @@ class InternalIteratorBase : public Cleanable { } return is_valid; } + virtual bool RetryNextAndGetResult(IterateResult* result) { + ROCKSDB_ASSERT_EQ(result->is_valid, false); + return false; + } + virtual void PrepareScan(IteratorWrapperBase*) {} // Moves to the previous entry in the source. After this call, Valid() is // true iff the iterator was not positioned at the first entry in source. diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 2156628c76..ec63138015 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -17,6 +17,7 @@ #include #if TOPLING_USE_BOUND_PMF using terark::ExtractFuncPtr; +using terark::ForgeFuncPtr; #endif namespace ROCKSDB_NAMESPACE { @@ -25,7 +26,7 @@ namespace ROCKSDB_NAMESPACE { // the valid() and key() results for an underlying iterator. // This can help avoid virtual function calls and also gives better // cache locality. 
-template +template class IteratorWrapperBase { public: IteratorWrapperBase() : iter_(nullptr) {} @@ -47,6 +48,9 @@ class IteratorWrapperBase { InternalIteratorBase* old_iter = iter_; iter_ = _iter; + #if TOPLING_USE_BOUND_PMF + work_iter_ = _iter; + #endif if (iter_ == nullptr) { result_.is_valid = false; } else { @@ -150,7 +154,11 @@ class IteratorWrapperBase { #if !TOPLING_USE_BOUND_PMF const bool is_valid = iter_->NextAndGetResult(&result_); #else - const bool is_valid = next_and_get_result_(iter_, &result_); + bool is_valid = next_and_get_result_(work_iter_, &result_); + if (UNLIKELY(!is_valid)) { + // maybe update work_iter_ and next_and_get_result_ + is_valid = iter_->RetryNextAndGetResult(&result_); + } #endif assert(is_valid == result_.is_valid); assert(!result_.is_valid || iter_->status().ok()); @@ -238,6 +246,7 @@ class IteratorWrapperBase { protected: void Update() { + iter_->PrepareScan(this); UpdateImpl(iter_->Valid()); } void UpdateImpl(bool is_valid) { @@ -257,6 +266,8 @@ class IteratorWrapperBase { mutable bool status_checked_after_invalid_ = true; #endif #if TOPLING_USE_BOUND_PMF + public: + InternalIteratorBase* work_iter_ = nullptr; typedef bool (*NextAndGetResultFN)(InternalIteratorBase*, IterateResult*); typedef bool (*PrepareAndGetValueFN)(InternalIteratorBase*, TValue*); NextAndGetResultFN next_and_get_result_ = nullptr; From 935c62d2b631cc10d318b4e903eca5adc8c33d09 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Oct 2025 21:38:02 +0800 Subject: [PATCH 139/175] DBIter::Next/NextWithKey() move `ok = iter_.Next();` to block end Thus inline expanded iter_.Next() may be branch merged with later sentences --- db/db_iter.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 9b3a1539c8..c6b1cebfe0 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -219,9 +219,9 @@ void DBIter::Next() { // Next() without checking the current key. 
// If the current key is a merge, very likely iter already points // to the next internal position. + PERF_COUNTER_ADD(internal_key_skipped_count, 1); assert(iter_.Valid()); ok = iter_.Next(); - PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { ok = iter_.Valid(); } @@ -281,9 +281,9 @@ Slice DBIter::NextWithKey() { // Next() without checking the current key. // If the current key is a merge, very likely iter already points // to the next internal position. + PERF_COUNTER_ADD(internal_key_skipped_count, 1); assert(iter_.Valid()); ok = iter_.Next(); - PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { ok = iter_.Valid(); } From 1663b22323183ca0f64ecf78c0d0679c0e3d4060 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 26 Oct 2025 13:40:39 +0800 Subject: [PATCH 140/175] DBIter: FindNextUserEntry add template arg bool CheckMaxSkip Because max_skippable_internal_keys_ is default for do not check in most cases, so remove the runtime overhead. --- db/db_iter.cc | 27 ++++++++++++++++++--------- db/db_iter.h | 4 ++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index c6b1cebfe0..9d55aa1f2a 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -656,10 +656,10 @@ void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { using TriBool = DBIter::TriBool; -template +template bool DBIter::FindNextUserEntryPerf(bool skipping_saved_key, const Slice* prefix) { PERF_TIMER_GUARD(find_next_user_entry_time); - return FindNextUserEntryInternalTmpl + return FindNextUserEntryInternalTmpl (skipping_saved_key, prefix); } void DBIter::SetFuncPtr() { @@ -699,14 +699,18 @@ void DBIter::SetFuncPtr() { SetFindNext5(FuncName, MayHasCallback, 0, CmpNoTS);\ else SetFindNext5(FuncName, MayHasCallback, FixLen, CmpNoTS) #define SetFindNext5(FuncName, MayHasCallback, FixLen, CmpNoTS) \ + if (max_skippable_internal_keys_ >= UINT_MAX) \ + SetFindNext6(FuncName, MayHasCallback, FixLen, CmpNoTS, false ); \ + else SetFindNext6(FuncName, 
MayHasCallback, FixLen, CmpNoTS, true ) + #define SetFindNext6(FuncName, MayHasCallback, FixLen, CmpNoTS, CheckMaxSkip) \ do { \ auto func = prefix_same_as_start_ \ ? iterate_upper_bound_ \ - ? &DBIter::template FuncName \ - : &DBIter::template FuncName \ + ? &DBIter::template FuncName \ + : &DBIter::template FuncName \ : iterate_upper_bound_ \ - ? &DBIter::template FuncName \ - : &DBIter::template FuncName; \ + ? &DBIter::template FuncName \ + : &DBIter::template FuncName; \ m_find_next_entry = BOUND_PMF(func); \ } while (0) if (enable_perf_timer_) { @@ -730,7 +734,7 @@ void DBIter::SetFuncPtr() { } } -template +template bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, const Slice* prefix) { CmpNoTS cmpNoTS{user_comparator_.user_comparator()}; @@ -803,8 +807,13 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, break; } - if (TooManyInternalKeysSkipped()) { - return false; + if constexpr (CheckMaxSkip) { + if (TooManyInternalKeysSkipped()) { + return false; + } + } + else { + num_internal_keys_skipped_++; } assert(ikey_.user_key.size() >= timestamp_size_); diff --git a/db/db_iter.h b/db/db_iter.h index f6fd25093a..7be9349c6f 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -279,7 +279,7 @@ class DBIter final : public Iterator { // If `prefix` is not null, the iterator needs to stop when all keys for the // prefix are exhausted and the iterator is set to invalid. bool FindNextUserEntry(bool skipping_saved_key, const Slice* prefix); - template + template bool FindNextUserEntryInternalTmpl(bool, const Slice* prefix); bool ParseKey(ParsedInternalKey* key); bool MergeValuesNewToOld(); @@ -424,7 +424,7 @@ class DBIter final : public Iterator { // uncommitted data in db as in WriteUnCommitted. 
SequenceNumber sequence_; - template + template bool FindNextUserEntryPerf(bool skipping_saved_key, const Slice* prefix); void SetFuncPtr(); #if !TOPLING_USE_BOUND_PMF From 2250194d8f27809690a781910bf04b5a1572b15d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 26 Oct 2025 14:49:35 +0800 Subject: [PATCH 141/175] DBIter: optimize normal case prefix_same_as_start_ == false --- db/db_iter.cc | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 9d55aa1f2a..6adb219a58 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -231,13 +231,7 @@ void DBIter::Next() { // I think this ClearSavedValue() is not needed, remove it passes UT // ClearSavedValue(); - if (prefix_same_as_start_) { - assert(prefix_extractor_ != nullptr); - const Slice prefix = prefix_.GetUserKey(); - FindNextUserEntry(true /* skipping the current user key */, &prefix); - } else { - FindNextUserEntry(true /* skipping the current user key */, nullptr); - } + FindNextUserEntry(true /* skipping the current user key */, nullptr); if (LIKELY(valid_)) { local_stats_.next_found_count_++; local_stats_.bytes_read_ += saved_key_.Size(); @@ -293,13 +287,7 @@ Slice DBIter::NextWithKey() { // I think this ClearSavedValue() is not needed, remove it passes UT // ClearSavedValue(); - if (prefix_same_as_start_) { - assert(prefix_extractor_ != nullptr); - const Slice prefix = prefix_.GetUserKey(); - FindNextUserEntry(true /* skipping the current user key */, &prefix); - } else { - FindNextUserEntry(true /* skipping the current user key */, nullptr); - } + FindNextUserEntry(true /* skipping the current user key */, nullptr); if (LIKELY(valid_)) { Slice ukey_and_ts = saved_key_.GetUserKey(); local_stats_.next_found_count_++; @@ -752,6 +740,16 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, "FixLen optimization does not support legacy Stacked BlobDB"); } + assert(HasPrefix == prefix_same_as_start_); + Slice tmp_prefix; + if 
constexpr (HasPrefix) { + if (skipping_saved_key) { + assert(nullptr == prefix); // called by Next & NextWithKey + tmp_prefix = prefix_.GetUserKey(); + prefix = &tmp_prefix; + } + } + // How many times in a row we have skipped an entry with user key less than // or equal to saved_key_. We could skip these entries either because // sequence numbers were too high or because skipping_saved_key = true. From 5b3d7752173b84facc467307b5668b95d9fb346d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 26 Oct 2025 16:40:27 +0800 Subject: [PATCH 142/175] DBIter::FindNextUserEntryInternalTmpl: Add an unlikely --- db/db_iter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 6adb219a58..9b15f1f27b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -961,7 +961,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, // TODO(lth): If we reseek to sequence number greater than ikey_.sequence, // then it does not make sense to reseek as we would actually land further // away from the desired key. There is opportunity for optimization here. 
- if (num_skipped > max_skip_ && !reseek_done) { + if (UNLIKELY(!reseek_done && num_skipped > max_skip_)) { is_key_seqnum_zero_ = false; num_skipped = 0; reseek_done = true; From ab39db16ef41ae46ae7d56abc35726dfe6df70bb Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 10:55:57 +0800 Subject: [PATCH 143/175] DBIter::Next/NextWithKey() Move ReleaseTempPinnedData to MergeValuesNewToOld Move cost from fast path to slow path --- db/db_iter.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 9b15f1f27b..1ff5a5546a 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -199,7 +199,7 @@ void DBIter::Next() { #endif // Release temporarily pinned blocks from last operation - ReleaseTempPinnedData(); +//ReleaseTempPinnedData(); // Moved to MergeValuesNewToOld //ResetBlobValue(); // Moved to FindNextUserEntryInternalTmpl //ResetValueAndColumns(); local_stats_.next_count_++; @@ -255,7 +255,7 @@ Slice DBIter::NextWithKey() { #endif // Release temporarily pinned blocks from last operation - ReleaseTempPinnedData(); +//ReleaseTempPinnedData(); // Moved to MergeValuesNewToOld //ResetBlobValue(); // Moved to FindNextUserEntryInternalTmpl //ResetValueAndColumns(); local_stats_.next_count_++; @@ -1028,6 +1028,7 @@ bool DBIter::MergeValuesNewToOld() { } // Temporarily pin the blocks that hold merge operands + ReleaseTempPinnedData(); TempPinData(); merge_context_.Clear(); // Start the merge process by pushing the first operand From 9b1c3ba4af35c45714e88ff5d77ac4737deed9ff Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 10:57:43 +0800 Subject: [PATCH 144/175] DBIter::Next/NextWithKey() check FindNextUserEntry retval instead of valid_ FindNextUserEntry() is always consistent with valid_ --- db/db_iter.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 1ff5a5546a..bb03be18a9 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -231,8 +231,8 @@ void 
DBIter::Next() { // I think this ClearSavedValue() is not needed, remove it passes UT // ClearSavedValue(); - FindNextUserEntry(true /* skipping the current user key */, nullptr); - if (LIKELY(valid_)) { + bool skipping_saved_key_true = true; + if (LIKELY(FindNextUserEntry(skipping_saved_key_true, nullptr))) { local_stats_.next_found_count_++; local_stats_.bytes_read_ += saved_key_.Size(); if (is_value_prepared_) @@ -287,8 +287,8 @@ Slice DBIter::NextWithKey() { // I think this ClearSavedValue() is not needed, remove it passes UT // ClearSavedValue(); - FindNextUserEntry(true /* skipping the current user key */, nullptr); - if (LIKELY(valid_)) { + bool skipping_saved_key_true = true; + if (LIKELY(FindNextUserEntry(skipping_saved_key_true, nullptr))) { Slice ukey_and_ts = saved_key_.GetUserKey(); local_stats_.next_found_count_++; local_stats_.bytes_read_ += ukey_and_ts.size(); @@ -1009,7 +1009,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, } while (iter_.Valid()); valid_ = false; - return iter_.status().ok(); + return false; } // Merge values of the same user key starting from the current iter_ position From 8a6f9bd59c457ab7e8ceae4ab137ca99a08cc225 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 17:18:05 +0800 Subject: [PATCH 145/175] DBIter::FastIterKey::SetUK() workaround g++11 if constexpr bug --- db/db_iter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index bb03be18a9..5ba0130694 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -630,7 +630,7 @@ void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { _mm512_mask_storeu_epi8(buf, mask, r512); // do not write last 8 bytes(seq + value_type) }); - #else + #elif defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 13 static_assert(false, "UserKeyLen == 64 should not on non-avx512"); #endif } else { From 01f228acc7bb0b4f255d38f40f8a5c3735829993 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 17:51:55 +0800 
Subject: [PATCH 146/175] db_bench_tool.cc: %11.6f micros/op for subnano precision --- tools/db_bench_tool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 5b69c83d13..d71d58fd5d 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -2477,7 +2477,7 @@ class Stats { double throughput = (double)done_ / elapsed; fprintf(stdout, - "%-12s : %11.3f micros/op %ld ops/sec %.3f seconds %" PRIu64 + "%-12s : %11.6f micros/op %ld ops/sec %.3f seconds %" PRIu64 " operations;%s%s\n", name.ToString().c_str(), seconds_ * 1e6 / done_, (long)throughput, elapsed, done_, (extra.empty() ? "" : " "), extra.c_str()); From 21cd609441b23c61fda23a272d2ae1fb52f22970 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 17:37:45 +0800 Subject: [PATCH 147/175] Makefile: LDFLAGS += -latomic --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f8315786b0..1decc8b01b 100644 --- a/Makefile +++ b/Makefile @@ -1033,7 +1033,7 @@ endif # topling specific WARNING_FLAGS WARNING_FLAGS := -Wall -Wno-shadow ifeq "$(shell a=${COMPILER};echo $${a:0:5})" "clang" - #LDFLAGS += -latomic + LDFLAGS += -latomic #$(error LDFLAGS = ${LDFLAGS}) WARNING_FLAGS += -Wno-deprecated-builtins endif From 9dc75f1b79e3dfcb5db2b7bb3393009c5becbd10 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 16:33:14 +0800 Subject: [PATCH 148/175] .github/workflow: Add unit test and db_bench Squashed commits: - .github/workflow/db_bench.yml: remove ulimit because it is forbided in action - .github/workflow/db_bench & unit-test[-avx512] remove make depend step - .github/workflow: Add db_bench-avx512 - .github/workflow: db_bench[-avx512] -num=50000000 to avoid oom --- .github/workflows/db_bench-avx512.yml | 93 ++++++++++++++++++++++++++ .github/workflows/db_bench.yml | 92 +++++++++++++++++++++++++ .github/workflows/unit-test-avx512.yml | 66 ++++++++++++++++++ 
.github/workflows/unit-test.yml | 65 ++++++++++++++++++ 4 files changed, 316 insertions(+) create mode 100644 .github/workflows/db_bench-avx512.yml create mode 100644 .github/workflows/db_bench.yml create mode 100644 .github/workflows/unit-test-avx512.yml create mode 100644 .github/workflows/unit-test.yml diff --git a/.github/workflows/db_bench-avx512.yml b/.github/workflows/db_bench-avx512.yml new file mode 100644 index 0000000000..9d2fa4c867 --- /dev/null +++ b/.github/workflows/db_bench-avx512.yml @@ -0,0 +1,93 @@ +# TODO: How to cache make files / speed up build progress here? +name: "db_bench-avx512" + +on: + workflow_dispatch: + inputs: + use_clang: + type: boolean + required: false + default: false + description: use clang + +jobs: + build: + # refer https://github.com/actions/runner-images to get the details + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{github.token}} + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Install Prerequisites + run: | + set -xe + sudo apt-get update -y && sudo apt-get install -y \ + libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ + libbz2-dev libcurl4-openssl-dev liburing-dev \ + libsnappy-dev libbz2-dev liblz4-dev libzstd-dev + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. 
+ # `clean` is just used for triggers auto git clone + make clean + + - name: Compile db_bench + run: | + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + export CPU=-march=skylake-avx512 + make db_bench memtablerep_bench -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 + + - name: Run db_bench + run: | + set -xe + make db_bench -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 + export TOPLINGDB_GetContext_sampling=kNone + export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 + args=( + -json sideplugin/rockside/sample-conf/db_bench_enterprise.yaml + -num=50000000 + -key_size=8 + -value_size=10 + -batch_size=1000 + -benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readrandom,readrandom,readrandom + -scan_omit_key -scan_omit_value # ToplingDB specific + -enable_zero_copy # ToplingDB specific, for point search by Get/MultiGet + -progress_reports=false + ) + ./db_bench ${args[@]} + + - name: Run memtablerep_bench + run: | + args=( + -benchmarks=fillrandom,readrandom,readwrite + -item_size=0 + -num_operations=100000000 + -write_buffer_size=536870912 + ) + set -xe + ./memtablerep_bench ${args[@]} -memtablerep=skiplist + echo + ./memtablerep_bench ${args[@]} -memtablerep='cspp:{"mem_cap":"16G","use_hugepage":false}' diff --git a/.github/workflows/db_bench.yml b/.github/workflows/db_bench.yml new file mode 100644 index 0000000000..0ae0d1eb28 --- /dev/null +++ b/.github/workflows/db_bench.yml @@ -0,0 +1,92 @@ +# TODO: How to cache make files / speed up build progress here? 
+name: "db_bench" + +on: + workflow_dispatch: + inputs: + use_clang: + type: boolean + required: false + default: false + description: use clang + +jobs: + build: + # refer https://github.com/actions/runner-images to get the details + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{github.token}} + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Install Prerequisites + run: | + set -xe + sudo apt-get update -y && sudo apt-get install -y \ + libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ + libbz2-dev libcurl4-openssl-dev liburing-dev \ + libsnappy-dev libbz2-dev liblz4-dev libzstd-dev + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. 
+ # `clean` is just used for triggers auto git clone + make clean + + - name: Compile db_bench + run: | + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + make db_bench memtablerep_bench -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 + + - name: Run db_bench + run: | + set -xe + make db_bench -j`nproc` DEBUG_LEVEL=0 UPDATE_REPO=0 + export TOPLINGDB_GetContext_sampling=kNone + export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 + args=( + -json sideplugin/rockside/sample-conf/db_bench_enterprise.yaml + -num=50000000 + -key_size=8 + -value_size=10 + -batch_size=1000 + -benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readrandom,readrandom,readrandom + -scan_omit_key -scan_omit_value # ToplingDB specific + -enable_zero_copy # ToplingDB specific, for point search by Get/MultiGet + -progress_reports=false + ) + ./db_bench ${args[@]} + + - name: Run memtablerep_bench + run: | + args=( + -benchmarks=fillrandom,readrandom,readwrite + -item_size=0 + -num_operations=100000000 + -write_buffer_size=536870912 + ) + set -xe + ./memtablerep_bench ${args[@]} -memtablerep=skiplist + echo + ./memtablerep_bench ${args[@]} -memtablerep='cspp:{"mem_cap":"16G","use_hugepage":false}' diff --git a/.github/workflows/unit-test-avx512.yml b/.github/workflows/unit-test-avx512.yml new file mode 100644 index 0000000000..8190a55b61 --- /dev/null +++ b/.github/workflows/unit-test-avx512.yml @@ -0,0 +1,66 @@ +# TODO: How to cache make files / speed up build progress here? 
+name: "unit-test-avx512" + +on: + workflow_dispatch: + inputs: + use_clang: + type: boolean + required: false + default: false + description: use clang + +jobs: + build: + # refer https://github.com/actions/runner-images to get the details + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{github.token}} + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Install Prerequisites + run: | + set -xe + sudo apt-get update -y && sudo apt-get install -y \ + libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ + libbz2-dev libcurl4-openssl-dev liburing-dev \ + libsnappy-dev libbz2-dev liblz4-dev libzstd-dev clang + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. + # `clean` is just used for triggers auto git clone + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + make clean + + - name: Compile & Run Unit Test + run: | + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + export CPU=-march=skylake-avx512 + make check -j`nproc` DEBUG_LEVEL=1 UPDATE_REPO=0 diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml new file mode 100644 index 0000000000..3aae639a40 --- /dev/null +++ b/.github/workflows/unit-test.yml @@ -0,0 +1,65 @@ +# TODO: How to cache make files / speed up build progress here? 
+name: "unit-test" + +on: + workflow_dispatch: + inputs: + use_clang: + type: boolean + required: false + default: false + description: use clang + +jobs: + build: + # refer https://github.com/actions/runner-images to get the details + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{github.token}} + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Install Prerequisites + run: | + set -xe + sudo apt-get update -y && sudo apt-get install -y \ + libjemalloc-dev libaio-dev libgflags-dev zlib1g-dev \ + libbz2-dev libcurl4-openssl-dev liburing-dev \ + libsnappy-dev libbz2-dev liblz4-dev libzstd-dev clang + + - name: Init submodule & Setup ssh + run: | + git submodule update --init --recursive + mkdir -p ~/.ssh && mkdir -p /opt/lib + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + echo nproc = `nproc` + + - name: Git clone SidePlugin(s) + run: | + # this will be auto triggerred in normal build, we defined + # it here is just for seperate steps more clearly. + # there is no dedicated target for this purpose, so we use `clean` + # as the target, it need not any `clean` at this point. 
+ # `clean` is just used for triggers auto git clone + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + make clean + + - name: Compile & Run Unit Test + run: | + set -xe + if ${{inputs.use_clang}}; then + export CXX=clang++ + export CC=clang + fi + make check -j`nproc` DEBUG_LEVEL=1 UPDATE_REPO=0 From fa81bd18978903e077a4a04520d4d61b3d8220ae Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 22:04:24 +0800 Subject: [PATCH 149/175] merging_iterator.cc: LoadPrefixZeroSuffix() specialize for PrefixLen == 8 gcc does not optimize old code gracefully, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122444 With ToplingDB UintIndex by db_bench, latency is reduced from 39.35 ns to 37.82 ns -- on WSL2 Xeon 2682 v4 --- table/merging_iterator.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 0ec3b1596b..9d60926f02 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -199,6 +199,9 @@ __always_inline UintPrefix LoadPrefixZeroSuffix(const void* src) { un.u64[1] = ((const uint32_t*)src)[2]; // zero extend uint32 to uint64 return un.u128; } + if (PrefixLen == 8) { // help gcc optimize better + return UintPrefix(*(const uint64_t*)src); + } #endif UintPrefix dst; memcpy(&dst, src, PrefixLen); From a609d308965ef59105a30530f801736778145d37 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 27 Oct 2025 23:19:38 +0800 Subject: [PATCH 150/175] db_bench.sh: some changes --- db_bench.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/db_bench.sh b/db_bench.sh index 9c2320a426..3f0c11b30a 100644 --- a/db_bench.sh +++ b/db_bench.sh @@ -8,16 +8,24 @@ cp sideplugin/rockside/src/topling/web/{index.html,style.css} /dev/shm/db_bench_ export TOPLINGDB_GetContext_sampling=kNone export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 -export LD_LIBRARY_PATH=/opt/lib:/usr/local/lib:$LD_LIBRARY_PATH +#export PRINT_NOT_FOUND=true ulimit -n 100000 args=( 
-json sideplugin/rockside/sample-conf/db_bench_enterprise.yaml - -num=10000000 -key_size=16 + -num=10000000 -key_size=8 -value_size=2000 -batch_size=100 #-benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readseq,readseq -benchmarks=fillrandom,readrandom + #-benchmarks=fillseq,compact + #-benchmarks=compact -use_existing_db + #-benchmarks=readrandom + #-benchmarks=readseq + #-benchmarks=nextwithkey + #-wkey_file=${HOME}/wikipedia-title-seq.txt + #-rkey_file=${HOME}/wikipedia-title-seq.txt #-threads=8 + -scan_omit_key -scan_omit_value -enable_zero_copy # ToplingDB specific, for point search by Get/MultiGet ) ./db_bench ${args[@]} "$@" From 2e47b3f19676b4433501d5fab574a0ea9ce48ead Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 28 Oct 2025 10:21:29 +0800 Subject: [PATCH 151/175] merging_iterator.cc: HeapItemAndPrefix: put iter_type before item_ptr Prepare for next commit --- table/merging_iterator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 9d60926f02..99e763b25d 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -264,8 +264,8 @@ struct HeapItemAndPrefix { UpdatePrefixCache(*this, &item->iter); } UintPrefix key_prefix = 0; - HeapItem* item_ptr; HeapItem::Type iter_type; + HeapItem* item_ptr; HeapItem* operator->() const noexcept { return item_ptr; } From 7982f0af37494f4515d0e3baca30e753db0832ca Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 28 Oct 2025 11:57:10 +0800 Subject: [PATCH 152/175] merging_iterator.cc: key_prefix be byte[23] on avx512 and squashed commits: - merging_iterator.cc: extract macro MERGE_ITER_CMP_PREFIX(cmp) - merging_iterator.cc: change sizeof(HeapItemAndPrefixFast.prefix_cache) to 24 on avx512 Because iter_type is useless for HeapItemAndPrefixFast, prefix_cache can be 24, and 24 fit 6 uint32 fields, in MyTopling, it fits 5 user fields(except 1 uint32 prefix for table/index 
id) - merging_iterator.cc: UpdatePrefixCache() bug fix: use user_key & load/store mask is different --- table/merging_iterator.cc | 100 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 99e763b25d..e94568a989 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -159,6 +159,15 @@ class MaxHeapItemComparator { #define FORCE_INLINE inline #endif +#if defined(__AVX512VL__) && defined(__AVX512BW__) + // can be defined as 23 or 16 + #define MERGE_ITER_PREFIX_LEN 23 +#else + #define MERGE_ITER_PREFIX_LEN 16 +#endif + +#if MERGE_ITER_PREFIX_LEN == 16 + #if 0 #define bswap_prefix __bswap_64 using UintPrefix = uint64_t; @@ -256,6 +265,15 @@ FORCE_INLINE UintPrefix HostPrefixCacheIK(const Slice& ik) { #error "HostPrefixCacheIK: Not support bigendian yet" #endif } +#else // MERGE_ITER_PREFIX_LEN + +struct UintPrefix { + static_assert(MERGE_ITER_PREFIX_LEN == 23); + unsigned char data[MERGE_ITER_PREFIX_LEN] = {0}; + UintPrefix(int=0) {} +}; + +#endif // MERGE_ITER_PREFIX_LEN struct HeapItemAndPrefix { FORCE_INLINE HeapItemAndPrefix() = default; @@ -271,12 +289,29 @@ struct HeapItemAndPrefix { FORCE_INLINE friend void UpdatePrefixCache(HeapItemAndPrefix& x, IteratorWrapper* iter) { ROCKSDB_ASSERT_EQ(&x.item_ptr->iter, iter); +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(HeapItem::ITERATOR == x.iter_type)) x.key_prefix = HostPrefixCacheIK(iter->key()); else x.key_prefix = HostPrefixCacheUK(x.item_ptr->tombstone_pik.user_key); +#else + static_assert(sizeof(HeapItemAndPrefix) == 32); + static_assert(MERGE_ITER_PREFIX_LEN == 23); + const Slice uk = x.GetUserKey(iter); + __mmask32 mskl = _bzhi_u32(-1, std::min(uk.size(), sizeof(x.key_prefix))); + __mmask32 msks = _bzhi_u32(-1, sizeof(x.key_prefix)); + __m256i r256 = _mm256_maskz_loadu_epi8(mskl, uk.data()); + _mm256_mask_storeu_epi8(&x.key_prefix, msks, r256); // do not byte swap +#endif + } + FORCE_INLINE Slice 
GetUserKey(const IteratorWrapper* iter) const { + if (LIKELY(HeapItem::ITERATOR == iter_type)) + return iter->user_key(); + else + return item_ptr->tombstone_pik.user_key; } }; +#if MERGE_ITER_PREFIX_LEN == 16 struct HeapItemAndPrefixFast : HeapItemAndPrefix { using HeapItemAndPrefix::HeapItemAndPrefix; FORCE_INLINE friend void UpdatePrefixCache(HeapItemAndPrefixFast& x, IteratorWrapper* iter) { @@ -285,6 +320,26 @@ struct HeapItemAndPrefixFast : HeapItemAndPrefix { x.key_prefix = HostPrefixCacheIK(iter->key()); } }; +#else +struct HeapItemAndPrefixFast { + FORCE_INLINE HeapItemAndPrefixFast() = default; + FORCE_INLINE HeapItemAndPrefixFast(HeapItem* item) : item_ptr(item) { + UpdatePrefixCache(*this, &item->iter); + } + static_assert(MERGE_ITER_PREFIX_LEN == 23); + unsigned char key_prefix[24]; + HeapItem* item_ptr; + HeapItem* operator->() const noexcept { return item_ptr; } + FORCE_INLINE friend void UpdatePrefixCache(HeapItemAndPrefixFast& x, IteratorWrapper* iter) { + ROCKSDB_ASSERT_EQ(HeapItem::ITERATOR, x.item_ptr->type); + const Slice uk = iter->user_key(); + __mmask32 mskl = _bzhi_u32(-1, std::min(uk.size(), sizeof(x.key_prefix))); + __mmask32 msks = _bzhi_u32(-1, sizeof(x.key_prefix)); + __m256i r256 = _mm256_maskz_loadu_epi8(mskl, uk.data()); + _mm256_mask_storeu_epi8(&x.key_prefix, msks, r256); // do not byte swap + } +}; +#endif static_assert(sizeof(HeapItemAndPrefixFast) == sizeof(HeapItemAndPrefix)); inline static void UpdatePrefixCache(HeapItem*, IteratorWrapper*) {} @@ -368,8 +423,25 @@ class MinHeapBytewiseComp { MinHeapBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix > b.key_prefix; +#else +//------------------------------------------------------------------- + #define MERGE_ITER_CMP_PREFIX(cmp) \ + __mmask32 mask = _bzhi_u32(-1, sizeof(a.key_prefix)); \ + __m256i 
a256 = _mm256_maskz_loadu_epi8(mask, &a.key_prefix); \ + __m256i b256 = _mm256_maskz_loadu_epi8(mask, &b.key_prefix); \ + __mmask32 cneq = _mm256_cmpneq_epi8_mask(a256, b256); \ + if (LIKELY(cneq != 0)) { \ + __mmask32 cmp = _mm256_cmp##cmp##_epi8_mask(a256, b256); \ + auto pos = _tzcnt_u32(cneq); \ + ROCKSDB_ASSUME(pos < sizeof(a.key_prefix)); \ + return (cmp & (1u << pos)) != 0; \ + } +//------------------------------------------------------------------- + MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' +#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return BytewiseCompareInternalKey(b->iter.key(), a->iter.key()); @@ -388,8 +460,12 @@ class MinHeapBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix > b.key_prefix; +#else + MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' +#endif else return BytewiseCompareInternalKey(b->iter.key(), a->iter.key()); } @@ -401,8 +477,12 @@ class MaxHeapBytewiseComp { MaxHeapBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix < b.key_prefix; +#else + MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' +#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return BytewiseCompareInternalKey(a->iter.key(), b->iter.key()); @@ -421,8 +501,12 @@ class MaxHeapBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix < b.key_prefix; +#else + 
MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' +#endif else return BytewiseCompareInternalKey(a->iter.key(), b->iter.key()); } @@ -434,8 +518,12 @@ class MinHeapRevBytewiseComp { MinHeapRevBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix < b.key_prefix; +#else + MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' +#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return RevBytewiseCompareInternalKey(b->iter.key(), a->iter.key()); @@ -454,8 +542,12 @@ class MinHeapRevBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix < b.key_prefix; +#else + MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' +#endif else return RevBytewiseCompareInternalKey(b->iter.key(), a->iter.key()); } @@ -467,8 +559,12 @@ class MaxHeapRevBytewiseComp { MaxHeapRevBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix > b.key_prefix; +#else + MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' +#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return RevBytewiseCompareInternalKey(a->iter.key(), b->iter.key()); @@ -487,8 +583,12 @@ class MaxHeapRevBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { +#if MERGE_ITER_PREFIX_LEN == 16 if (LIKELY(a.key_prefix != b.key_prefix)) return a.key_prefix > b.key_prefix; 
+#else + MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' +#endif else return RevBytewiseCompareInternalKey(a->iter.key(), b->iter.key()); } From 3e4c9d59b71cba4ad151d8e659c1f2af88e6b312 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 28 Oct 2025 16:55:49 +0800 Subject: [PATCH 153/175] merging_iterator.cc: impl MERGE_ITER_CMP_PREFIX on non-avx512 This makes the code simple and NOT repeat --- table/merging_iterator.cc | 44 ++++++--------------------------------- 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index e94568a989..96c4fb5e01 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -424,8 +424,11 @@ class MinHeapBytewiseComp { FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { #if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix > b.key_prefix; + #define MERGE_ITER_CMP_gt > + #define MERGE_ITER_CMP_lt < + #define MERGE_ITER_CMP_PREFIX(cmp) \ + if (LIKELY(a.key_prefix != b.key_prefix)) \ + return a.key_prefix MERGE_ITER_CMP_##cmp b.key_prefix; #else //------------------------------------------------------------------- #define MERGE_ITER_CMP_PREFIX(cmp) \ @@ -440,8 +443,8 @@ class MinHeapBytewiseComp { return (cmp & (1u << pos)) != 0; \ } //------------------------------------------------------------------- - MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' #endif + MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return BytewiseCompareInternalKey(b->iter.key(), a->iter.key()); @@ -460,12 +463,7 @@ class MinHeapBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix > 
b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' -#endif else return BytewiseCompareInternalKey(b->iter.key(), a->iter.key()); } @@ -477,12 +475,7 @@ class MaxHeapBytewiseComp { MaxHeapBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix < b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' -#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return BytewiseCompareInternalKey(a->iter.key(), b->iter.key()); @@ -501,12 +494,7 @@ class MaxHeapBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix < b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' -#endif else return BytewiseCompareInternalKey(a->iter.key(), b->iter.key()); } @@ -518,12 +506,7 @@ class MinHeapRevBytewiseComp { MinHeapRevBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix < b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' -#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return RevBytewiseCompareInternalKey(b->iter.key(), a->iter.key()); @@ -542,12 +525,7 @@ class MinHeapRevBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix 
< b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(lt) // must has no semicolon ';' -#endif else return RevBytewiseCompareInternalKey(b->iter.key(), a->iter.key()); } @@ -559,12 +537,7 @@ class MaxHeapRevBytewiseComp { MaxHeapRevBytewiseComp(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefix const &a, HeapItemAndPrefix const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix > b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' -#endif else if (LIKELY(a.iter_type == HeapItem::ITERATOR)) { if (LIKELY(b.iter_type == HeapItem::ITERATOR)) return RevBytewiseCompareInternalKey(a->iter.key(), b->iter.key()); @@ -583,12 +556,7 @@ class MaxHeapRevBytewiseComp { IterOnly(const InternalKeyComparator*) {} FORCE_INLINE bool operator()(HeapItemAndPrefixFast const &a, HeapItemAndPrefixFast const &b) const { -#if MERGE_ITER_PREFIX_LEN == 16 - if (LIKELY(a.key_prefix != b.key_prefix)) - return a.key_prefix > b.key_prefix; -#else MERGE_ITER_CMP_PREFIX(gt) // must has no semicolon ';' -#endif else return RevBytewiseCompareInternalKey(a->iter.key(), b->iter.key()); } From 90aee26f64dfcb5df914e9efe7eb620054eff710 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 29 Oct 2025 09:07:54 +0800 Subject: [PATCH 154/175] avx512 optimize: use blsi instead of tzcnt for less compare --- db/db_iter.cc | 6 +----- table/merging_iterator.cc | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 5ba0130694..ebb6945640 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -567,11 +567,7 @@ struct BytewiseCmpNoTS { __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); __mmask64 lt = _mm512_cmplt_epi8_mask (xxx, yyy); - auto pos = _tzcnt_u64(neq); // pos = 64 when neq is 0(should return false) - ROCKSDB_ASSUME(pos <= 64); // gcc does not know this, tell it(clang knows) - //return (lt >> pos & 1) != 0; 
// maybe pos == 64 so this is wrong - return (_bextr_u64(-1, pos, 1) & lt) != 0; - // _bextr_u64(-1, pos, 1) == 0 when pos is 64 + return (lt & _blsi_u64(neq)) != 0; } #endif int compare(const Slice& x, const Slice& y) const { return x.compare(y); } diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 96c4fb5e01..5cc69c491d 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -438,9 +438,7 @@ class MinHeapBytewiseComp { __mmask32 cneq = _mm256_cmpneq_epi8_mask(a256, b256); \ if (LIKELY(cneq != 0)) { \ __mmask32 cmp = _mm256_cmp##cmp##_epi8_mask(a256, b256); \ - auto pos = _tzcnt_u32(cneq); \ - ROCKSDB_ASSUME(pos < sizeof(a.key_prefix)); \ - return (cmp & (1u << pos)) != 0; \ + return (cmp & _blsi_u32(cneq)) != 0; \ } //------------------------------------------------------------------- #endif From 4240fbdac3ed80f1c8a0db92e7c7dd4d29bff3d9 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 29 Oct 2025 10:17:55 +0800 Subject: [PATCH 155/175] avx512 optimize: use -neq instead of blsi(neq) blsi(x) = x & -x, while lt is a subset of neq, so (lt & neq) == lt, so lt & (neq & -neq) == (lt & neq) & -neq == lt & -neq --- db/db_iter.cc | 2 +- table/merging_iterator.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index ebb6945640..0ca895c7c2 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -567,7 +567,7 @@ struct BytewiseCmpNoTS { __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); __mmask64 neq = _mm512_cmpneq_epi8_mask(xxx, yyy); __mmask64 lt = _mm512_cmplt_epi8_mask (xxx, yyy); - return (lt & _blsi_u64(neq)) != 0; + return (lt & -neq) != 0; } #endif int compare(const Slice& x, const Slice& y) const { return x.compare(y); } diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 5cc69c491d..bf09dd9c19 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -438,7 +438,7 @@ class MinHeapBytewiseComp { __mmask32 cneq = _mm256_cmpneq_epi8_mask(a256, 
b256); \ if (LIKELY(cneq != 0)) { \ __mmask32 cmp = _mm256_cmp##cmp##_epi8_mask(a256, b256); \ - return (cmp & _blsi_u32(cneq)) != 0; \ + return (cmp & -cneq) != 0; \ } //------------------------------------------------------------------- #endif From f0a06244e54b1b5a74f97686ec2d38aa16613146 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 29 Oct 2025 18:09:13 +0800 Subject: [PATCH 156/175] DBIter: CmpNoTS::equal() add template arg FixLen --- db/db_iter.cc | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- db/db_iter.h | 4 ++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 0ca895c7c2..53709254ee 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -508,7 +508,49 @@ using Const = std::integral_constant; struct BytewiseCmpNoTS { BytewiseCmpNoTS(const Comparator*) {} + template __always_inline bool equal(const Slice& x, const Slice& y) const { + if constexpr (FixLen != 0) { + ROCKSDB_ASSERT_EQ(x.size(), y.size()); + ROCKSDB_ASSERT_EQ(x.size(), FixLen); + } + if constexpr (FixLen == 4) { + return unaligned_load(x.data()) + == unaligned_load(y.data()); + } + if constexpr (FixLen == 8) { + return unaligned_load(x.data()) + == unaligned_load(y.data()); + } + #if defined(__GNUC__) && __GNUC__ >= 11 + if constexpr (FixLen == 12) { + uint64_t x0 = unaligned_load(x.data()); + uint64_t y0 = unaligned_load(y.data()); + uint32_t x1 = unaligned_load(x.data() + 8); + uint32_t y1 = unaligned_load(y.data() + 8); + return x0 == y0 && x1 == y1; + } + if constexpr (FixLen == 16) { + return unaligned_load(x.data()) + == unaligned_load(y.data()); + } + #endif + #if defined(__AVX512VL__) && defined(__AVX512BW__) + if constexpr (FixLen == 64) { + ROCKSDB_ASSERT_EQ(x.size(), y.size()); + ROCKSDB_ASSERT_LE(x.size(), 64); + ROCKSDB_ASSERT_GT(x.size(), 0); + __mmask64 msk = _bzhi_u64(-1, x.size()); + __m512i xxx = _mm512_maskz_loadu_epi8(msk, x.data()); + __m512i yyy = _mm512_maskz_loadu_epi8(msk, y.data()); + __mmask64 neq = 
_mm512_cmpneq_epi8_mask(xxx, yyy); + return 0 == neq; + } + #endif + if constexpr (FixLen != 0) { // constant propagate FixLen + return MemoryEqual(x.data(), y.data(), FixLen); + } + // FixLen == 0 means general compare return SliceEqual(x, y); } __always_inline bool operator()(const Slice& x, const Slice& y) const { @@ -575,8 +617,9 @@ struct BytewiseCmpNoTS { struct RevBytewiseCmpNoTS { RevBytewiseCmpNoTS(const Comparator*) {} + template __always_inline bool equal(const Slice& x, const Slice& y) const { - return SliceEqual(x, y); + return BytewiseCmpNoTS(nullptr).equal(y, x); } __always_inline bool operator()(const Slice& x, const Slice& y) const { // return y < x; @@ -592,6 +635,7 @@ struct RevBytewiseCmpNoTS { }; struct VirtualCmpNoTS { + template bool equal(const Slice& x, const Slice& y) const { return cmp->CompareWithoutTimestamp(x, y) == 0; } @@ -825,7 +869,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, // level. This may change in the future. if ((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && skipping_saved_key && - EqKeyForSkip(saved_key_.GetUserKey(), ikey_.user_key, cmpNoTS)) { + EqKeyForSkip(saved_key_.GetUserKey(), ikey_.user_key, cmpNoTS)) { num_skipped++; // skip this entry PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { diff --git a/db/db_iter.h b/db/db_iter.h index 7be9349c6f..f45d66c056 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -338,11 +338,11 @@ class DBIter final : public Iterator { : c(a, b); } - template + template inline bool EqKeyForSkip(const Slice& a, const Slice& b, const CmpNoTS& c) { return timestamp_lb_ != nullptr // semantic exactly same with origin code ? 
user_comparator_.Compare(a, b) >= 0 // ^^^^^^^^^^^^^^^^^^^^^ - : c.equal(a, b); + : c.template equal(a, b); } // Retrieves the blob value for the specified user key using the given blob From 581e74e58a99810029569cb8692b1736f2075900 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 29 Oct 2025 18:25:19 +0800 Subject: [PATCH 157/175] DBIter: remove unused code --- db/db_iter.cc | 58 --------------------------------------------------- db/db_iter.h | 7 ------- 2 files changed, 65 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 53709254ee..4f51eb6f56 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -419,56 +419,6 @@ bool DBIter::FindNextUserEntry(bool skipping_saved_key, const Slice* prefix) { #endif } -template -struct FixedLenCmpNoTS { - static_assert(KeyLen % 4 == 0); - __always_inline bool equal(const Slice& x, const Slice& y) const { - const char* px = x.data(); - const char* py = y.data(); - for (size_t i = 0; i < KeyLen / 8; i++) { - if (((const uint64_t*)(px))[i] != ((const uint64_t*)(py))[i]) - return false; - } - if (KeyLen % 8) - return ((const uint32_t*)(px))[KeyLen/4 - 1] == ((const uint32_t*)(py))[KeyLen/4 - 1]; - else - return true; - } - __always_inline bool operator()(const Slice& x, const Slice& y) const { - const char* px = x.data(); - const char* py = y.data(); - for (size_t i = 0; i < KeyLen / 8; i++) { - auto ux = NATIVE_OF_BIG_ENDIAN(((const uint64_t*)(px))[i]); - auto uy = NATIVE_OF_BIG_ENDIAN(((const uint64_t*)(py))[i]); - if (ux != uy) - return ux < uy; - } - if (KeyLen % 8) { - auto ux = NATIVE_OF_BIG_ENDIAN(((const uint32_t*)(px))[KeyLen/4 - 1]); - auto uy = NATIVE_OF_BIG_ENDIAN(((const uint32_t*)(py))[KeyLen/4 - 1]); - return ux < uy; - } else - return false; // equal - } - __always_inline int compare(const Slice& x, const Slice& y) const { - const char* px = x.data(); - const char* py = y.data(); - for (size_t i = 0; i < KeyLen / 8; i++) { - auto ux = NATIVE_OF_BIG_ENDIAN(((const uint64_t*)(px))[i]); - auto uy = 
NATIVE_OF_BIG_ENDIAN(((const uint64_t*)(py))[i]); - if (ux != uy) - return ux < uy ? -1 : +1; - } - if (KeyLen % 8) { - auto ux = NATIVE_OF_BIG_ENDIAN(((const uint32_t*)(px))[KeyLen/4 - 1]); - auto uy = NATIVE_OF_BIG_ENDIAN(((const uint32_t*)(py))[KeyLen/4 - 1]); - if (ux != uy) - return ux < uy ? -1 : +1; - } - return 0; - } -}; - template __always_inline // const propagate param FixLen bool RawBytewiseLess(const void* x, const void* y) { @@ -553,10 +503,6 @@ struct BytewiseCmpNoTS { // FixLen == 0 means general compare return SliceEqual(x, y); } - __always_inline bool operator()(const Slice& x, const Slice& y) const { - // return x < y; - return SliceBytewiseLess(x, y); - } template __always_inline bool operator()(const Slice& x, const Slice& y, Const) const { @@ -621,10 +567,6 @@ struct RevBytewiseCmpNoTS { __always_inline bool equal(const Slice& x, const Slice& y) const { return BytewiseCmpNoTS(nullptr).equal(y, x); } - __always_inline bool operator()(const Slice& x, const Slice& y) const { - // return y < x; - return SliceBytewiseLess(y, x); - } template __always_inline bool operator()(const Slice& x, const Slice& y, Const) const { diff --git a/db/db_iter.h b/db/db_iter.h index f45d66c056..c2f3c8949b 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -331,13 +331,6 @@ class DBIter final : public Iterator { : user_comparator_.CompareWithoutTimestamp(a, b); } - template - inline bool CmpKeyForSkip(const Slice& a, const Slice& b, const CmpNoTS& c) { - return timestamp_lb_ != nullptr - ? 
user_comparator_.Compare(a, b) < 0 - : c(a, b); - } - template inline bool EqKeyForSkip(const Slice& a, const Slice& b, const CmpNoTS& c) { return timestamp_lb_ != nullptr // semantic exactly same with origin code From 52410497732486607aa0d3ed91d11ccd16f16938 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 29 Oct 2025 18:29:54 +0800 Subject: [PATCH 158/175] DBIter::SetFuncPtr() reduce template instant for VirtualCmpNoTS VirtualCmpNoTS is not our optimization target, just let it work, this is still faster than upstream rocksdb by static bound condition `prefix_same_as_start_` and `iterate_upper_bound_`. --- db/db_iter.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 4f51eb6f56..bb19f948d7 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -690,7 +690,7 @@ void DBIter::SetFuncPtr() { else if (user_comparator_.IsReverseBytewise()) { SetFindNext(FindNextUserEntryPerf, RevBytewiseCmpNoTS); } else { - SetFindNext(FindNextUserEntryPerf, VirtualCmpNoTS); + SetFindNext6(FindNextUserEntryPerf, kUnknown, 0, VirtualCmpNoTS, true); } } else { @@ -699,7 +699,8 @@ void DBIter::SetFuncPtr() { } else if (user_comparator_.IsReverseBytewise()) { SetFindNext(FindNextUserEntryInternalTmpl, RevBytewiseCmpNoTS); } else { - SetFindNext(FindNextUserEntryInternalTmpl, VirtualCmpNoTS); + // intentional use FindNextUserEntryPerf, to reduce template instant + SetFindNext6(FindNextUserEntryPerf, kUnknown, 0, VirtualCmpNoTS, true); } } } From f4225870c8c1ed70ff9c94fb9edc77ecdfaca55a Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 09:26:49 +0800 Subject: [PATCH 159/175] DBIter: Add FastIterKey::GetUK Also add Slice::notail(n) --- db/db_iter.cc | 2 +- db/db_iter.h | 10 ++++++++++ include/rocksdb/slice.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index bb19f948d7..c7bfedb481 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -812,7 +812,7 @@ bool
DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, // level. This may change in the future. if ((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && skipping_saved_key && - EqKeyForSkip(saved_key_.GetUserKey(), ikey_.user_key, cmpNoTS)) { + EqKeyForSkip(saved_key_.GetUK(), ikey_.user_key, cmpNoTS)) { num_skipped++; // skip this entry PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { diff --git a/db/db_iter.h b/db/db_iter.h index c2f3c8949b..bf1ab8f4e9 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -473,6 +473,16 @@ class DBIter final : public Iterator { } rocksdb::EncodeFixed64(end - 8, PackSequenceAndType(seq, vt)); } + template + Slice GetUK() const { + if constexpr (FixLen == 64) + // avx512 FixLen==64 means max is 64(without seqvt 8) + return key.risk_to_str_local().notail(8); + if constexpr (FixLen != 0) + return key.risk_to_str_local_known_len().notail(8); + else + return GetUserKey(); + } Slice GetUserKey() const { return key.notail(8); } Slice GetInternalKey() const { return key.to(); } size_t Size() const { return key.size() - 8; } diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 589f8d1597..8e77bed956 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -105,6 +105,11 @@ class Slice { size_ -= n; } + Slice notail(size_t n) const { + ROCKSDB_ASSERT_LE(n, size_); + return Slice(data_, size_ - n); + } + // Return a string that contains the copy of the referenced data. 
// when hex is true, returns a string of twice the length hex encoded (0-9A-F) std::string ToString(bool hex) const; From 592d3fe4bbae3ee7da9fe6d4dd1304cf5c19fd64 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 09:53:39 +0800 Subject: [PATCH 160/175] DBIter::FindNextUserEntryInternalTmpl() Add likely/unlikely --- db/db_iter.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index c7bfedb481..ce9c062c85 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -802,7 +802,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, ikey_.user_key, timestamp_size_) : Slice(); bool more_recent = false; - if (IsVisible(ikey_.sequence, ts, &more_recent)) { + if (LIKELY(IsVisible(ikey_.sequence, ts, &more_recent))) { // If the previous entry is of seqnum 0, the current entry will not // possibly be skipped. This condition can potentially be relaxed to // prev_key.seq <= ikey_.sequence. We are cautious because it will be more @@ -810,9 +810,9 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, // Note that with current timestamp implementation, the same user key can // have different timestamps and zero sequence number on the bottommost // level. This may change in the future. 
- if ((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && + if (UNLIKELY((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && skipping_saved_key && - EqKeyForSkip(saved_key_.GetUK(), ikey_.user_key, cmpNoTS)) { + EqKeyForSkip(saved_key_.GetUK(), ikey_.user_key, cmpNoTS))) { num_skipped++; // skip this entry PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { From ceee51212c9fd7e91363f478dfcc77db8c56721e Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 09:54:46 +0800 Subject: [PATCH 161/175] DBIter::FastIterKey::key sso max should be 80 for avx512 --- db/db_iter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.h b/db/db_iter.h index bf1ab8f4e9..ec94682027 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -433,7 +433,7 @@ class DBIter final : public Iterator { #else #define ROCKSDB_TEST_PinnedDataIterator 0 struct FastIterKey { - terark::minimal_sso<72, false> key; // avx512 max is 64, 72 > 64 + terark::minimal_sso<80, false> key; // avx512 max is 64, 80 > 64+(seqvt 8) void Clear() { key.clear(); } void SetUserKey(const Slice& uk, bool copy = true) { key.assign(uk.size_ + 8, [=](char* buf, size_t len) { From b0411253162b90b7b6db77fee887cd94b18b42a8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 14:52:03 +0800 Subject: [PATCH 162/175] DBIter::FastIterKey::key sso max should better be 128 for avx512 When it is 128, minimal_sso.local_size() is faster because 128 is power of 2 thus can be optimized to (127 xor m_unused_len). 
--- db/db_iter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.h b/db/db_iter.h index ec94682027..c121199e3f 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -433,7 +433,7 @@ class DBIter final : public Iterator { #else #define ROCKSDB_TEST_PinnedDataIterator 0 struct FastIterKey { - terark::minimal_sso<80, false> key; // avx512 max is 64, 80 > 64+(seqvt 8) + terark::minimal_sso<128, false> key; // avx512 max is 64, 128 > 64+(seqvt 8) and is power of 2 void Clear() { key.clear(); } void SetUserKey(const Slice& uk, bool copy = true) { key.assign(uk.size_ + 8, [=](char* buf, size_t len) { From 15a22ce34a49b8a6dbcaad5a678c4661b45a90f8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 21:27:48 +0800 Subject: [PATCH 163/175] LevelIterator::UpdateScanFunc() set prepare_and_get_value_ PrepareAndGetValue::PrepareAndGetValue() also use work_iter_ --- db/version_set.cc | 4 ++++ table/iterator_wrapper.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/db/version_set.cc b/db/version_set.cc index eac92a65d4..9e4bce994a 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1205,10 +1205,14 @@ class LevelIterator final : public InternalIterator { iw->work_iter_ = this; iw->next_and_get_result_ = ForgeFuncPtr(this, &LevelIterator::NextAndGetResult); + iw->prepare_and_get_value_ = ForgeFuncPtr(this, + &LevelIterator::PrepareAndGetValue); } else { iw->work_iter_ = file_iter_.iter(); iw->next_and_get_result_ = ForgeFuncPtr(file_iter_.iter(), &InternalIterator::NextAndGetResult); + iw->prepare_and_get_value_ = ForgeFuncPtr(file_iter_.iter(), + &InternalIterator::PrepareAndGetValue); } retry_already_goes_invalid_ = false; } diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index ec63138015..1c12dbdb0e 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -143,7 +143,7 @@ class IteratorWrapperBase { #if !TOPLING_USE_BOUND_PMF return iter_->PrepareAndGetValue(v); // do minimal work #else - 
return prepare_and_get_value_(iter_, v); + return prepare_and_get_value_(work_iter_, v); #endif } #ifdef __GNUC__ From 03aeb5a9ad6aa344c2472bdd8506eb90e618973b Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 30 Oct 2025 21:40:48 +0800 Subject: [PATCH 164/175] DBIter::value() form an invalid value on lazy load failed --- db/db_iter.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/db/db_iter.h b/db/db_iter.h index c121199e3f..f0525a7153 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -172,9 +172,11 @@ class DBIter final : public Iterator { if (LIKELY(mut->iter_.PrepareAndGetValue(&mut->value_))) { mut->is_value_prepared_ = true; mut->local_stats_.bytes_read_ += value_.size_; - } else { // Can not go on, die with message - ROCKSDB_DIE("PrepareAndGetValue() failed, status = %s", - iter_.status().ToString().c_str()); + } else { + // form an invalid value for caller to check, avoid first call + // PrepareValue() then call value(). this should be very rare + mut->value_.data_ = nullptr; + mut->value_.size_ = size_t(-1); } } return value_; From 109cac2a14e37270a85690fa1fcb51cb8edddfa4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 09:44:26 +0800 Subject: [PATCH 165/175] MemTableListVersion::UnrefMemTable() tolerate CSPPMemTable ApproximateMemoryUsage --- db/memtable_list.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 845b3402d1..deeccae575 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -40,8 +40,14 @@ void MemTableListVersion::UnrefMemTable(autovector* to_delete, MemTable* m) { if (m->Unref()) { to_delete->push_back(m); + #if 0 ROCKSDB_ASSERT_GE(*parent_memtable_list_memory_usage_, m->ApproximateMemoryUsage()); *parent_memtable_list_memory_usage_ -= m->ApproximateMemoryUsage(); + #else + // Tolerate CSPPMemTable::ApproximateMemoryUsage() + *parent_memtable_list_memory_usage_ -= + std::min(*parent_memtable_list_memory_usage_, m->ApproximateMemoryUsage()); + 
#endif } } From ea8be0d9b08d1dfbfd23d9de10f6c07b06be0386 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 09:52:38 +0800 Subject: [PATCH 166/175] LevelIterator::Prev() reset scan func If Next() reach a file_iter boundary, then call Prev(), reset scan func is needed, for example: ```c++ iter->Seek(); assert(iter->Valid()); iter->Next(); assert(iter->Valid()); iter->Next(); assert(iter->Valid()); // switch to next file iter->Prev(); assert(iter->Valid()); // switch to prev file iter->Next(); assert(iter->Valid()); // switch to next file ``` --- db/version_set.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/db/version_set.cc b/db/version_set.cc index 9e4bce994a..c6520dd10f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1645,6 +1645,13 @@ bool LevelIterator::RetryNextAndGetResult(IterateResult* result) { void LevelIterator::Prev() { assert(Valid()); + if (auto iw = my_wrapper_; UNLIKELY(iw && iw->work_iter_ != this)) { + iw->work_iter_ = this; + iw->next_and_get_result_ = ForgeFuncPtr(this, + &LevelIterator::NextAndGetResult); + iw->prepare_and_get_value_ = ForgeFuncPtr(this, + &LevelIterator::PrepareAndGetValue); + } if (to_return_sentinel_) { ClearSentinel(); } else { From 64e9e35a326c02ac90282fc0259a40c3ac7e2aa0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 16:45:50 +0800 Subject: [PATCH 167/175] IteratorWrapper: Add value_iter_ for PrepareAndGetValue This is mainly for MergingIterator and works well for others. 
--- db/version_set.cc | 3 +++ table/iterator_wrapper.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/db/version_set.cc b/db/version_set.cc index c6520dd10f..38a7fcfe79 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1203,12 +1203,14 @@ class LevelIterator final : public InternalIterator { void UpdateScanFunc(IteratorWrapper* iw) { if (to_return_sentinel_ || file_iter_.iter() == nullptr) { iw->work_iter_ = this; + iw->value_iter_ = this; iw->next_and_get_result_ = ForgeFuncPtr(this, &LevelIterator::NextAndGetResult); iw->prepare_and_get_value_ = ForgeFuncPtr(this, &LevelIterator::PrepareAndGetValue); } else { iw->work_iter_ = file_iter_.iter(); + iw->value_iter_ = file_iter_.iter(); iw->next_and_get_result_ = ForgeFuncPtr(file_iter_.iter(), &InternalIterator::NextAndGetResult); iw->prepare_and_get_value_ = ForgeFuncPtr(file_iter_.iter(), @@ -1647,6 +1649,7 @@ void LevelIterator::Prev() { assert(Valid()); if (auto iw = my_wrapper_; UNLIKELY(iw && iw->work_iter_ != this)) { iw->work_iter_ = this; + iw->value_iter_ = this; iw->next_and_get_result_ = ForgeFuncPtr(this, &LevelIterator::NextAndGetResult); iw->prepare_and_get_value_ = ForgeFuncPtr(this, diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 1c12dbdb0e..cde6320eb0 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -50,6 +50,7 @@ class IteratorWrapperBase { iter_ = _iter; #if TOPLING_USE_BOUND_PMF work_iter_ = _iter; + value_iter_ = _iter; #endif if (iter_ == nullptr) { result_.is_valid = false; @@ -143,7 +144,7 @@ class IteratorWrapperBase { #if !TOPLING_USE_BOUND_PMF return iter_->PrepareAndGetValue(v); // do minimal work #else - return prepare_and_get_value_(work_iter_, v); + return prepare_and_get_value_(value_iter_, v); #endif } #ifdef __GNUC__ @@ -268,6 +269,7 @@ class IteratorWrapperBase { #if TOPLING_USE_BOUND_PMF public: InternalIteratorBase* work_iter_ = nullptr; + InternalIteratorBase* value_iter_ = nullptr; typedef bool 
(*NextAndGetResultFN)(InternalIteratorBase*, IterateResult*); typedef bool (*PrepareAndGetValueFN)(InternalIteratorBase*, TValue*); NextAndGetResultFN next_and_get_result_ = nullptr; From 35e332c3ac63a584d8a849af0f2f8ac76ce3b35b Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 17:50:43 +0800 Subject: [PATCH 168/175] MergingIterator::DoNext() & heap update_top() optimize compiler optimization can fold top_changed branch with update_top() after inline expansion. --- table/merging_iterator.cc | 6 ++++-- util/heap.h | 12 +++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index bf09dd9c19..aa4f62de74 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -945,9 +945,11 @@ class MergingIterTmpl final : public MergingIterator { // iterator yields a sequence of keys, this is cheap. assert(current_->status().ok()); UpdatePrefixCache(minHeap_.top(), current_); - minHeap_.update_top(); + bool top_changed = minHeap_.update_top(); if (LIKELY(RangeTombstoneStaticEmpty || range_tombstone_iters_.empty())) { - current_ = &minHeap_.top()->iter; // current_ = CurrentForward(); + if (UNLIKELY(top_changed)) { + current_ = &minHeap_.top()->iter; // current_ = CurrentForward(); + } return true; } } else { diff --git a/util/heap.h b/util/heap.h index cf7c9f0637..82bfc0cfc3 100644 --- a/util/heap.h +++ b/util/heap.h @@ -81,9 +81,9 @@ class BinaryHeap : private Compare { downheap(get_root()); } - void update_top() { + bool update_top() { assert(!empty()); - downheap(get_root()); + return downheap(get_root()); } void pop() { @@ -145,11 +145,12 @@ class BinaryHeap : private Compare { reset_root_cmp_cache(); } - void downheap(size_t index) { + ///@returns true if top is changed + bool downheap(size_t index) { size_t heap_size = data_.size(); assert(0 == index); ///< wiered, index must be 0 if (UNLIKELY(1 >= heap_size)) { - return; + return false; } T* data_ = this->data_.data(); @@
-165,7 +166,7 @@ class BinaryHeap : private Compare { if (!cmp_()(data_[0], data_[picked_child])) { // the tree does not change anything root_cmp_cache_ = picked_child; - return; + return false; } reset_root_cmp_cache(); @@ -207,6 +208,7 @@ class BinaryHeap : private Compare { */ data_[index] = std::move(v); + return true; // top is changed } terark::valvec32 data_;static_assert(std::is_trivially_destructible_v); From 3907e6026e8cad72da511157d8b07ba4e54e39f1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 18:00:23 +0800 Subject: [PATCH 169/175] MergingIterator::PrepareScan() override optimize forward scan value fetch Set value fetch functions to reduce calling chains, just optimize forward scan, keep backward scan safe and correct. --- table/merging_iterator.cc | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index aa4f62de74..f2a7f8995a 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -918,6 +918,34 @@ class MergingIterTmpl final : public MergingIterator { } } + void PrepareScan(IteratorWrapper* iw) override { + assert(iw != nullptr); + ROCKSDB_ASSUME(iw != nullptr); + my_wrapper_ = iw; + UpdateScanFunc(iw); + } + void UpdateScanFunc(IteratorWrapper* iw) { + if (iw) { + if (UNLIKELY(direction_ != kForward || nullptr == current_)) + ResetValueFunc(iw); + else + CopyValueFunc(iw, current_); + } + } + void ResetValueFunc(IteratorWrapper* iw) { + if (iw) { + iw->value_iter_ = this; + iw->prepare_and_get_value_ = + ForgeFuncPtr(this, &MergingIterTmpl::PrepareAndGetValue); + } + } + void CopyValueFunc(IteratorWrapper* dst, const IteratorWrapper* src) { + if (dst && src) { + dst->value_iter_ = src->value_iter_; + dst->prepare_and_get_value_ = src->prepare_and_get_value_; + } + } + void Next() override { DoNext(); // ignore return value } @@ -931,6 +959,7 @@ class MergingIterTmpl final : public MergingIterator { // The loop advanced all 
non-current children to be > key() so current_ // should still be strictly the smallest key. SwitchToForward(); + UpdateScanFunc(my_wrapper_); if (UNLIKELY(!status_.ok())) return false; } @@ -949,6 +978,7 @@ class MergingIterTmpl final : public MergingIterator { if (LIKELY(RangeTombstoneStaticEmpty || range_tombstone_iters_.empty())) { if (UNLIKELY(top_changed)) { current_ = &minHeap_.top()->iter; // current_ = CurrentForward(); + CopyValueFunc(my_wrapper_, current_); } return true; } @@ -966,8 +996,10 @@ class MergingIterTmpl final : public MergingIterator { FindNextVisibleKey(); if (LIKELY(!minHeap_.empty())) { current_ = &minHeap_.top()->iter; + CopyValueFunc(my_wrapper_, current_); return status_.ok(); } else { + ResetValueFunc(my_wrapper_); current_ = nullptr; return false; } @@ -994,6 +1026,7 @@ class MergingIterTmpl final : public MergingIterator { // Otherwise, retreat the non-current children. We retreat current_ // just after the if-block. SwitchToBackward(); + ResetValueFunc(my_wrapper_); } // For the heap modifications below to be correct, current_ must be the @@ -1129,6 +1162,9 @@ class MergingIterTmpl final : public MergingIterator { }; const InternalKeyComparator* comparator_; + + IteratorWrapper* my_wrapper_ = nullptr; + // HeapItem for range tombstone start and end keys. Each range tombstone // iterator will have at most one side (start key or end key) in a heap // at the same time, so this vector will be of size children_.size(); From 4bac285c560d8189a085513f765852cfd2025df7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 20:30:58 +0800 Subject: [PATCH 170/175] Add Iterator::GetUnwrapped() to remove ArenaWrappedDBIter overhead DBIter is embedded in ArenaWrappedDBIter -- although the overhead is very small, this commit provides a way to remove it. 
--- db/arena_wrapped_db_iter.h | 1 + include/rocksdb/iterator.h | 1 + java/rocksjni/iterator.cc | 7 ++++--- java/rocksjni/kv_helper.h | 3 ++- java/rocksjni/write_batch_with_index.cc | 4 ++-- tools/db_bench_tool.cc | 3 ++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/db/arena_wrapped_db_iter.h b/db/arena_wrapped_db_iter.h index ed8d45dba3..57115e1dbb 100644 --- a/db/arena_wrapped_db_iter.h +++ b/db/arena_wrapped_db_iter.h @@ -92,6 +92,7 @@ class ArenaWrappedDBIter final : public Iterator { Status Refresh() override; Status Refresh(const Snapshot*, bool keep_iter_pos) override; + Iterator* GetUnwrapped() override { return db_iter_; } void Init(Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 5fdd851c25..ddfb2d1d1d 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -164,6 +164,7 @@ class Iterator : public Cleanable { // - if fixed_user_key_len is not equal to beg/end user key length, it is ignored // if fixed_user_key_len == 0, user keys are of variable length, no optimizations virtual size_t CountKeysInRange(const Slice& beg, const Slice& end, size_t fixed_user_key_len = 0); + virtual Iterator* GetUnwrapped() { return this; } }; // Return an empty iterator (yields nothing). 
diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index 6709f3b712..d6e8c86b92 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -20,7 +20,8 @@ namespace ROCKSDB_NAMESPACE { JZeroCopyIter::~JZeroCopyIter() { - delete iter; + delete own_iter; + own_iter = nullptr; iter = nullptr; } @@ -170,7 +171,7 @@ void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto zc_it = reinterpret_cast(handle); - auto it = zc_it->iter; + auto it = zc_it->own_iter; ROCKSDB_NAMESPACE::Status s = it->Refresh(); if (it->Valid()) { zc_it->key = it->key(); @@ -426,7 +427,7 @@ JNIEXPORT void JNICALL Java_org_rocksdb_RocksIterator_nativeRefreshForDatabaseGC (JNIEnv* env, jobject, jlong jiter) { auto zc_it = reinterpret_cast(jiter); - auto iter = zc_it->iter; + auto iter = zc_it->own_iter; bool is_valid = iter->Valid(); ROCKSDB_NAMESPACE::Status s = iter->RefreshKeepSnapshot(true); if (is_valid) { diff --git a/java/rocksjni/kv_helper.h b/java/rocksjni/kv_helper.h index 21ee3431e5..ba0b9de475 100644 --- a/java/rocksjni/kv_helper.h +++ b/java/rocksjni/kv_helper.h @@ -286,9 +286,10 @@ struct JZeroCopyIter { Iterator* iter; Slice key{nullptr, 0}; Slice value{nullptr, 0}; + Iterator* own_iter; bool Valid() const { return key.data_ != nullptr; } ~JZeroCopyIter(); - JZeroCopyIter(Iterator* it) : iter(it) {} + JZeroCopyIter(Iterator* it) : iter(it->GetUnwrapped()), own_iter(it) {} JZeroCopyIter(const JZeroCopyIter&) = delete; JZeroCopyIter& operator=(const JZeroCopyIter&) = delete; static JZeroCopyIter* Make(Iterator* it) { diff --git a/java/rocksjni/write_batch_with_index.cc b/java/rocksjni/write_batch_with_index.cc index b63c6c5b1a..f15a686f1f 100644 --- a/java/rocksjni/write_batch_with_index.cc +++ b/java/rocksjni/write_batch_with_index.cc @@ -550,8 +550,8 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_iteratorWithBase( 
reinterpret_cast(jcf_handle); auto* zc_iter = reinterpret_cast(jbase_iterator_handle); - auto* base_iterator = zc_iter->iter; - zc_iter->iter = nullptr; // ownership has been moved, reset to nullptr + auto* base_iterator = zc_iter->own_iter; + zc_iter->own_iter = nullptr; // ownership has been moved, reset to nullptr delete zc_iter; // java side will not delete it, we delete it here ROCKSDB_NAMESPACE::ReadOptions* read_opts = jread_opts_handle == 0 diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index d71d58fd5d..a1f17b8476 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -6020,6 +6020,8 @@ class Benchmark { options.auto_readahead_size = FLAGS_auto_readahead_size; options.fixed_user_key_len = FLAGS_scan_omit_key ? FLAGS_key_size : 0; Iterator* iter = db->NewIterator(options); + std::unique_ptr iter_auto_del(iter); + iter = iter->GetUnwrapped(); int64_t i = 0, bytes = 0; const auto limiter = thread->shared->read_rate_limiter.get(); const bool omit_value = FLAGS_scan_omit_value; @@ -6040,7 +6042,6 @@ class Benchmark { } key = iter->NextWithKey(); } - delete iter; thread->stats.AddBytes(bytes); } From 38b5c51dd84c063200cf2f5d6f8f58abe2d39bc2 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 31 Oct 2025 22:30:34 +0800 Subject: [PATCH 171/175] DBIter::FindNextUserEntryInternalTmpl() fix missing change SetUK(avx512 only) --- db/db_iter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index ce9c062c85..969c8df2d6 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -925,7 +925,7 @@ bool DBIter::FindNextUserEntryInternalTmpl(bool skipping_saved_key, num_skipped++; } else { FixLen != 0 ? 
// to propagate const FixLen - saved_key_.SetUserKey(ikey_.user_key.data_, FixLen) : + saved_key_.SetUK(ikey_.user_key) : saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); From 06049f0dee850864aebf0e47fe9acf62bf6da540 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 1 Nov 2025 07:57:56 +0800 Subject: [PATCH 172/175] db_bench_tool.cc: readseq and nextwithkey fix for empty DB And use iter->GetUnwrapped() for readseq -- same as nextwithkey --- tools/db_bench_tool.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index a1f17b8476..0d1b7b08ef 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -5966,6 +5966,8 @@ class Benchmark { options.fixed_user_key_len = omit_key ? key_size : 0; Iterator* iter = db->NewIterator(options); + std::unique_ptr iter_auto_del(iter); + iter = iter->GetUnwrapped(); int64_t i = 0; int64_t bytes = 0; const auto limiter = thread->shared->read_rate_limiter.get(); @@ -5974,7 +5976,7 @@ class Benchmark { if (UNLIKELY(!iter->Valid())) { // wrap if does not reach reads_ iter->SeekToFirst(); if (!iter->Valid()) - continue; // safe keep loop even on empty db + break; // empty db } if (omit_value) { bytes += omit_key ? 
key_size : iter->key().size(); @@ -5991,7 +5993,6 @@ class Benchmark { } } - delete iter; thread->stats.AddBytes(bytes); } @@ -6029,7 +6030,7 @@ class Benchmark { if (UNLIKELY(!key.data())) { // end of iter, wrap if does not reach reads_ key = iter->SeekToFirstWithKey(); if (!key.data()) - continue; // safe keep loop even on empty db + break; // empty db } bytes += key.size(); if (!omit_value) { From 78de0ce589628eb5b9f27deb2ef5565c3773f1bf Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 1 Nov 2025 10:11:53 +0800 Subject: [PATCH 173/175] DBIter::value() set status on fail for user checking --- db/db_iter.h | 1 + 1 file changed, 1 insertion(+) diff --git a/db/db_iter.h b/db/db_iter.h index f0525a7153..3e398dacc4 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -177,6 +177,7 @@ class DBIter final : public Iterator { // PrepareValue() then call value(). this should be very rare mut->value_.data_ = nullptr; mut->value_.size_ = size_t(-1); + mut->status_ = mut->iter_.status(); } } return value_; From ecb1576a6014345757365660f9d9394184bc10c7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 1 Nov 2025 10:13:33 +0800 Subject: [PATCH 174/175] Remove Iterator::PrepareValue(), use iter->value().size() == size_t(-1) as fail When `iter->value().size() == size_t(-1)`, user code should consider `iter->status()` for detailed error! 
--- db/arena_wrapped_db_iter.h | 2 -- db/db_iter.h | 18 ------------------ include/rocksdb/iterator.h | 2 -- utilities/ttl/db_ttl_impl.h | 1 - .../write_batch_with_index_internal.cc | 8 -------- .../write_batch_with_index_internal.h | 1 - 6 files changed, 32 deletions(-) diff --git a/db/arena_wrapped_db_iter.h b/db/arena_wrapped_db_iter.h index 57115e1dbb..169d517eb5 100644 --- a/db/arena_wrapped_db_iter.h +++ b/db/arena_wrapped_db_iter.h @@ -84,8 +84,6 @@ class ArenaWrappedDBIter final : public Iterator { const WideColumns& columns() const override { return db_iter_->columns(); } Status status() const override { return db_iter_->status(); } Slice timestamp() const override { return db_iter_->timestamp(); } - ROCKSDB_FLATTEN - bool PrepareValue() override { return db_iter_->PrepareValue(); } bool IsBlob() const { return db_iter_->IsBlob(); } Status GetProperty(std::string prop_name, std::string* prop) override; diff --git a/db/db_iter.h b/db/db_iter.h index 3e398dacc4..caea275161 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -183,24 +183,6 @@ class DBIter final : public Iterator { return value_; } - // without PrepareValue, user can not check iter_.PrepareAndGetValue(), - // thus must die in DBIter::value() if iter_.PrepareAndGetValue() fails. 
- bool PrepareValue() override { // enable error check for lazy load - assert(valid_); - if (!is_value_prepared_) { - if (LIKELY(iter_.PrepareAndGetValue(&value_))) { - is_value_prepared_ = true; - local_stats_.bytes_read_ += value_.size_; - } else { - valid_ = false; - status_ = iter_.status(); - ROCKSDB_VERIFY(!status_.ok()); - return false; - } - } - return true; - } - #if defined(TOPLINGDB_WITH_WIDE_COLUMNS) const WideColumns& columns() const override { assert(valid_); diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index ddfb2d1d1d..4102e46fe1 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -150,8 +150,6 @@ class Iterator : public Cleanable { return Slice(); } - virtual bool PrepareValue() { return true; } - virtual Slice NextWithKey(); virtual Slice PrevWithKey(); Slice SeekToFirstWithKey(); diff --git a/utilities/ttl/db_ttl_impl.h b/utilities/ttl/db_ttl_impl.h index ecb8695971..b125d79b06 100644 --- a/utilities/ttl/db_ttl_impl.h +++ b/utilities/ttl/db_ttl_impl.h @@ -143,7 +143,6 @@ class TtlIterator : public Iterator { trimmed_value.size_ -= DBWithTTLImpl::kTSLength; return trimmed_value; } - bool PrepareValue() override { return iter_->PrepareValue(); } Status status() const override { return iter_->status(); } diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index acb05dd3c8..dedc4b186a 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -226,14 +226,6 @@ Slice BaseDeltaIterator::timestamp() const { return current_at_base_ ? 
base_iterator_->timestamp() : Slice(); } -bool BaseDeltaIterator::PrepareValue() { - if (current_at_base_) { - return base_iterator_->PrepareValue(); - } else { - return true; - } -} - Status BaseDeltaIterator::status() const { if (!status_.ok()) { return status_; diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index f4cc349421..1933b21470 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -96,7 +96,6 @@ class BaseDeltaIterator final : public Iterator { Status Refresh(const Snapshot*, bool keep_iter_pos) override; using Iterator::Refresh; void Invalidate(Status s); - bool PrepareValue() override; std::unique_ptr& GetBaseIter() { return base_iterator_; } std::unique_ptr& GetDeltaIter() { return delta_iterator_; } const Comparator* GetComparator() const { return comparator_; } From 2deb0a0baf3f1b74b9ef421c92d0dd68fc7500ea Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 1 Nov 2025 11:02:18 +0800 Subject: [PATCH 175/175] jni: iterator: check value lazy load failure --- java/rocksjni/iterator.cc | 44 ++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index d6e8c86b92..fd6ac32ad9 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -25,6 +25,13 @@ namespace ROCKSDB_NAMESPACE { iter = nullptr; } +#define THROW_ON_INVALID_VALUE(zc_iter, ReturnOnError) \ + if (UNLIKELY(zc_iter->value.size() == size_t(-1))) { \ + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew \ + (env, zc_iter->iter->status()); \ + ReturnOnError; \ + } else (void)(0) + // ensure that the JZeroCopyIter accessing in java side is correct static_assert(offsetof(JZeroCopyIter, key.data_) == 8); static_assert(offsetof(JZeroCopyIter, key.size_) == 16); @@ -34,13 +41,15 @@ namespace 
ROCKSDB_NAMESPACE { // If such callback class is reusable, move it to a common header file. struct JCallbackSeek { void operator()(Slice target_slice) const; - JCallbackSeek(jlong h) : handle(h) {} + JCallbackSeek(jlong h, JNIEnv* _env) : handle(h), env(_env) {} jlong handle; + JNIEnv* env; }; struct JCallbackSeekForPrev { void operator()(Slice target_slice) const; - JCallbackSeekForPrev(jlong h) : handle(h) {} + JCallbackSeekForPrev(jlong h, JNIEnv* _env) : handle(h), env(_env) {} jlong handle; + JNIEnv* env; }; void JCallbackSeek::operator()(Slice target_slice) const { @@ -49,6 +58,7 @@ namespace ROCKSDB_NAMESPACE { bool fetch_value = (handle & 1) != 0; if (zc_iter->key.data() != nullptr && fetch_value) { zc_iter->value = zc_iter->iter->value(); + THROW_ON_INVALID_VALUE(zc_iter, return); } else { zc_iter->value.data_ = nullptr; } @@ -59,6 +69,7 @@ namespace ROCKSDB_NAMESPACE { bool fetch_value = (handle & 1) != 0; if (zc_iter->key.data() != nullptr && fetch_value) { zc_iter->value = zc_iter->iter->value(); + THROW_ON_INVALID_VALUE(zc_iter, return); } else { zc_iter->value.data_ = nullptr; } @@ -102,13 +113,14 @@ jboolean Java_org_rocksdb_RocksIterator_isValid0(JNIEnv* /*env*/, * Method: seekToFirst0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, +void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto it = reinterpret_cast(handle & jlong(~1L)); it->key = it->iter->SeekToFirstWithKey(); if (it->key.data() != nullptr && (handle & 1)) { it->value = it->iter->value(); + THROW_ON_INVALID_VALUE(it, return); } else { it->value.data_ = nullptr; } @@ -119,13 +131,14 @@ void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, * Method: seekToLast0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, +void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto it = reinterpret_cast(handle & jlong(~1L)); 
it->key = it->iter->SeekToLastWithKey(); if (it->key.data() != nullptr && (handle & 1)) { it->value = it->iter->value(); + THROW_ON_INVALID_VALUE(it, return); } else { it->value.data_ = nullptr; } @@ -136,12 +149,13 @@ void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, * Method: next0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_rocksdb_RocksIterator_next0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto it = reinterpret_cast(handle & jlong(~1L)); it->key = it->iter->NextWithKey(); if (it->key.data() != nullptr && (handle & 1)) { it->value = it->iter->value(); + THROW_ON_INVALID_VALUE(it, return); } else { it->value.data_ = nullptr; } @@ -152,12 +166,13 @@ void Java_org_rocksdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, * Method: prev0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto it = reinterpret_cast(handle & jlong(~1L)); it->key = it->iter->PrevWithKey(); if (it->key.data() != nullptr && (handle & 1)) { it->value = it->iter->value(); + THROW_ON_INVALID_VALUE(it, return); } else { it->value.data_ = nullptr; } @@ -177,6 +192,7 @@ void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, zc_it->key = it->key(); if (zc_it->value.data_) { zc_it->value = it->value(); + THROW_ON_INVALID_VALUE(zc_it, return); } } else { zc_it->key = ROCKSDB_NAMESPACE::Slice(nullptr, 0); @@ -198,7 +214,7 @@ void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { - JCallbackSeek seek(handle); + JCallbackSeek seek(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, 0, jtarget_len); } @@ -214,7 +230,7 @@ void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject 
/*jobj*/, void Java_org_rocksdb_RocksIterator_seekByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { - JCallbackSeek seek(handle); + JCallbackSeek seek(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, jtarget_off, jtarget_len); } @@ -228,7 +244,7 @@ void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { - JCallbackSeek seek(handle); + JCallbackSeek seek(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, jtarget_len); } @@ -241,7 +257,7 @@ void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, void Java_org_rocksdb_RocksIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { - JCallbackSeekForPrev seekPrev(handle); + JCallbackSeekForPrev seekPrev(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seekPrev, env, jtarget, jtarget_off, jtarget_len); } @@ -255,7 +271,7 @@ void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { - JCallbackSeekForPrev seek(handle); + JCallbackSeekForPrev seek(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, 0, jtarget_len); } @@ -271,7 +287,7 @@ void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, void Java_org_rocksdb_RocksIterator_seekForPrevByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { - JCallbackSeekForPrev seek(handle); + JCallbackSeekForPrev seek(handle, env); ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, jtarget_off, jtarget_len); } @@ -369,6 +385,7 @@ void Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, assert(zc_it->iter->Valid()); assert(zc_it->key.data() != nullptr); zc_it->value = 
zc_it->iter->value(); + THROW_ON_INVALID_VALUE(zc_it, return); } /* @@ -385,6 +402,7 @@ jint Java_org_rocksdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, assert(zc_it->key.data() != nullptr); if (zc_it->value.data() == nullptr) { zc_it->value = zc_it->iter->value(); + THROW_ON_INVALID_VALUE(zc_it, return 0); } ROCKSDB_NAMESPACE::Slice value_slice = zc_it->value; return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, value_slice, jtarget, @@ -407,6 +425,7 @@ jint Java_org_rocksdb_RocksIterator_valueByteArray0( assert(zc_it->key.data() != nullptr); if (zc_it->value.data() == nullptr) { zc_it->value = zc_it->iter->value(); + THROW_ON_INVALID_VALUE(zc_it, return 0); } ROCKSDB_NAMESPACE::Slice value_slice = zc_it->value; jsize copy_size = std::min(static_cast(value_slice.size()), @@ -433,6 +452,7 @@ JNIEXPORT void JNICALL Java_org_rocksdb_RocksIterator_nativeRefreshForDatabaseGC if (is_valid) { zc_it->key = iter->key(); zc_it->value = iter->value(); + THROW_ON_INVALID_VALUE(zc_it, return); } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); }