From 116aaa5c7e26cdd41dcd0d69d4765ac4979fc600 Mon Sep 17 00:00:00 2001 From: zdenop Date: Sun, 10 Aug 2025 22:21:49 +0200 Subject: [PATCH 1/5] cmake: build optimization --- .github/workflows/cmake-win64.yml | 2 +- .github/workflows/cmake.yml | 1 + CMakeLists.txt | 116 +++++++++++++++++++++-- cmake/BuildOptimizations.cmake | 149 ++++++++++++++++++++++++++++++ src/training/CMakeLists.txt | 8 +- 5 files changed, 268 insertions(+), 8 deletions(-) create mode 100644 cmake/BuildOptimizations.cmake diff --git a/.github/workflows/cmake-win64.yml b/.github/workflows/cmake-win64.yml index 8295fde417..d7bb982586 100644 --- a/.github/workflows/cmake-win64.yml +++ b/.github/workflows/cmake-win64.yml @@ -112,7 +112,7 @@ jobs: - name: Build and Install tesseract shell: cmd run: | - cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF + cmake -Bbuild -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF cmake --build build --target install - name: Upload Build Results diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 7264890710..0cf72cc487 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -96,6 +96,7 @@ jobs: -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DOPENMP_BUILD=OFF \ + -DENABLE_UNITY_BUILD=ON \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DCMAKE_INSTALL_PREFIX:PATH=inst if: runner.os == 'macOS' diff --git a/CMakeLists.txt b/CMakeLists.txt index 566cc065ea..2d2c26b0af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,8 @@ # # ############################################################################## 
-cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
+# Require CMake 3.18 for modern features like precompiled headers, unity builds, and better target management
+cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 
 # In-source builds are disabled.
 if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
@@ -100,11 +101,12 @@ option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
 option(DISABLE_CURL "Disable build with libcurl (if available)" OFF)
 option(INSTALL_CONFIGS "Install tesseract configs" ON)
 
-if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15.0")
-  if(WIN32 AND MSVC)
-    option(WIN32_MT_BUILD "Build with MT flag for MSVC" OFF)
-  endif()
-endif()
+# Build optimization options
+option(ENABLE_UNITY_BUILD "Enable Unity/Jumbo builds for faster compilation" OFF)
+option(ENABLE_PRECOMPILED_HEADERS "Enable precompiled headers for faster compilation" ON)
+option(ENABLE_CCACHE "Enable ccache for faster incremental builds" ON)
+option(ENABLE_NINJA_POOL "Enable Ninja job pools to manage parallelism" ON)
+
 
 # ##############################################################################
 #
@@ -286,6 +288,8 @@ if(CMAKE_COMPILER_IS_GNUCXX OR MINGW)
 elseif(MSVC)
   add_definitions(-D_CRT_SECURE_NO_WARNINGS)
   add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) # strdup
+  add_definitions(-D_USE_MATH_DEFINES) # Enable M_PI and other math constants
+  add_definitions(-DNOMINMAX) # Prevent min/max macro conflicts
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
   if(NOT CLANG)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
@@ -376,6 +380,47 @@ endif()
 
 add_definitions("-DCMAKE_BUILD")
 
+# ##############################################################################
+#
+# Build optimizations
+#
+# ##############################################################################
+
+# Setup ccache if available and enabled
+if(ENABLE_CCACHE)
+  find_program(CCACHE_PROGRAM ccache)
+  if(CCACHE_PROGRAM)
+    message(STATUS "Found ccache: ${CCACHE_PROGRAM}")
+    # set(ENV{...}) only changes the configure-time environment, so pass the ccache
+    # settings to every compiler invocation through the launcher command line instead.
+    set(CCACHE_LAUNCHER "${CMAKE_COMMAND}" -E env CCACHE_SLOPPINESS=pch_defines,time_macros CCACHE_CPP2=true "${CCACHE_PROGRAM}")
+    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_LAUNCHER})
+    set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_LAUNCHER})
+  else()
+    message(STATUS "ccache not found, disabling ccache support")
+    set(ENABLE_CCACHE OFF)
+  endif()
+endif()
+
+# Setup Ninja job pools for better resource management
+if(ENABLE_NINJA_POOL AND CMAKE_GENERATOR STREQUAL "Ninja")
+  include(ProcessorCount)
+  ProcessorCount(N)
+  if(N GREATER 1)
+    # Use 75% of available cores for compilation, rest for linking
+    math(EXPR COMPILE_JOBS "${N} * 3 / 4")
+    math(EXPR LINK_JOBS "${N} - ${COMPILE_JOBS}")
+    if(LINK_JOBS LESS 1)
+      set(LINK_JOBS 1)
+    endif()
+
+    set_property(GLOBAL PROPERTY JOB_POOLS "compile=${COMPILE_JOBS};link=${LINK_JOBS}")
+    set(CMAKE_JOB_POOL_COMPILE compile)
+    set(CMAKE_JOB_POOL_LINK link)
+    message(STATUS "Ninja job pools: compile=${COMPILE_JOBS}, link=${LINK_JOBS}")
+  endif()
+endif()
+
 # ##############################################################################
 #
 # packages
@@ -569,6 +614,16 @@ message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
 message(
   STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
 message(STATUS "--------------------------------------------------------")
+message(STATUS "Modern build optimizations:")
+message(STATUS "Unity build [ENABLE_UNITY_BUILD]: ${ENABLE_UNITY_BUILD}")
+message(STATUS "Precompiled headers [ENABLE_PRECOMPILED_HEADERS]: ${ENABLE_PRECOMPILED_HEADERS}")
+message(STATUS "ccache [ENABLE_CCACHE]: ${ENABLE_CCACHE}")
+if(CMAKE_GENERATOR STREQUAL "Ninja")
+  message(STATUS "Ninja job pools [ENABLE_NINJA_POOL]: ${ENABLE_NINJA_POOL}")
+else()
+  message(STATUS "Ninja job pools [ENABLE_NINJA_POOL]: Disabled (not using Ninja)")
+endif()
+message(STATUS "--------------------------------------------------------")
 message(STATUS)
 
 # 
############################################################################## @@ -799,6 +854,55 @@ set(LIBTESSFILES ${TESSERACT_SRC} ${arch_files} ${arch_files_opt} source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${LIBTESSFILES}) add_library(libtesseract ${LIBTESSFILES}) + +# Apply modern optimizations to the main library +if(ENABLE_UNITY_BUILD) + set_target_properties(libtesseract PROPERTIES UNITY_BUILD ON) + set_target_properties(libtesseract PROPERTIES UNITY_BUILD_BATCH_SIZE 16) + message(STATUS "Unity build enabled for libtesseract with batch size 16") +endif() + +# Apply precompiled headers to reduce compilation time +if(ENABLE_PRECOMPILED_HEADERS) + target_precompile_headers(libtesseract PRIVATE + + + + + + + + + ) + + # Exclude architecture-specific files from PCH due to custom compiler flags + set(ARCH_FILES_NO_PCH + src/arch/dotproduct.cpp + src/arch/dotproductavx.cpp + src/arch/dotproductavx512.cpp + src/arch/dotproductfma.cpp + src/arch/dotproductsse.cpp + src/arch/dotproductneon.cpp + src/arch/intsimdmatrixavx2.cpp + src/arch/intsimdmatrixsse.cpp + src/arch/intsimdmatrixneon.cpp + ) + + foreach(file ${ARCH_FILES_NO_PCH}) + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${file}") + set_source_files_properties("${file}" PROPERTIES SKIP_PRECOMPILE_HEADERS ON) + endif() + endforeach() + + message(STATUS "Precompiled headers enabled for libtesseract (excluding architecture-specific files)") +endif() + +# Configure build pools for Ninja +if(ENABLE_NINJA_POOL AND CMAKE_GENERATOR STREQUAL "Ninja") + set_target_properties(libtesseract PROPERTIES JOB_POOL_COMPILE compile) + set_target_properties(libtesseract PROPERTIES JOB_POOL_LINK link) +endif() + target_include_directories( libtesseract BEFORE PRIVATE src diff --git a/cmake/BuildOptimizations.cmake b/cmake/BuildOptimizations.cmake new file mode 100644 index 0000000000..ca05e40fb5 --- /dev/null +++ b/cmake/BuildOptimizations.cmake @@ -0,0 +1,149 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +################################################################################ +# +# Build Optimizations Module +# +# This module provides functions to apply modern CMake build optimizations +# to targets for faster and incremental builds. +# +################################################################################ + +# +# Function: apply_modern_optimizations +# Apply build optimizations to a target +# +# Parameters: +# target_name - Name of the target to optimize +# PCH_HEADERS - Optional list of headers for precompiled headers +# +function(apply_modern_optimizations target_name) + # Parse arguments + set(oneValueArgs ) + set(multiValueArgs PCH_HEADERS) + cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Apply Unity Build if enabled + if(ENABLE_UNITY_BUILD) + set_target_properties(${target_name} PROPERTIES UNITY_BUILD ON) + # Use smaller batch sizes for libraries with many files + get_target_property(target_type ${target_name} TYPE) + if(target_type STREQUAL "STATIC_LIBRARY" OR target_type STREQUAL "SHARED_LIBRARY") + set_target_properties(${target_name} PROPERTIES UNITY_BUILD_BATCH_SIZE 16) + else() + set_target_properties(${target_name} PROPERTIES UNITY_BUILD_BATCH_SIZE 8) + endif() + message(STATUS "Unity build enabled for ${target_name}") + endif() + + # Apply Precompiled Headers if enabled and headers provided + if(ENABLE_PRECOMPILED_HEADERS) + if(ARG_PCH_HEADERS) + target_precompile_headers(${target_name} PRIVATE ${ARG_PCH_HEADERS}) + 
message(STATUS "Precompiled headers enabled for ${target_name}") + else() + # Use common standard library headers as default + target_precompile_headers(${target_name} PRIVATE + + + + + + + + + ) + message(STATUS "Default precompiled headers enabled for ${target_name}") + endif() + endif() + + # Configure build pools for Ninja + if(ENABLE_NINJA_POOL AND CMAKE_GENERATOR STREQUAL "Ninja") + set_target_properties(${target_name} PROPERTIES JOB_POOL_COMPILE compile) + set_target_properties(${target_name} PROPERTIES JOB_POOL_LINK link) + endif() + + # Apply compiler-specific optimizations + if(MSVC) + # Enable parallel compilation for MSVC if not already enabled + get_target_property(target_compile_options ${target_name} COMPILE_OPTIONS) + if(NOT target_compile_options MATCHES "/MP") + target_compile_options(${target_name} PRIVATE "/MP") + endif() + + # Enable function-level linking for better optimization + target_compile_options(${target_name} PRIVATE "/Gy") + + # Enable intrinsic functions for better performance + target_compile_options(${target_name} PRIVATE "/Oi") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + # Enable split debug info for faster incremental builds + if(CMAKE_BUILD_TYPE MATCHES Debug) + target_compile_options(${target_name} PRIVATE "-gsplit-dwarf") + endif() + + # Enable function sections for better dead code elimination + target_compile_options(${target_name} PRIVATE "-ffunction-sections" "-fdata-sections") + endif() +endfunction() + +# +# Function: apply_training_optimizations +# Apply optimizations specific to training tools +# +function(apply_training_optimizations target_name) + apply_modern_optimizations(${target_name} + PCH_HEADERS + + + + + + + + ) + + # Training tools usually build faster, so smaller unity batches are fine + if(ENABLE_UNITY_BUILD) + set_target_properties(${target_name} PROPERTIES UNITY_BUILD_BATCH_SIZE 4) + endif() +endfunction() + +# +# Function: apply_test_optimizations +# Apply optimizations specific to test 
targets +# +function(apply_test_optimizations target_name) + # Tests often have different compilation patterns + if(ENABLE_PRECOMPILED_HEADERS) + target_precompile_headers(${target_name} PRIVATE + + + + + + ) + message(STATUS "Test precompiled headers enabled for ${target_name}") + endif() + + # Tests benefit from unity builds but smaller batches + if(ENABLE_UNITY_BUILD) + set_target_properties(${target_name} PROPERTIES UNITY_BUILD ON) + set_target_properties(${target_name} PROPERTIES UNITY_BUILD_BATCH_SIZE 8) + message(STATUS "Unity build enabled for test ${target_name}") + endif() + + # Configure Ninja pools + if(ENABLE_NINJA_POOL AND CMAKE_GENERATOR STREQUAL "Ninja") + set_target_properties(${target_name} PROPERTIES JOB_POOL_COMPILE compile) + set_target_properties(${target_name} PROPERTIES JOB_POOL_LINK link) + endif() +endfunction() diff --git a/src/training/CMakeLists.txt b/src/training/CMakeLists.txt index f25e34825c..32cd5f78b5 100644 --- a/src/training/CMakeLists.txt +++ b/src/training/CMakeLists.txt @@ -1,10 +1,13 @@ # -# tesseract +# tesseract training tools # if(NOT ${CMAKE_VERSION} VERSION_LESS "3.12.0") cmake_policy(SET CMP0074 NEW) endif() +# Include build optimizations +include(BuildOptimizations) + if(SW_BUILD) set(ICU_FOUND 1) else() # NOT SW_BUILD @@ -115,6 +118,9 @@ add_library(common_training ${COMMON_TRAINING_SRC}) target_include_directories(common_training PUBLIC common ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(common_training PUBLIC libtesseract) + +# Apply modern build optimizations +apply_training_optimizations(common_training) install( TARGETS common_training RUNTIME DESTINATION bin From fe5c522e8cfcaf7632832120c3c117a2b40e1966 Mon Sep 17 00:00:00 2001 From: zdenop Date: Sun, 10 Aug 2025 22:22:24 +0200 Subject: [PATCH 2/5] cmake: solve macos warning about installed ninja and icu4c --- .github/workflows/cmake.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 
0cf72cc487..f6390a4a98 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -53,9 +53,7 @@ jobs: brew install leptonica # brew install libarchive brew install pango - brew install icu4c && brew link icu4c brew install cabextract - brew install ninja ninja --version cmake --version clang++ --version From 90333e43be053f2e236d77e6d306cbcb970c5cbf Mon Sep 17 00:00:00 2001 From: zdenop Date: Sun, 10 Aug 2025 22:23:32 +0200 Subject: [PATCH 3/5] cmake: Don't use GLOB in CMake files - fixes #4442 --- CMakeLists.txt | 39 +-- cmake/SourceLists.cmake | 584 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 593 insertions(+), 30 deletions(-) create mode 100644 cmake/SourceLists.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d2c26b0af..d6ad09d11c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -648,19 +648,11 @@ endif() # LIBRARY tesseract # ############################################################################## -file( - GLOB - TESSERACT_SRC - src/ccmain/*.cpp - src/ccstruct/*.cpp - src/ccutil/*.cpp - src/classify/*.cpp - src/cutil/*.cpp - src/dict/*.cpp - src/lstm/*.cpp - src/textord/*.cpp - src/viewer/*.cpp - src/wordrec/*.cpp) +# Include source file lists +include(cmake/SourceLists.cmake) + +# Build the core source file list +set(TESSERACT_SRC ${TESSERACT_SRC_CORE}) if(DISABLED_LEGACY_ENGINE) # prepend path to list of source files @@ -741,8 +733,8 @@ if(DISABLED_LEGACY_ENGINE) list(REMOVE_ITEM TESSERACT_SRC ${TESSERACT_SRC_LEGACY}) endif(DISABLED_LEGACY_ENGINE) -list(APPEND arch_files src/arch/dotproduct.cpp src/arch/simddetect.cpp - src/arch/intsimdmatrix.cpp) +# Use architecture files from SourceLists.cmake +set(arch_files ${TESSERACT_SRC_ARCH}) if(DOTPRODUCT_FLAGS) set_source_files_properties(src/arch/dotproduct.cpp @@ -786,21 +778,8 @@ if(HAVE_NEON) endif() endif(HAVE_NEON) -file( - GLOB_RECURSE - TESSERACT_HDR - include/* - src/arch/*.h - src/ccmain/*.h - src/ccstruct/*.h - src/ccutil/*.h - src/classify/*.h - 
src/cutil/*.h - src/dict/*.h - src/lstm/*.h - src/textord/*.h - src/viewer/*.h - src/wordrec/*.h) +# Use explicit header file lists from SourceLists.cmake +set(TESSERACT_HDR ${TESSERACT_HDR_INCLUDE} ${TESSERACT_HDR_INTERNAL}) set(TESSERACT_SRC ${TESSERACT_SRC} diff --git a/cmake/SourceLists.cmake b/cmake/SourceLists.cmake new file mode 100644 index 0000000000..bc8e415f53 --- /dev/null +++ b/cmake/SourceLists.cmake @@ -0,0 +1,584 @@ +# Source file lists for tesseract +# This file contains all source files organized by module + +# API module sources +set(TESSERACT_SRC_API + src/api/altorenderer.cpp + src/api/baseapi.cpp + src/api/capi.cpp + src/api/hocrrenderer.cpp + src/api/lstmboxrenderer.cpp + src/api/pagerenderer.cpp + src/api/pdfrenderer.cpp + src/api/renderer.cpp + src/api/wordstrboxrenderer.cpp +) + +# Architecture-specific sources +set(TESSERACT_SRC_ARCH + src/arch/dotproduct.cpp + src/arch/simddetect.cpp + src/arch/intsimdmatrix.cpp +) + +# Optional architecture-specific sources (conditionally added) +set(TESSERACT_SRC_ARCH_AVX + src/arch/dotproductavx.cpp +) + +set(TESSERACT_SRC_ARCH_AVX2 + src/arch/intsimdmatrixavx2.cpp + src/arch/dotproductavx.cpp +) + +set(TESSERACT_SRC_ARCH_AVX512F + src/arch/dotproductavx512.cpp +) + +set(TESSERACT_SRC_ARCH_FMA + src/arch/dotproductfma.cpp +) + +set(TESSERACT_SRC_ARCH_SSE41 + src/arch/dotproductsse.cpp + src/arch/intsimdmatrixsse.cpp +) + +set(TESSERACT_SRC_ARCH_NEON + src/arch/dotproductneon.cpp + src/arch/intsimdmatrixneon.cpp +) + +# CCMain module sources +set(TESSERACT_SRC_CCMAIN + src/ccmain/adaptions.cpp + src/ccmain/applybox.cpp + src/ccmain/control.cpp + src/ccmain/docqual.cpp + src/ccmain/equationdetect.cpp + src/ccmain/fixspace.cpp + src/ccmain/fixxht.cpp + src/ccmain/linerec.cpp + src/ccmain/ltrresultiterator.cpp + src/ccmain/mutableiterator.cpp + src/ccmain/osdetect.cpp + src/ccmain/output.cpp + src/ccmain/pageiterator.cpp + src/ccmain/pagesegmain.cpp + src/ccmain/pagewalk.cpp + src/ccmain/par_control.cpp + 
src/ccmain/paragraphs.cpp + src/ccmain/paramsd.cpp + src/ccmain/pgedit.cpp + src/ccmain/recogtraining.cpp + src/ccmain/reject.cpp + src/ccmain/resultiterator.cpp + src/ccmain/superscript.cpp + src/ccmain/tessbox.cpp + src/ccmain/tessedit.cpp + src/ccmain/tesseractclass.cpp + src/ccmain/tessvars.cpp + src/ccmain/tfacepp.cpp + src/ccmain/thresholder.cpp + src/ccmain/werdit.cpp +) + +# CCStruct module sources +set(TESSERACT_SRC_CCSTRUCT + src/ccstruct/blamer.cpp + src/ccstruct/blobbox.cpp + src/ccstruct/blobs.cpp + src/ccstruct/blread.cpp + src/ccstruct/boxread.cpp + src/ccstruct/boxword.cpp + src/ccstruct/ccstruct.cpp + src/ccstruct/coutln.cpp + src/ccstruct/detlinefit.cpp + src/ccstruct/dppoint.cpp + src/ccstruct/fontinfo.cpp + src/ccstruct/image.cpp + src/ccstruct/imagedata.cpp + src/ccstruct/linlsq.cpp + src/ccstruct/matrix.cpp + src/ccstruct/mod128.cpp + src/ccstruct/normalis.cpp + src/ccstruct/ocrblock.cpp + src/ccstruct/ocrpara.cpp + src/ccstruct/ocrrow.cpp + src/ccstruct/otsuthr.cpp + src/ccstruct/pageres.cpp + src/ccstruct/params_training_featdef.cpp + src/ccstruct/pdblock.cpp + src/ccstruct/points.cpp + src/ccstruct/polyaprx.cpp + src/ccstruct/polyblk.cpp + src/ccstruct/quadlsq.cpp + src/ccstruct/quspline.cpp + src/ccstruct/ratngs.cpp + src/ccstruct/rect.cpp + src/ccstruct/rejctmap.cpp + src/ccstruct/seam.cpp + src/ccstruct/split.cpp + src/ccstruct/statistc.cpp + src/ccstruct/stepblob.cpp + src/ccstruct/werd.cpp +) + +# CCUtil module sources +set(TESSERACT_SRC_CCUTIL + src/ccutil/ambigs.cpp + src/ccutil/bitvector.cpp + src/ccutil/ccutil.cpp + src/ccutil/errcode.cpp + src/ccutil/indexmapbidi.cpp + src/ccutil/params.cpp + src/ccutil/scanutils.cpp + src/ccutil/serialis.cpp + src/ccutil/tessdatamanager.cpp + src/ccutil/tprintf.cpp + src/ccutil/unichar.cpp + src/ccutil/unicharcompress.cpp + src/ccutil/unicharmap.cpp + src/ccutil/unicharset.cpp +) + +# Classify module sources +set(TESSERACT_SRC_CLASSIFY + src/classify/adaptive.cpp + src/classify/adaptmatch.cpp + 
src/classify/blobclass.cpp + src/classify/classify.cpp + src/classify/cluster.cpp + src/classify/clusttool.cpp + src/classify/cutoffs.cpp + src/classify/featdefs.cpp + src/classify/float2int.cpp + src/classify/fpoint.cpp + src/classify/intfeaturespace.cpp + src/classify/intfx.cpp + src/classify/intmatcher.cpp + src/classify/intproto.cpp + src/classify/kdtree.cpp + src/classify/mf.cpp + src/classify/mfoutline.cpp + src/classify/mfx.cpp + src/classify/normfeat.cpp + src/classify/normmatch.cpp + src/classify/ocrfeatures.cpp + src/classify/outfeat.cpp + src/classify/picofeat.cpp + src/classify/protos.cpp + src/classify/shapeclassifier.cpp + src/classify/shapetable.cpp + src/classify/tessclassifier.cpp + src/classify/trainingsample.cpp +) + +# CUtil module sources +set(TESSERACT_SRC_CUTIL + src/cutil/oldlist.cpp +) + +# Dict module sources +set(TESSERACT_SRC_DICT + src/dict/context.cpp + src/dict/dawg.cpp + src/dict/dawg_cache.cpp + src/dict/dict.cpp + src/dict/hyphen.cpp + src/dict/permdawg.cpp + src/dict/stopper.cpp + src/dict/trie.cpp +) + +# LSTM module sources +set(TESSERACT_SRC_LSTM + src/lstm/convolve.cpp + src/lstm/fullyconnected.cpp + src/lstm/functions.cpp + src/lstm/input.cpp + src/lstm/lstm.cpp + src/lstm/lstmrecognizer.cpp + src/lstm/maxpool.cpp + src/lstm/network.cpp + src/lstm/networkio.cpp + src/lstm/parallel.cpp + src/lstm/plumbing.cpp + src/lstm/recodebeam.cpp + src/lstm/reconfig.cpp + src/lstm/reversed.cpp + src/lstm/series.cpp + src/lstm/stridemap.cpp + src/lstm/weightmatrix.cpp +) + +# TextOrd module sources +set(TESSERACT_SRC_TEXTORD + src/textord/alignedblob.cpp + src/textord/baselinedetect.cpp + src/textord/bbgrid.cpp + src/textord/blkocc.cpp + src/textord/blobgrid.cpp + src/textord/ccnontextdetect.cpp + src/textord/cjkpitch.cpp + src/textord/colfind.cpp + src/textord/colpartition.cpp + src/textord/colpartitiongrid.cpp + src/textord/colpartitionset.cpp + src/textord/devanagari_processing.cpp + src/textord/drawtord.cpp + src/textord/edgblob.cpp + 
src/textord/edgloop.cpp + src/textord/equationdetectbase.cpp + src/textord/fpchop.cpp + src/textord/gap_map.cpp + src/textord/imagefind.cpp + src/textord/linefind.cpp + src/textord/makerow.cpp + src/textord/oldbasel.cpp + src/textord/pithsync.cpp + src/textord/pitsync1.cpp + src/textord/scanedg.cpp + src/textord/sortflts.cpp + src/textord/strokewidth.cpp + src/textord/tabfind.cpp + src/textord/tablefind.cpp + src/textord/tablerecog.cpp + src/textord/tabvector.cpp + src/textord/textlineprojection.cpp + src/textord/textord.cpp + src/textord/topitch.cpp + src/textord/tordmain.cpp + src/textord/tospace.cpp + src/textord/tovars.cpp + src/textord/underlin.cpp + src/textord/wordseg.cpp + src/textord/workingpartset.cpp +) + +# Viewer module sources +set(TESSERACT_SRC_VIEWER + src/viewer/scrollview.cpp + src/viewer/svmnode.cpp + src/viewer/svutil.cpp +) + +# WordRec module sources +set(TESSERACT_SRC_WORDREC + src/wordrec/associate.cpp + src/wordrec/chop.cpp + src/wordrec/chopper.cpp + src/wordrec/drawfx.cpp + src/wordrec/findseam.cpp + src/wordrec/gradechop.cpp + src/wordrec/language_model.cpp + src/wordrec/lm_consistency.cpp + src/wordrec/lm_pain_points.cpp + src/wordrec/lm_state.cpp + src/wordrec/outlines.cpp + src/wordrec/params_model.cpp + src/wordrec/pieces.cpp + src/wordrec/plotedges.cpp + src/wordrec/render.cpp + src/wordrec/segsearch.cpp + src/wordrec/tface.cpp + src/wordrec/wordclass.cpp + src/wordrec/wordrec.cpp +) + +# Legacy engine sources (excluded when DISABLED_LEGACY_ENGINE is ON) +set(TESSERACT_SRC_LEGACY + src/ccmain/adaptions.cpp + src/ccmain/docqual.cpp + src/ccmain/equationdetect.cpp + src/ccmain/fixspace.cpp + src/ccmain/fixxht.cpp + src/ccmain/osdetect.cpp + src/ccmain/par_control.cpp + src/ccmain/recogtraining.cpp + src/ccmain/superscript.cpp + src/ccmain/tessbox.cpp + src/ccmain/tfacepp.cpp + src/ccstruct/fontinfo.cpp + src/ccstruct/params_training_featdef.cpp + src/ccutil/ambigs.cpp + src/ccutil/bitvector.cpp + src/ccutil/indexmapbidi.cpp + 
src/classify/adaptive.cpp + src/classify/adaptmatch.cpp + src/classify/blobclass.cpp + src/classify/cluster.cpp + src/classify/clusttool.cpp + src/classify/cutoffs.cpp + src/classify/featdefs.cpp + src/classify/float2int.cpp + src/classify/fpoint.cpp + src/classify/intfeaturespace.cpp + src/classify/intfx.cpp + src/classify/intmatcher.cpp + src/classify/intproto.cpp + src/classify/kdtree.cpp + src/classify/mf.cpp + src/classify/mfoutline.cpp + src/classify/mfx.cpp + src/classify/normfeat.cpp + src/classify/normmatch.cpp + src/classify/ocrfeatures.cpp + src/classify/outfeat.cpp + src/classify/picofeat.cpp + src/classify/protos.cpp + src/classify/shapeclassifier.cpp + src/classify/shapetable.cpp + src/classify/tessclassifier.cpp + src/classify/trainingsample.cpp + src/dict/permdawg.cpp + src/dict/hyphen.cpp + src/wordrec/associate.cpp + src/wordrec/chop.cpp + src/wordrec/chopper.cpp + src/wordrec/drawfx.cpp + src/wordrec/findseam.cpp + src/wordrec/gradechop.cpp + src/wordrec/language_model.cpp + src/wordrec/lm_consistency.cpp + src/wordrec/lm_pain_points.cpp + src/wordrec/lm_state.cpp + src/wordrec/outlines.cpp + src/wordrec/params_model.cpp + src/wordrec/pieces.cpp + src/wordrec/plotedges.cpp + src/wordrec/render.cpp + src/wordrec/segsearch.cpp + src/wordrec/wordclass.cpp +) + +# Header files +set(TESSERACT_HDR_INCLUDE + include/tesseract/baseapi.h + include/tesseract/capi.h + include/tesseract/export.h + include/tesseract/ltrresultiterator.h + include/tesseract/ocrclass.h + include/tesseract/osdetect.h + include/tesseract/pageiterator.h + include/tesseract/publictypes.h + include/tesseract/renderer.h + include/tesseract/resultiterator.h + include/tesseract/unichar.h +) + +# Internal header files +set(TESSERACT_HDR_INTERNAL + src/api/pdf_ttf.h + src/arch/dotproduct.h + src/arch/intsimdmatrix.h + src/arch/simddetect.h + src/ccmain/control.h + src/ccmain/docqual.h + src/ccmain/equationdetect.h + src/ccmain/fixspace.h + src/ccmain/mutableiterator.h + 
src/ccmain/output.h + src/ccmain/paragraphs.h + src/ccmain/paragraphs_internal.h + src/ccmain/paramsd.h + src/ccmain/pgedit.h + src/ccmain/reject.h + src/ccmain/tesseractclass.h + src/ccmain/tessvars.h + src/ccmain/thresholder.h + src/ccmain/werdit.h + src/ccstruct/blamer.h + src/ccstruct/blobbox.h + src/ccstruct/blobs.h + src/ccstruct/blread.h + src/ccstruct/boxread.h + src/ccstruct/boxword.h + src/ccstruct/ccstruct.h + src/ccstruct/coutln.h + src/ccstruct/crakedge.h + src/ccstruct/debugpixa.h + src/ccstruct/detlinefit.h + src/ccstruct/dppoint.h + src/ccstruct/fontinfo.h + src/ccstruct/image.h + src/ccstruct/imagedata.h + src/ccstruct/linlsq.h + src/ccstruct/matrix.h + src/ccstruct/mod128.h + src/ccstruct/normalis.h + src/ccstruct/ocrblock.h + src/ccstruct/ocrpara.h + src/ccstruct/ocrrow.h + src/ccstruct/otsuthr.h + src/ccstruct/pageres.h + src/ccstruct/params_training_featdef.h + src/ccstruct/pdblock.h + src/ccstruct/points.h + src/ccstruct/polyaprx.h + src/ccstruct/polyblk.h + src/ccstruct/quadlsq.h + src/ccstruct/quadratc.h + src/ccstruct/quspline.h + src/ccstruct/ratngs.h + src/ccstruct/rect.h + src/ccstruct/rejctmap.h + src/ccstruct/seam.h + src/ccstruct/split.h + src/ccstruct/statistc.h + src/ccstruct/stepblob.h + src/ccstruct/werd.h + src/ccutil/ambigs.h + src/ccutil/bitvector.h + src/ccutil/ccutil.h + src/ccutil/clst.h + src/ccutil/elst.h + src/ccutil/elst2.h + src/ccutil/errcode.h + src/ccutil/fileerr.h + src/ccutil/genericvector.h + src/ccutil/genericheap.h + src/ccutil/helpers.h + src/ccutil/host.h + src/ccutil/indexmapbidi.h + src/ccutil/kdpair.h + src/ccutil/lsterr.h + src/ccutil/object_cache.h + src/ccutil/params.h + src/ccutil/qrsequence.h + src/ccutil/scanutils.h + src/ccutil/serialis.h + src/ccutil/sorthelper.h + src/ccutil/tessdatamanager.h + src/ccutil/tesserrstream.h + src/ccutil/tesstypes.h + src/ccutil/tprintf.h + src/ccutil/unicity_table.h + src/ccutil/unicharcompress.h + src/ccutil/unicharmap.h + src/ccutil/unicharset.h + 
src/ccutil/universalambigs.h + src/classify/adaptive.h + src/classify/classify.h + src/classify/cluster.h + src/classify/clusttool.h + src/classify/featdefs.h + src/classify/float2int.h + src/classify/fpoint.h + src/classify/intfeaturespace.h + src/classify/intfx.h + src/classify/intmatcher.h + src/classify/intproto.h + src/classify/kdtree.h + src/classify/mf.h + src/classify/mfdefs.h + src/classify/mfoutline.h + src/classify/mfx.h + src/classify/normfeat.h + src/classify/normmatch.h + src/classify/ocrfeatures.h + src/classify/outfeat.h + src/classify/picofeat.h + src/classify/protos.h + src/classify/shapeclassifier.h + src/classify/shapetable.h + src/classify/tessclassifier.h + src/classify/trainingsample.h + src/cutil/bitvec.h + src/cutil/oldlist.h + src/dict/dawg.h + src/dict/dawg_cache.h + src/dict/dict.h + src/dict/matchdefs.h + src/dict/stopper.h + src/dict/trie.h + src/lstm/convolve.h + src/lstm/fullyconnected.h + src/lstm/functions.h + src/lstm/input.h + src/lstm/lstm.h + src/lstm/lstmrecognizer.h + src/lstm/maxpool.h + src/lstm/network.h + src/lstm/networkio.h + src/lstm/networkscratch.h + src/lstm/parallel.h + src/lstm/plumbing.h + src/lstm/recodebeam.h + src/lstm/reconfig.h + src/lstm/reversed.h + src/lstm/series.h + src/lstm/static_shape.h + src/lstm/stridemap.h + src/lstm/weightmatrix.h + src/textord/alignedblob.h + src/textord/baselinedetect.h + src/textord/bbgrid.h + src/textord/blkocc.h + src/textord/blobgrid.h + src/textord/ccnontextdetect.h + src/textord/cjkpitch.h + src/textord/colfind.h + src/textord/colpartition.h + src/textord/colpartitiongrid.h + src/textord/colpartitionset.h + src/textord/devanagari_processing.h + src/textord/drawtord.h + src/textord/edgblob.h + src/textord/edgloop.h + src/textord/equationdetectbase.h + src/textord/fpchop.h + src/textord/gap_map.h + src/textord/imagefind.h + src/textord/linefind.h + src/textord/makerow.h + src/textord/oldbasel.h + src/textord/pithsync.h + src/textord/pitsync1.h + src/textord/scanedg.h + 
src/textord/sortflts.h + src/textord/strokewidth.h + src/textord/tabfind.h + src/textord/tablefind.h + src/textord/tablerecog.h + src/textord/tabvector.h + src/textord/textlineprojection.h + src/textord/textord.h + src/textord/topitch.h + src/textord/tordmain.h + src/textord/tovars.h + src/textord/underlin.h + src/textord/wordseg.h + src/textord/workingpartset.h + src/viewer/scrollview.h + src/viewer/svmnode.h + src/viewer/svutil.h + src/wordrec/associate.h + src/wordrec/chop.h + src/wordrec/drawfx.h + src/wordrec/findseam.h + src/wordrec/language_model.h + src/wordrec/lm_consistency.h + src/wordrec/lm_pain_points.h + src/wordrec/lm_state.h + src/wordrec/outlines.h + src/wordrec/params_model.h + src/wordrec/plotedges.h + src/wordrec/render.h + src/wordrec/wordrec.h +) + +# Combine all core source files +set(TESSERACT_SRC_CORE + ${TESSERACT_SRC_API} + ${TESSERACT_SRC_CCMAIN} + ${TESSERACT_SRC_CCSTRUCT} + ${TESSERACT_SRC_CCUTIL} + ${TESSERACT_SRC_CLASSIFY} + ${TESSERACT_SRC_CUTIL} + ${TESSERACT_SRC_DICT} + ${TESSERACT_SRC_LSTM} + ${TESSERACT_SRC_TEXTORD} + ${TESSERACT_SRC_VIEWER} + ${TESSERACT_SRC_WORDREC} +) From 196622dd055caf8134477f123d85868515cb2d54 Mon Sep 17 00:00:00 2001 From: zdenop Date: Sun, 10 Aug 2025 22:24:11 +0200 Subject: [PATCH 4/5] set kMaxMsgSize in svutil.h to fix macos build error: redefinition of 'kMaxMsgSize' --- src/viewer/scrollview.cpp | 1 - src/viewer/svutil.cpp | 2 -- src/viewer/svutil.h | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/viewer/scrollview.cpp b/src/viewer/scrollview.cpp index 73bd3c5fc2..9c0c4e81db 100644 --- a/src/viewer/scrollview.cpp +++ b/src/viewer/scrollview.cpp @@ -43,7 +43,6 @@ namespace tesseract { const int kSvPort = 8461; -const int kMaxMsgSize = 4096; const int kMaxIntPairSize = 45; // Holds %d,%d, for up to 64 bit. 
struct SVPolyLineBuffer { diff --git a/src/viewer/svutil.cpp b/src/viewer/svutil.cpp index 3acce98040..c86594cded 100644 --- a/src/viewer/svutil.cpp +++ b/src/viewer/svutil.cpp @@ -61,8 +61,6 @@ namespace tesseract { -const int kMaxMsgSize = 4096; - // Starts a new process. void SVSync::StartProcess(const char *executable, const char *args) { std::string proc; diff --git a/src/viewer/svutil.h b/src/viewer/svutil.h index a0a8b091e4..11af84586a 100644 --- a/src/viewer/svutil.h +++ b/src/viewer/svutil.h @@ -34,6 +34,9 @@ namespace tesseract { +// Maximum message size for ScrollView network communication +const int kMaxMsgSize = 4096; + /// The SVSync class provides functionality for Thread & Process Creation class SVSync { public: From 6d3ef92d27fb9779f6d0b75564fb734a0789a717 Mon Sep 17 00:00:00 2001 From: zdenop Date: Sun, 10 Aug 2025 22:24:36 +0200 Subject: [PATCH 5/5] Fix cmake macos build: Change the macro definition from classify_enable_adaptive_matcher to CLASSIFY_ENABLE_ADAPTIVE_MATCHER_OVERRIDE to avoid conflicts with the BOOL_MEMBER parameter declaration in classify.cpp --- src/classify/adaptmatch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index 273259f918..29eebc7b33 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -75,7 +75,7 @@ namespace tesseract { // TODO: The parameter classify_enable_adaptive_matcher can cause // a segmentation fault if it is set to false (issue #256), // so override it here. 
-#define classify_enable_adaptive_matcher true +#define CLASSIFY_ENABLE_ADAPTIVE_MATCHER_OVERRIDE true #define ADAPT_TEMPLATE_SUFFIX ".a" @@ -465,7 +465,7 @@ void Classify::EndAdaptiveClassifier() { std::string Filename; FILE *File; - if (AdaptedTemplates != nullptr && classify_enable_adaptive_matcher && + if (AdaptedTemplates != nullptr && CLASSIFY_ENABLE_ADAPTIVE_MATCHER_OVERRIDE && classify_save_adapted_templates) { Filename = imagefile + ADAPT_TEMPLATE_SUFFIX; File = fopen(Filename.c_str(), "wb"); @@ -525,7 +525,7 @@ void Classify::EndAdaptiveClassifier() { * enables use of pre-adapted templates */ void Classify::InitAdaptiveClassifier(TessdataManager *mgr) { - if (!classify_enable_adaptive_matcher) { + if (!CLASSIFY_ENABLE_ADAPTIVE_MATCHER_OVERRIDE) { return; } if (AllProtosOn != nullptr) {