Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,23 @@ jobs:
matrix:
os:
- ubuntu-24.04
- macos-13
- macos-14
- macos-15
runs-on: ${{ matrix.os }}
steps:
- name: checkout scm
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
submodules: recursive

- name: install build dependencies (Ubuntu)
run: sudo apt update && sudo apt install -y build-essential libgtest-dev libhwy-dev libisal-dev libdeflate-dev
run: sudo apt update && sudo apt install -y build-essential cmake nasm
if: runner.os == 'Linux'

- name: install build dependencies (MacOS)
run: brew install highway googletest isa-l libdeflate
if: runner.os == 'macOS'

- name: make fastplong
run: make -j

- name: make test
run: make -j test
run: make test

- name: test
run: chmod a+x ./fastplong && ./fastplong --version

9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@
*.out
*.app
bin
fastplong

# Test data (generated)
testdata/*.fastq
testdata/*.fastq.gz

# Reports
fastplong.html
fastplong.json
12 changes: 12 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[submodule "third_party/highway"]
path = third_party/highway
url = https://github.com/google/highway.git
[submodule "third_party/isa-l"]
path = third_party/isa-l
url = https://github.com/intel/isa-l.git
[submodule "third_party/libdeflate"]
path = third_party/libdeflate
url = https://github.com/ebiggers/libdeflate.git
[submodule "third_party/googletest"]
path = third_party/googletest
url = https://github.com/google/googletest.git
97 changes: 74 additions & 23 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,114 @@ DIR_INC := ./inc
DIR_SRC := ./src
DIR_OBJ := ./obj
DIR_TEST := ./test
DIR_HWY := ./third_party/highway
DIR_ISAL := ./third_party/isa-l
DIR_LIBDEFLATE := ./third_party/libdeflate
DIR_GTEST := ./third_party/googletest

PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
INCLUDE_DIRS ?= /opt/homebrew/include
LIBRARY_DIRS ?= /opt/homebrew/lib

SRC := $(wildcard ${DIR_SRC}/*.cpp)
TEST := $(wildcard ${DIR_TEST}/*.cpp)
OBJ := $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC}))
TEST_OBJ := $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${TEST}))

# Highway runtime dispatch support
HWY_OBJS := ${DIR_OBJ}/hwy_targets.o ${DIR_OBJ}/hwy_abort.o
OBJ += $(HWY_OBJS)

TARGET := fastplong

BIN_TARGET := ${TARGET}
TEST_TARGET := bin/fastplong_unittest

CXX ?= g++
CXXFLAGS := -std=c++14 -pthread -g -O3 -MP -MD -I${DIR_INC} -I${DIR_SRC} $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) ${CXXFLAGS}
LIBS := -lisal -ldeflate -lpthread -lhwy
STATIC_FLAGS := -static -L. -Wl,--no-as-needed -pthread
LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(LIBS) $(LD_FLAGS)
STATIC_LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(STATIC_FLAGS) $(LIBS) $(STATIC_LD_FLAGS)
CXXFLAGS := -std=c++17 -pthread -g -O3 -MD -MP \
-I. -I${DIR_INC} -I${DIR_SRC} -I${DIR_HWY} \
-I${DIR_ISAL}/include -I${DIR_LIBDEFLATE} \
${CXXFLAGS}

# Archives produced by building the bundled submodules.
# They are passed to the linker by path rather than through -l flags.
ISAL_LIB       := ${DIR_ISAL}/bin/isa-l.a
LIBDEFLATE_LIB := ${DIR_LIBDEFLATE}/build/libdeflate.a
GTEST_LIB      := ${DIR_GTEST}/build/lib/libgtest.a
GTEST_MAIN_LIB := ${DIR_GTEST}/build/lib/libgtest_main.a

# Host detection: kernel name and machine architecture, queried once at parse time.
UNAME_S := $(shell uname -s)
UNAME_M := $(shell uname -m)

# Linker flags: only Linux requests a fully static executable; every other
# platform (e.g. macOS) links the bundled static archives but keeps the
# system runtime dynamic.
ifneq ($(UNAME_S),Linux)
LD_FLAGS := -lpthread
else
LD_FLAGS := -static -Wl,--no-as-needed -lpthread
endif

# Extra arguments for the isa-l sub-make. macOS reports the machine as "arm64",
# whereas isa-l selects its NEON assembly under the name "aarch64".
ifeq ($(UNAME_M),arm64)
ISAL_MAKE_ARGS := host_cpu=aarch64 arch=aarch64
else
ISAL_MAKE_ARGS :=
endif

# Default target: build deps from submodules and link statically
${BIN_TARGET}: $(ISAL_LIB) $(LIBDEFLATE_LIB) ${OBJ}
$(CXX) $(OBJ) -o $@ $(ISAL_LIB) $(LIBDEFLATE_LIB) $(LD_FLAGS)

${BIN_TARGET}:${OBJ}
$(CXX) $(OBJ) -o $@ $(LD_FLAGS)
# isa-l static archive: delegate to the submodule's own Unix makefile,
# forwarding any architecture overrides chosen above.
${ISAL_LIB}:
	$(MAKE) -C ${DIR_ISAL} -f Makefile.unx lib ${ISAL_MAKE_ARGS}

static:${OBJ}
$(CXX) $(OBJ) -o ${BIN_TARGET} $(STATIC_LD_FLAGS)
# libdeflate static archive: configure and compile with CMake inside the
# submodule, with the shared library and the gzip program switched off.
${LIBDEFLATE_LIB}:
	cd ${DIR_LIBDEFLATE} && \
	cmake -B build -DCMAKE_BUILD_TYPE=Release -DLIBDEFLATE_BUILD_SHARED_LIB=OFF -DLIBDEFLATE_BUILD_GZIP=OFF && \
	cmake --build build

# Build the googletest static libraries from the submodule (gmock disabled).
#
# One CMake run emits both libgtest.a and libgtest_main.a. Listing both archives
# as targets of a single rule would define two independent rules, so `make -j`
# could launch CMake twice in the same build directory. The recipe is therefore
# attached to libgtest.a only, and libgtest_main.a is declared as a by-product
# that merely depends on it (empty recipe).
$(GTEST_LIB):
	cd $(DIR_GTEST) && cmake -B build \
		-DCMAKE_BUILD_TYPE=Release \
		-DBUILD_GMOCK=OFF && \
	cmake --build build

$(GTEST_MAIN_LIB): $(GTEST_LIB) ;

${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp
@mkdir -p $(@D)
@mkdir -p $(@D)
$(CXX) -c $< -o $@ $(CXXFLAGS)

# Highway support objects are compiled straight from the submodule sources;
# hwy/targets.cc and hwy/abort.cc cover runtime CPU detection and error handling.
${DIR_OBJ}/hwy_targets.o: ${DIR_HWY}/hwy/targets.cc
	@mkdir -p $(dir $@)
	${CXX} -c $< -o $@ ${CXXFLAGS}

.PHONY:clean
.PHONY:static
# Second Highway support object, built from hwy/abort.cc in the submodule.
${DIR_OBJ}/hwy_abort.o: ${DIR_HWY}/hwy/abort.cc
	@mkdir -p $(dir $@)
	${CXX} -c $< -o $@ ${CXXFLAGS}

.PHONY: clean install clean-deps test

# Remove everything this Makefile builds for fastplong itself:
# the object directory, the main binary and the unit-test binary.
# Submodule build products are left alone (see clean-deps).
clean:
	@rm -rf ${DIR_OBJ}
	@rm -f ${TARGET} ${TEST_TARGET}

# Best-effort removal of the submodule build products. Every step discards its
# error output and is forced to succeed, so this target never fails, even when
# a submodule has not been checked out.
clean-deps:
	$(MAKE) -C ${DIR_ISAL} -f Makefile.unx clean 2>/dev/null || true
	rm -rf ${DIR_LIBDEFLATE}/build 2>/dev/null || true
	rm -rf ${DIR_GTEST}/build 2>/dev/null || true

# Install the already-built binary into $(BINDIR) (default $(PREFIX)/bin).
# The destination directory is created first so a fresh prefix does not make
# the copy fail. DESTDIR is honoured for staged (packaging) installs and is
# empty by default, which leaves the installed path unchanged.
install:
	install -d $(DESTDIR)$(BINDIR)
	install $(TARGET) $(DESTDIR)$(BINDIR)/$(TARGET)
	@echo "Installed."

${DIR_OBJ}/%.o:${DIR_TEST}/%.cpp
@mkdir -p $(@D)
$(CXX) -c $< -o $@ $(CXXFLAGS)

test-static: ${TEST_OBJ} ${OBJ}
@mkdir -p bin
$(CXX) $(TEST_OBJ) ${OBJ:./obj/main.o=} -o ${TEST_TARGET} $(STATIC_LD_FLAGS) -lgtest -lgtest_main
./${TEST_TARGET}
@mkdir -p $(@D)
$(CXX) -c $< -o $@ $(CXXFLAGS) -I${DIR_GTEST}/googletest/include

test:${TEST_OBJ} ${OBJ}
test: $(ISAL_LIB) $(LIBDEFLATE_LIB) $(GTEST_LIB) ${TEST_OBJ} ${OBJ}
@mkdir -p bin
$(CXX) $(TEST_OBJ) ${OBJ:./obj/main.o=} -o ${TEST_TARGET} $(LD_FLAGS) -lgtest -lgtest_main
$(CXX) $(TEST_OBJ) ${OBJ:./obj/main.o=} -o ${TEST_TARGET} $(ISAL_LIB) $(LIBDEFLATE_LIB) $(GTEST_LIB) $(GTEST_MAIN_LIB) $(LD_FLAGS)
./${TEST_TARGET}

# Pull in the compiler-generated header dependencies (-MD -MP) for both the
# program objects and the unit-test objects, so editing a header rebuilds the
# tests as well. The leading '-' ignores .d files that do not exist yet.
-include $(OBJ:.o=.d) $(TEST_OBJ:.o=.d)
23 changes: 7 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,30 +67,21 @@ mv fastplong.0.2.2 fastplong
chmod a+x ./fastplong
```
## or compile from source
`fastplong` depends on `libdeflate` and `isa-l` for fast decompression and compression of zipped data, and depends on `libhwy` for SIMD acceleration. It's recommended to install all of them via Anaconda:
```
conda install conda-forge::libdeflate
conda install conda-forge::isa-l
conda install conda-forge::libhwy
```
You can also try to install them with other package management systems like `apt/yum` on Linux, or `brew` on MacOS. Otherwise you can compile them from source (https://github.com/intel/isa-l, https://github.com/ebiggers/libdeflate, and https://github.com/google/highway)

### download and build fastplong
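`fastplong` depends on `isa-l`, `libdeflate` and `highway`, which are included as git submodules and compiled into the binary, so no system packages for them are needed (`googletest` is bundled the same way for `make test`). Building requires `cmake`, a C/C++ compiler and, on x86-64, the `nasm` assembler (needed by `isa-l`).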
```shell
# get source (you can also use browser to download from master or releases)
git clone https://github.com/OpenGene/fastplong.git

# build
git clone --recursive https://github.com/OpenGene/fastplong.git
cd fastplong
make -j

# test
make test

# Install
sudo make install
```

If you already cloned without `--recursive`, initialize the submodules first:
```shell
git submodule update --init --recursive
```

# input and output
Specify input by `-i` or `--in`, and specify output by `-o` or `--out`.
* if you don't specify the output file names, no output files will be written, but QC will still be performed on the data both before and after filtering.
Expand Down
76 changes: 76 additions & 0 deletions docs/plans/2026-02-26-simd-highway-refactor-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# SIMD Highway Refactor Design

## Goal

Centralize all SIMD code into a single `simd.h`/`simd.cpp` pair using Google Highway's `foreach_target.h` multi-target compilation + `HWY_DYNAMIC_DISPATCH` runtime dispatch pattern. Port proven implementations from fastp and accelerate remaining scalar hot loops.

## Current State

- fastplong links `-lhwy` as a system library
- SIMD exists in `adaptertrimmer.cpp` (mismatch counting) and `sequence.cpp` (reverse complement) but without multi-target dispatch
- `simdutil.h` has a custom `Transform1Reversed` template
- `filter.cpp` hot loops (`passFilter`, `passLowComplexityFilter`) are pure scalar

## Changes

### New Files

- `third_party/highway/` — Highway source (header-only + `targets.cc` / `abort.cc`)
- `src/simd.h` — Public SIMD API
- `src/simd.cpp` — Multi-target implementation with `foreach_target.h`

### Modified Files

- `Makefile` — Add `DIR_HWY`, compile `hwy_targets.o` / `hwy_abort.o`, remove `-lhwy`, add `-I${DIR_HWY}`
- `src/filter.cpp` — Replace scalar loops with `countQualityMetrics` and `countAdjacentDiffs`
- `src/sequence.cpp` — Replace inline Highway code with `reverseComplement` call
- `src/adaptertrimmer.cpp` — Replace inner SIMD loop with `countMismatches` call

### Deleted Files

- `src/simdutil.h` — Functionality merged into `simd.cpp`

## API

```cpp
namespace fastplong_simd {
void countQualityMetrics(const char* qualstr, const char* seqstr, int len,
char qualThreshold, int& lowQualNum, int& nBaseNum,
int& totalQual);
void reverseComplement(const char* src, char* dst, int len);
int countAdjacentDiffs(const char* data, int len);
int countMismatches(const char* a, const char* b, int len);
bool testSimd();
}
```

## simd.cpp Structure

```
#define HWY_TARGET_INCLUDE "src/simd.cpp"
#include "hwy/foreach_target.h"

namespace fastplong_simd::HWY_NAMESPACE {
CountQualityMetricsImpl() — vectorized quality stats (SumsOf2 overflow-safe)
ReverseComplementImpl() — vectorized complement + reverse
CountAdjacentDiffsImpl() — vectorized adjacent comparison
CountMismatchesImpl() — vectorized byte comparison
}

#if HWY_ONCE
HWY_EXPORT(...) — register all target versions
dispatch wrappers — HWY_DYNAMIC_DISPATCH
scalar references — for test validation
testSimd() — unit tests
#endif
```

## Key Technical Details

- Block-based accumulation in `countQualityMetrics` (255*N elements per block) prevents u16 overflow for long reads (10k-100k+ bp)
- `SumsOf2` for u8->u16 promotion works on all targets including HWY_SCALAR
- `searchAdapter` outer logic (branching, edit_distance) stays in `adaptertrimmer.cpp`; only inner mismatch loop delegates to `countMismatches`

## Reference

Based on fastp's `src/simd.h` / `src/simd.cpp` implementation pattern.
Loading