From 045c13f26ca99ef264f284946c0fe905989b1b0b Mon Sep 17 00:00:00 2001 From: Daniel Markstedt Date: Sat, 7 Mar 2026 08:32:13 +0100 Subject: [PATCH] introduce fuzz testing of the library using llvm libfuzzer with clang and llvm's libfuzzer, fuzzed data can be fed into the functions under test to stress test them with random input values --- .clusterfuzzlite/Dockerfile | 14 + .clusterfuzzlite/build.sh | 51 +++ .github/dependabot.yml | 5 + .github/workflows/fuzz.yml | 79 +++++ bstring/meson.build | 29 +- fuzz/README.md | 85 +++++ fuzz/corpus/01_hello_world | Bin 0 -> 18 bytes fuzz/corpus/02_html_markup | Bin 0 -> 33 bytes fuzz/corpus/03_underscore_split | Bin 0 -> 20 bytes fuzz/corpus/04_base64 | Bin 0 -> 20 bytes fuzz/corpus/05_empty | Bin 0 -> 4 bytes fuzz/corpus/06_embedded_nul | Bin 0 -> 16 bytes fuzz/corpus/07_newlines | Bin 0 -> 33 bytes fuzz/corpus/08_whitespace | Bin 0 -> 37 bytes fuzz/corpus/09_long_string | 1 + fuzz/corpus/10_sgml_entities | Bin 0 -> 29 bytes fuzz/corpus/11_utf8_multibyte | Bin 0 -> 16 bytes fuzz/corpus/12_utf8_malformed | 1 + fuzz/corpus/13_utf16le_bom | Bin 0 -> 10 bytes fuzz/corpus/14_utf16be_bom | Bin 0 -> 10 bytes fuzz/corpus/15_ucs4_codepoints | Bin 0 -> 16 bytes fuzz/corpus/16_surrogate_codepoints | Bin 0 -> 9 bytes fuzz/fuzz_bstring.c | 532 ++++++++++++++++++++++++++++ fuzz/meson.build | 16 + meson.build | 14 + meson_options.txt | 12 + 26 files changed, 831 insertions(+), 8 deletions(-) create mode 100644 .clusterfuzzlite/Dockerfile create mode 100755 .clusterfuzzlite/build.sh create mode 100644 .github/workflows/fuzz.yml create mode 100644 fuzz/README.md create mode 100644 fuzz/corpus/01_hello_world create mode 100644 fuzz/corpus/02_html_markup create mode 100644 fuzz/corpus/03_underscore_split create mode 100644 fuzz/corpus/04_base64 create mode 100644 fuzz/corpus/05_empty create mode 100644 fuzz/corpus/06_embedded_nul create mode 100644 fuzz/corpus/07_newlines create mode 100644 fuzz/corpus/08_whitespace create mode 100644 
fuzz/corpus/09_long_string create mode 100644 fuzz/corpus/10_sgml_entities create mode 100644 fuzz/corpus/11_utf8_multibyte create mode 100644 fuzz/corpus/12_utf8_malformed create mode 100644 fuzz/corpus/13_utf16le_bom create mode 100644 fuzz/corpus/14_utf16be_bom create mode 100644 fuzz/corpus/15_ucs4_codepoints create mode 100644 fuzz/corpus/16_surrogate_codepoints create mode 100644 fuzz/fuzz_bstring.c create mode 100644 fuzz/meson.build diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile new file mode 100644 index 0000000..70e26c0 --- /dev/null +++ b/.clusterfuzzlite/Dockerfile @@ -0,0 +1,14 @@ +FROM gcr.io/oss-fuzz-base/base-builder:v1@sha256:6fce2f40942176a0bb0c2cd6ee4bf5faea55e64ef3bee65f1c647cc2c8dcbe82 +USER root +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + meson \ + ninja-build \ + && rm -rf /var/lib/apt/lists/* + +COPY bstring/ $SRC/project/bstring/ +COPY fuzz/ $SRC/project/fuzz/ +COPY meson_options.txt $SRC/project/ +COPY meson.build $SRC/project/ +WORKDIR $SRC/project +COPY .clusterfuzzlite/build.sh $SRC/build.sh diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh new file mode 100755 index 0000000..8cb9291 --- /dev/null +++ b/.clusterfuzzlite/build.sh @@ -0,0 +1,51 @@ +#!/bin/bash -eu +# +# ClusterFuzzLite build script for bstring. +# +# ClusterFuzzLite sets: +# $CC / $CXX - clang +# $CFLAGS - sanitizer + coverage flags (e.g. -fsanitize=address,fuzzer-no-link) +# $LDFLAGS - sanitizer link flags +# $LIB_FUZZING_ENGINE - fuzzer driver (e.g. -fsanitize=fuzzer) +# $OUT - output directory for fuzz target binaries +# +# Meson picks up $CC/$CFLAGS/$LDFLAGS from the environment during setup. +# Do not append $LIB_FUZZING_ENGINE to global LDFLAGS here: Meson's +# compiler sanity check links a regular main() and fails when libFuzzer's +# main() is injected globally. + +PROJECT_SRC="$SRC/project" +if [[ ! 
-f "$PROJECT_SRC/meson.build" ]]; then + PROJECT_SRC="$SRC/bstring" +fi + +build_targets() { + cd "$PROJECT_SRC" + + rm -rf build + + meson_args=( + -Ddefault_library=static + -Denable-docs=false + -Denable-fuzzing=true + -Denable-utf8=true + -Denable-tests=false + --buildtype=plain + ) + + if [[ -n "${LIB_FUZZING_ENGINE:-}" ]]; then + meson_args+=("-Dfuzz-link-arg=${LIB_FUZZING_ENGINE}") + fi + + meson setup build "${meson_args[@]}" + ninja -C build +} + +package_outputs() { + cd "$PROJECT_SRC" + cp build/fuzz/fuzz_bstring "$OUT/" + zip -j "$OUT/fuzz_bstring_seed_corpus.zip" fuzz/corpus/* +} + +build_targets +package_outputs diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5ace460..62dde92 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,5 +1,10 @@ version: 2 updates: + - package-ecosystem: "docker" + directory: "/.clusterfuzzlite" + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" directory: "/" schedule: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..ed8081e --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,79 @@ +name: ClusterFuzzLite +on: + push: + branches: + - main + pull_request: + branches: + - main + types: + - opened + - synchronize + - reopened + schedule: + - cron: '0 3 * * 0' # Weekly on Sunday at 03:00 UTC + workflow_dispatch: + +permissions: + actions: read + contents: read + +jobs: + # Run for a short window on every PR / push to catch regressions introduced + # by the change under review. 
+ PR: + name: Fuzzing (code-change) + if: github.event_name == 'pull_request' || github.event_name == 'push' + runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-pr + cancel-in-progress: true + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Build fuzz targets + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + language: c + sanitizer: address + - name: Run fuzz targets + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 60 + mode: code-change + sanitizer: address + output-sarif: true + - name: Upload SARIF results + if: always() && hashFiles('sarif-results/address.sarif') != '' + uses: github/codeql-action/upload-sarif@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 + with: + sarif_file: sarif-results/address.sarif + + # Run longer on a schedule to build up a persistent corpus and surface + # crashes that require deeper exploration. 
+ batch: + name: Fuzzing (batch) + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Build fuzz targets + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + language: c + sanitizer: address + - name: Run fuzz targets + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 3600 + mode: batch + sanitizer: address diff --git a/bstring/meson.build b/bstring/meson.build index bb46e8c..6d76a7c 100644 --- a/bstring/meson.build +++ b/bstring/meson.build @@ -8,13 +8,26 @@ endif install_headers(bstring_headers) -libbstring = library( - meson.project_name(), - bstring_sources, - version: meson.project_version(), - soversion: '1', - include_directories: bstring_inc, - install: true, -) +# When fuzzing, the library must be static so that coverage-instrumented object +# files are linked directly into the fuzz binary, where the sanitizer runtime +# can resolve the __sanitizer_cov_* symbols. A shared library would leave +# those references dangling at library link time. 
+if get_option('enable-fuzzing') + libbstring = static_library( + meson.project_name(), + bstring_sources, + include_directories: bstring_inc, + install: false, + ) +else + libbstring = library( + meson.project_name(), + bstring_sources, + version: meson.project_version(), + soversion: '1', + include_directories: bstring_inc, + install: true, + ) +endif bstring_dep = declare_dependency(include_directories: bstring_inc, link_with: libbstring) diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000..0c711ad --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,85 @@ +# Fuzz Testing + +A single libFuzzer fuzz target is provided which tests `bstrlib`, `bstraux`, and optionally `utf8util` and `buniutil`. + +Unicode support is conditional: when the project is built with `-Denable-utf8=true`, +the target is compiled with `-DBSTRING_ENABLE_UTF8` and the unicode fuzzing code +is included automatically. + +## Prerequisites + +libFuzzer is part of LLVM. The compiler runtime libraries are shipped in a +separate package from the compiler itself. 
+ +**Debian/Ubuntu:** + +```sh +sudo apt-get install clang libclang-rt-dev +``` + +**Fedora:** + +```sh +sudo dnf install clang compiler-rt +``` + +## Build + +```sh +CC=clang meson setup build-fuzz \ + -Denable-fuzzing=true \ + -Denable-tests=false \ + --buildtype=plain +meson compile -C build-fuzz +``` + +## Run + +```sh +# Run against the seed corpus for 60 seconds +./build-fuzz/fuzz/fuzz_bstring fuzz/corpus/ -max_total_time=60 -max_len=260 + +# Run indefinitely, saving new corpus entries and crash inputs as they are found +mkdir -p fuzz/crashes +./build-fuzz/fuzz/fuzz_bstring fuzz/corpus/ \ + -max_len=260 \ + -artifact_prefix=fuzz/crashes/ \ + -jobs=$(nproc) +``` + +Useful libFuzzer flags: + +| Flag | Description | +| --- | --- | +| `-max_total_time=N` | Stop after N seconds | +| `-jobs=N` | Run N parallel fuzzing instances | +| `-max_len=4096` | Cap input size in bytes | +| `-artifact_prefix=DIR/` | Directory for crash and timeout inputs | +| `-runs=N` | Stop after N iterations | + +## Reproducing a crash + +If a crash input is found, reproduce it by passing the file directly to the +relevant target: + +```sh +./build-fuzz/fuzz/fuzz_bstring path/to/crash-input +``` + +## Seed corpus + +The `corpus/` directory contains initial seed inputs that guide the fuzzer +toward interesting code paths on the first run. As the fuzzer discovers new +coverage it writes additional entries to the corpus directory automatically. + +## CI (ClusterFuzzLite) + +The `.clusterfuzzlite/` directory at the repository root contains the +Dockerfile and `build.sh` used by ClusterFuzzLite. The GitHub Actions +workflow at `.github/workflows/fuzz.yml` runs two jobs: + +- **code-change** — 60 seconds on every push and pull request targeting `main` +- **batch** — 1 hour on a weekly schedule or manual dispatch + +Crash reports from the code-change job are surfaced as GitHub Security alerts via +SARIF upload.
diff --git a/fuzz/corpus/01_hello_world b/fuzz/corpus/01_hello_world new file mode 100644 index 0000000000000000000000000000000000000000..5e34ee82f6bb70ef6367fb1c4e27bbe0f1a39f46 GIT binary patch literal 18 ZcmY$;VPN%0&B@8vQ7F$Z%1Mb)1OO`f1vdZy literal 0 HcmV?d00001 diff --git a/fuzz/corpus/02_html_markup b/fuzz/corpus/02_html_markup new file mode 100644 index 0000000000000000000000000000000000000000..b8fb241a1be6ba7405c2ef8ab24e8b9cd2190214 GIT binary patch literal 33 ocmY$eVPLgMvhzsI$;nqxOUx~>RtV28%1N=&PqMQ~D$=(D0GSF2p#T5? literal 0 HcmV?d00001 diff --git a/fuzz/corpus/03_underscore_split b/fuzz/corpus/03_underscore_split new file mode 100644 index 0000000000000000000000000000000000000000..87fedc4d67a67933b6cc25ae0009661de007bd47 GIT binary patch literal 20 ZcmdOmXJARo&yP<^ECS-H_`=fC3IIYd2R8r! literal 0 HcmV?d00001 diff --git a/fuzz/corpus/04_base64 b/fuzz/corpus/04_base64 new file mode 100644 index 0000000000000000000000000000000000000000..c8e395bb695cd7e75ba9db98c67e6fa3492ab941 GIT binary patch literal 20 bcmY$eW#9;Q4=YY`w@42&vaC#U53~gUGx!C9 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/05_empty b/fuzz/corpus/05_empty new file mode 100644 index 0000000000000000000000000000000000000000..1adea62472f82b68318a50970ae083dbf76e9c96 GIT binary patch literal 4 LcmY$;VPFIR0SN$3 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/06_embedded_nul b/fuzz/corpus/06_embedded_nul new file mode 100644 index 0000000000000000000000000000000000000000..641a13f254be88136f494efdc9335fe403d13967 GIT binary patch literal 16 VcmZQz0D_FvoSb}y^8BKl6aX1e1U&!% literal 0 HcmV?d00001 diff --git a/fuzz/corpus/07_newlines b/fuzz/corpus/07_newlines new file mode 100644 index 0000000000000000000000000000000000000000..9401f13d970ccdeab96010799b6aae469d00b49d GIT binary patch literal 33 fcmY%3V&KTh%u7|s&r9V3GfK+yp^S{8)Ko42qB9Cz literal 0 HcmV?d00001 diff --git a/fuzz/corpus/08_whitespace b/fuzz/corpus/08_whitespace new file mode 100644 index 
0000000000000000000000000000000000000000..c2d3c01db6dbc24b1b7f1cf9ef49934358618c64 GIT binary patch literal 37 ncmY#TU{C;poYcgW%)E4k#Jm)RlA^@S91y3tATc?$7$^$>$4(1j literal 0 HcmV?d00001 diff --git a/fuzz/corpus/09_long_string b/fuzz/corpus/09_long_string new file mode 100644 index 0000000..a7193f4 --- /dev/null +++ b/fuzz/corpus/09_long_string @@ -0,0 +1 @@ +x/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij \ No newline at end of file diff --git a/fuzz/corpus/10_sgml_entities b/fuzz/corpus/10_sgml_entities new file mode 100644 index 0000000000000000000000000000000000000000..f9c47b313be21f5d90ce97ed425a3670153b395a GIT binary patch literal 29 gcmY$hW?)szDX~^d2jaxs0&BIx(tHrJAivle0CWim!2kdN literal 0 HcmV?d00001 diff --git a/fuzz/corpus/11_utf8_multibyte b/fuzz/corpus/11_utf8_multibyte new file mode 100644 index 0000000000000000000000000000000000000000..379521feaa60ba65dc94872e4716cca3b54f5807 GIT binary patch literal 16 YcmZQ#IK1*v)0z+SXEbD_=H%o907VQ4IsgCw literal 0 HcmV?d00001 diff --git a/fuzz/corpus/12_utf8_malformed b/fuzz/corpus/12_utf8_malformed new file mode 100644 index 0000000..05080f9 --- /dev/null +++ b/fuzz/corpus/12_utf8_malformed @@ -0,0 +1 @@ +أق‚ِŸ˜€ٍ € \ No newline at end of file diff --git a/fuzz/corpus/13_utf16le_bom b/fuzz/corpus/13_utf16le_bom new file mode 100644 index 0000000000000000000000000000000000000000..cc085d9a3233a1cfbfdc2efe7693fce180ecb08a GIT binary patch literal 10 RcmZQz$jJQv&ym520RR%A15yA0 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/14_utf16be_bom b/fuzz/corpus/14_utf16be_bom new file mode 100644 index 0000000000000000000000000000000000000000..2dfc241ebc4d2bd5631bd12fe47e32951b6e77c3 GIT binary patch literal 10 RcmZQz$j|=wpTUvA2>=pf16u$9 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/15_ucs4_codepoints b/fuzz/corpus/15_ucs4_codepoints new file mode 100644 index 
0000000000000000000000000000000000000000..73025169c85cd4fb2319d5ab369541c93f34c4b5 GIT binary patch literal 16 XcmZQzU~pt$P*}sj_>F-<;QwC$6Q2YT literal 0 HcmV?d00001 diff --git a/fuzz/corpus/16_surrogate_codepoints b/fuzz/corpus/16_surrogate_codepoints new file mode 100644 index 0000000000000000000000000000000000000000..2d7c2671e84667cb3fb37e542e243e0b4928507c GIT binary patch literal 9 QcmZQzX1Kw?aEE~b00+eZxBvhE literal 0 HcmV?d00001 diff --git a/fuzz/fuzz_bstring.c b/fuzz/fuzz_bstring.c new file mode 100644 index 0000000..6870758 --- /dev/null +++ b/fuzz/fuzz_bstring.c @@ -0,0 +1,532 @@ +/* Copyright (C) 2026 Daniel Markstedt + * libFuzzer fuzz target for the Better String Library + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of bstrlib nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * GNU General Public License Version 2 (the "GPL"). + */ + +/* + * Exercises the full public API of bstrlib and bstraux using raw bytes + * supplied by the fuzzer engine. + * + * Input layout + * ──────────── + * data[0] fill character (gap-fill for insert / replace) + * data[1] split character (separator for bsplit) + * data[2] position hint (used modulo string length + 1) + * data[3] length hint (used modulo a safe upper bound) + * data[4..4+mid) content of b0 + * data[4+mid..) content of b1 (also used as find / replace target) + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "bstraux.h" +#include "bstrlib.h" + +#ifdef BSTRING_ENABLE_UTF8 +#include "buniutil.h" +#include "utf8util.h" + +/* Maximum UCS-2 output words allocated on the stack for buGetBlkUTF16. */ +#define MAX_UCS2_OUT 256 + +static const cpUcs4 errch_table[] = { + 0xFFFD, /* replacement character */ + '?', /* ASCII fallback */ + 0, /* NUL */ + ~(cpUcs4)0, /* abort sentinel */ +}; +#define ERRCH_TABLE_SZ ((int)(sizeof errch_table / sizeof errch_table[0])) +#endif /* BSTRING_ENABLE_UTF8 */ + +int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + /* Need at least four control bytes to do anything interesting.
*/ + if (size < 4) { + return 0; + } + + unsigned char fill = data[0]; + unsigned char split_ch = data[1]; + int pos_hint = (int)data[2]; + int len_hint = (int)data[3]; + + const uint8_t *payload = data + 4; + size_t psize = size - 4; + size_t mid = psize / 2; + + /* Primary bstrings built directly from the fuzzer payload. */ + bstring b0 = blk2bstr(payload, (int)mid); + bstring b1 = blk2bstr(payload + mid, (int)(psize - mid)); + + if (!b0 || !b1) { + bdestroy(b0); + bdestroy(b1); + return 0; + } + + int slen0 = b0->slen; + int slen1 = b1->slen; + + /* Clamp positional parameters to valid ranges. */ + int safe_pos = slen0 > 0 ? pos_hint % (slen0 + 1) : 0; + int safe_len = (len_hint % 64) + 1; /* 1..64 */ + + /* ق”€ق”€ Search and comparison (read-only) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + (void)binstr(b0, safe_pos, b1); + (void)binstrr(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)binstrcaseless(b0, safe_pos, b1); + (void)binstrrcaseless(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)bstrchrp(b0, split_ch, safe_pos); + (void)bstrrchrp(b0, split_ch, slen0 > 0 ? slen0 - 1 : 0); + (void)binchr(b0, 0, b1); + (void)binchrr(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)bninchr(b0, 0, b1); + (void)bninchrr(b0, slen0 > 0 ? 
slen0 - 1 : 0, b1); + (void)bstrcmp(b0, b1); + (void)bstricmp(b0, b1); + (void)bstrncmp(b0, b1, safe_len); + (void)bstrnicmp(b0, b1, safe_len); + (void)biseq(b0, b1); + (void)biseqcaseless(b0, b1); + (void)biseqblk(b0, payload, (int)mid); + (void)bisstemeqblk(b0, payload, (int)mid); + (void)bisstemeqcaselessblk(b0, payload, (int)mid); + + /* ق”€ق”€ Substring extraction ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + bstring sub = bmidstr(b0, safe_pos, safe_len); + bdestroy(sub); + + bstring head = bHead(b0, safe_len); + bdestroy(head); + + bstring tail = bTail(b0, safe_len); + bdestroy(tail); + + /* ق”€ق”€ NUL-terminated C-string round-trip ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + char *cstr0 = bstr2cstr(b0, '?'); + + /* ق”€ق”€ Encoding / decoding ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + bstring enc_sgml = bstrcpy(b0); + if (enc_sgml) { + (void)bSGMLEncode(enc_sgml); + bdestroy(enc_sgml); + } + + bstring enc_b64 = bBase64Encode(b0); + bdestroy(enc_b64); + + int b64_err = 0; + bstring dec_b64 = bBase64DecodeEx(b0, &b64_err); + bdestroy(dec_b64); + + bstring enc_uu = bUuEncode(b0); + bdestroy(enc_uu); + + int uu_bad = 0; + bstring dec_uu = bUuDecodeEx(b0, &uu_bad); + bdestroy(dec_uu); + + bstring enc_yenc = bYEncode(b0); + bdestroy(enc_yenc); + + bstring dec_yenc = bYDecode(b0); + bdestroy(dec_yenc); + + /* ق”€ق”€ Netstring parser (fuzzes parser with raw bytes as input) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + if (psize > 0) { + char *raw_cstr = malloc(psize + 1); + if (raw_cstr) { + memcpy(raw_cstr, payload, psize); + raw_cstr[psize] = '\0'; + bstring fromnet = bNetStr2Bstr(raw_cstr); + bdestroy(fromnet); + free(raw_cstr); + } + } + + char *netstr = bStr2NetStr(b0); + if (netstr) { + bstring roundtrip = 
bNetStr2Bstr(netstr); + bdestroy(roundtrip); + bcstrfree(netstr); + } + + /* ق”€ق”€ Split / join ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + struct bstrList *lst0 = bsplit(b0, split_ch); + if (lst0) { + bstring joined = bjoin(lst0, b1); + bdestroy(joined); + bstrListDestroy(lst0); + } + + struct bstrList *lst1 = bsplits(b0, b1); + if (lst1) { + bstrListDestroy(lst1); + } + + /* bsplitstr requires a non-empty separator. */ + if (slen1 > 0) { + struct bstrList *lst2 = bsplitstr(b0, b1); + if (lst2) { + bstring joined2 = bjoin(lst2, b1); + bdestroy(joined2); + bstrListDestroy(lst2); + } + } + + /* ق”€ق”€ In-place mutation (each operates on a fresh copy of b0) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + bstring work; + + work = bstrcpy(b0); + if (work) { + (void)bconcat(work, b1); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bcatblk(work, payload + mid, (int)(psize - mid)); + bdestroy(work); + } + + if (cstr0) { + work = bstrcpy(b0); + if (work) { + (void)bcatcstr(work, cstr0); + bdestroy(work); + } + + work = bstrcpy(b1); + if (work) { + (void)bassigncstr(work, cstr0); + bdestroy(work); + } + } + + work = bstrcpy(b0); + if (work) { + (void)bassignblk(work, payload + mid, (int)(psize - mid)); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsert(work, safe_pos, b1, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsertblk(work, safe_pos, payload, (int)mid, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsertch(work, safe_pos, safe_len, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bdelete(work, safe_pos, safe_len); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bsetstr(work, safe_pos, b1, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)breplace(work, safe_pos, safe_len, b1, 
fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btrunc(work, safe_len); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btoupper(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btolower(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bltrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)brtrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bReverse(work); + bdestroy(work); + } + + /* bpattern errors on empty strings; guard to avoid false crash reports. */ + if (slen0 > 0) { + work = bstrcpy(b0); + if (work) { + (void)bpattern(work, safe_len + slen0); + bdestroy(work); + } + } + + /* Cap repetitions to prevent OOM in CI. */ + work = bstrcpy(b0); + if (work) { + (void)bReplicate(work, (len_hint % 8) + 1); + bdestroy(work); + } + + /* bfindreplace: find operand must be non-empty. 
*/ + if (slen1 > 0) { + work = bstrcpy(b0); + if (work) { + (void)bfindreplace(work, b1, b0, 0); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bfindreplacecaseless(work, b1, b0, 0); + bdestroy(work); + } + } + + /* ق”€ق”€ Justification (bstraux) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + + int width = safe_len + slen0; + + work = bstrcpy(b0); + if (work) { + (void)bJustifyLeft(work, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyRight(work, width, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyCenter(work, width, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyMargin(work, width, ' '); + bdestroy(work); + } + + bcstrfree(cstr0); + bdestroy(b0); + bdestroy(b1); + +#ifdef BSTRING_ENABLE_UTF8 + /* + * Unicode fuzzing + * ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ + * Reuses the same payload bytes already parsed above. + * data[0] ق†’ pos_hint (same byte as fill) + * data[1] ق†’ errch_sel (same byte as split_ch) + * payload[0..mid) ق†’ treated as raw UTF-8 + * payload[mid..) 
ق†’ reinterpreted as UCS-2 / UCS-4 word arrays + */ + { + unsigned char pos_hint = data[0]; + unsigned char errch_sel = data[1]; + + const uint8_t *utf8_data = payload; + size_t utf8_size = mid; + const uint8_t *word_data = payload + mid; + size_t word_size = psize - mid; + + cpUcs4 errCh = errch_table[errch_sel % ERRCH_TABLE_SZ]; + + /* ق”€ق”€ utf8util: forward iterator (GetNext) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + { + struct utf8Iterator iter; + utf8IteratorInit(&iter, (unsigned char *)(uintptr_t)utf8_data, + (int)utf8_size); + while (!utf8IteratorNoMore(&iter)) { + (void)utf8IteratorGetNextCodePoint(&iter, errCh); + } + utf8IteratorUninit(&iter); + } + + /* ق”€ق”€ utf8util: peek iterator (GetCurr without advancing) ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + { + struct utf8Iterator iter; + utf8IteratorInit(&iter, (unsigned char *)(uintptr_t)utf8_data, + (int)utf8_size); + if (!utf8IteratorNoMore(&iter)) { + (void)utf8IteratorGetCurrCodePoint(&iter, errCh); + } + utf8IteratorUninit(&iter); + } + + /* ق”€ق”€ utf8util: backward scanner ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + if (utf8_size > 0) { + for (int i = 0; i < (int)utf8_size; i++) { + cpUcs4 out; + (void)utf8ScanBackwardsForCodePoint(utf8_data, + (int)utf8_size, i, &out); + } + int pos = (int)(pos_hint % utf8_size); + (void)utf8ScanBackwardsForCodePoint(utf8_data, + (int)utf8_size, pos, NULL); + } + + /* ق”€ق”€ buniutil: UTF-8 validation ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + { + bstring bu = blk2bstr(utf8_data, (int)utf8_size); + if (bu) { + (void)buIsUTF8Content(bu); + bdestroy(bu); + } + } + + /* ق”€ق”€ buniutil: UTF-8 bstring ق†’ UTF-16 array ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + { + bstring bu = blk2bstr(utf8_data, (int)utf8_size); + if (bu) { + cpUcs2 out_buf[MAX_UCS2_OUT]; + (void)buGetBlkUTF16(out_buf, MAX_UCS2_OUT, 
errCh, bu, 0); + if (utf8_size > 0) { + int pos = (int)(pos_hint % utf8_size); + (void)buGetBlkUTF16(out_buf, MAX_UCS2_OUT, errCh, bu, pos); + } + bdestroy(bu); + } + } + + /* ق”€ق”€ buniutil: UCS-4 array ق†’ UTF-8 bstring ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + if (word_size >= sizeof(cpUcs4)) { + int n = (int)(word_size / sizeof(cpUcs4)); + cpUcs4 *ucs4_buf = malloc((size_t)n * sizeof(cpUcs4)); + if (ucs4_buf) { + memcpy(ucs4_buf, word_data, (size_t)n * sizeof(cpUcs4)); + bstring out = bfromcstr(""); + if (out) { + (void)buAppendBlkUcs4(out, ucs4_buf, n, errCh); + bdestroy(out); + } + free(ucs4_buf); + } + } + + /* ق”€ق”€ buniutil: UTF-16 array ق†’ UTF-8 bstring ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ق”€ */ + if (word_size >= sizeof(cpUcs2)) { + int n = (int)(word_size / sizeof(cpUcs2)); + cpUcs2 *ucs2_buf = malloc((size_t)n * sizeof(cpUcs2)); + if (ucs2_buf) { + memcpy(ucs2_buf, word_data, (size_t)n * sizeof(cpUcs2)); + + bstring out; + + /* bom=NULL: no BOM tracking, native endianness assumed. */ + out = bfromcstr(""); + if (out) { + (void)buAppendBlkUTF16(out, ucs2_buf, n, NULL, errCh); + bdestroy(out); + } + + /* *bom=0: let the function detect and consume a leading BOM. */ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + /* *bom=0xFEFF: caller pre-detected a little-endian BOM. */ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0xFEFF; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + /* *bom=0xFFFE: caller pre-detected a big-endian BOM. 
*/ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0xFFFE; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + free(ucs2_buf); + } + } + } +#endif /* BSTRING_ENABLE_UTF8 */ + + return 0; +} diff --git a/fuzz/meson.build b/fuzz/meson.build new file mode 100644 index 0000000..85f91ea --- /dev/null +++ b/fuzz/meson.build @@ -0,0 +1,16 @@ +fuzzer_c_args = [] +fuzzer_link_arg = get_option('fuzz-link-arg') +fuzzer_link_args = [fuzzer_link_arg] + +if get_option('enable-utf8') + fuzzer_c_args += '-DBSTRING_ENABLE_UTF8' +endif + +executable( + 'fuzz_bstring', + 'fuzz_bstring.c', + link_with: libbstring, + include_directories: bstring_inc, + c_args: fuzzer_c_args, + link_args: fuzzer_link_args, +) diff --git a/meson.build b/meson.build index 3248ebf..be4c4c3 100644 --- a/meson.build +++ b/meson.build @@ -38,6 +38,16 @@ if get_option('reduce-namespace-pollution') add_project_arguments('-DBSTRLIB_REDUCE_NAMESPACE_POLLUTION', language: 'c') endif +if get_option('enable-fuzzing') + if cc.get_id() != 'clang' + error('Fuzz targets require clang (set CC=clang)') + endif + # Instrument the library itself for coverage so libFuzzer can track which + # code paths each input explores. The fuzz targets are linked separately + # with the full -fsanitize=fuzzer (or $LIB_FUZZING_ENGINE) driver. 
+ add_project_arguments('-fsanitize=fuzzer-no-link', language: 'c') +endif + add_project_arguments(warning_flags, language: 'c') bstring_inc = include_directories(['.', 'bstring']) conf_data = configuration_data() @@ -72,6 +82,10 @@ if get_option('enable-tests') endif endif +if get_option('enable-fuzzing') + subdir('fuzz') +endif + doxygen = find_program('doxygen', required: false) if get_option('enable-docs') diff --git a/meson_options.txt b/meson_options.txt index 95c0a3b..ee6221c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -10,6 +10,18 @@ option( value: false, description: 'Generate and install documentation', ) +option( + 'enable-fuzzing', + type: 'boolean', + value: false, + description: 'Build libFuzzer fuzz targets (requires clang)', +) +option( + 'fuzz-link-arg', + type: 'string', + value: '-fsanitize=fuzzer,address', + description: 'Linker flag used for fuzz targets (ClusterFuzzLite passes LIB_FUZZING_ENGINE here)', +) option( 'enable-tests', type: 'boolean',