diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile new file mode 100644 index 0000000..70e26c0 --- /dev/null +++ b/.clusterfuzzlite/Dockerfile @@ -0,0 +1,14 @@ +FROM gcr.io/oss-fuzz-base/base-builder:v1@sha256:6fce2f40942176a0bb0c2cd6ee4bf5faea55e64ef3bee65f1c647cc2c8dcbe82 +USER root +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + meson \ + ninja-build \ + && rm -rf /var/lib/apt/lists/* + +COPY bstring/ $SRC/project/bstring/ +COPY fuzz/ $SRC/project/fuzz/ +COPY meson_options.txt $SRC/project/ +COPY meson.build $SRC/project/ +WORKDIR $SRC/project +COPY .clusterfuzzlite/build.sh $SRC/build.sh diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh new file mode 100755 index 0000000..8cb9291 --- /dev/null +++ b/.clusterfuzzlite/build.sh @@ -0,0 +1,51 @@ +#!/bin/bash -eu +# +# ClusterFuzzLite build script for bstring. +# +# ClusterFuzzLite sets: +# $CC / $CXX - clang +# $CFLAGS - sanitizer + coverage flags (e.g. -fsanitize=address,fuzzer-no-link) +# $LDFLAGS - sanitizer link flags +# $LIB_FUZZING_ENGINE - fuzzer driver (e.g. -fsanitize=fuzzer) +# $OUT - output directory for fuzz target binaries +# +# Meson picks up $CC/$CFLAGS/$LDFLAGS from the environment during setup. +# Do not append $LIB_FUZZING_ENGINE to global LDFLAGS here: Meson's +# compiler sanity check links a regular main() and fails when libFuzzer's +# main() is injected globally. + +PROJECT_SRC="$SRC/project" +if [[ ! 
-f "$PROJECT_SRC/meson.build" ]]; then + PROJECT_SRC="$SRC/bstring" +fi + +build_targets() { + cd "$PROJECT_SRC" + + rm -rf build + + meson_args=( + -Ddefault_library=static + -Denable-docs=false + -Denable-fuzzing=true + -Denable-utf8=true + -Denable-tests=false + --buildtype=plain + ) + + if [[ -n "${LIB_FUZZING_ENGINE:-}" ]]; then + meson_args+=("-Dfuzz-link-arg=${LIB_FUZZING_ENGINE}") + fi + + meson setup build "${meson_args[@]}" + ninja -C build +} + +package_outputs() { + cd "$PROJECT_SRC" + cp build/fuzz/fuzz_bstring "$OUT/" + zip -j "$OUT/fuzz_bstring_seed_corpus.zip" fuzz/corpus/* +} + +build_targets +package_outputs diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5ace460..62dde92 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,5 +1,10 @@ version: 2 updates: + - package-ecosystem: "docker" + directory: "/.clusterfuzzlite" + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" directory: "/" schedule: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..ed8081e --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,79 @@ +name: ClusterFuzzLite +on: + push: + branches: + - main + pull_request: + branches: + - main + types: + - opened + - synchronize + - reopened + schedule: + - cron: '0 3 * * 0' # Weekly on Sunday at 03:00 UTC + workflow_dispatch: + +permissions: + actions: read + contents: read + +jobs: + # Run for a short window on every PR / push to catch regressions introduced + # by the change under review. 
+ PR: + name: Fuzzing (code-change) + if: github.event_name == 'pull_request' || github.event_name == 'push' + runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-pr + cancel-in-progress: true + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Build fuzz targets + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + language: c + sanitizer: address + - name: Run fuzz targets + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 60 + mode: code-change + sanitizer: address + output-sarif: true + - name: Upload SARIF results + if: always() && hashFiles('sarif-results/address.sarif') != '' + uses: github/codeql-action/upload-sarif@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 + with: + sarif_file: sarif-results/address.sarif + + # Run longer on a schedule to build up a persistent corpus and surface + # crashes that require deeper exploration. 
+ batch: + name: Fuzzing (batch) + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Build fuzz targets + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + language: c + sanitizer: address + - name: Run fuzz targets + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 3600 + mode: batch + sanitizer: address diff --git a/bstring/meson.build b/bstring/meson.build index bb46e8c..6d76a7c 100644 --- a/bstring/meson.build +++ b/bstring/meson.build @@ -8,13 +8,26 @@ endif install_headers(bstring_headers) -libbstring = library( - meson.project_name(), - bstring_sources, - version: meson.project_version(), - soversion: '1', - include_directories: bstring_inc, - install: true, -) +# When fuzzing, the library must be static so that coverage-instrumented object +# files are linked directly into the fuzz binary, where the sanitizer runtime +# can resolve the __sanitizer_cov_* symbols. A shared library would leave +# those references dangling at library link time. 
+if get_option('enable-fuzzing') + libbstring = static_library( + meson.project_name(), + bstring_sources, + include_directories: bstring_inc, + install: false, + ) +else + libbstring = library( + meson.project_name(), + bstring_sources, + version: meson.project_version(), + soversion: '1', + include_directories: bstring_inc, + install: true, + ) +endif bstring_dep = declare_dependency(include_directories: bstring_inc, link_with: libbstring) diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000..0c711ad --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,85 @@ +# Fuzz Testing + +A single libFuzzer fuzz target is provided which tests `bstrlib`, `bstraux`, and optionally `utf8util` and `buniutil`. + +Unicode support is conditional: when the project is built with `-Denable-utf8=true`, +the target is compiled with `-DBSTRING_ENABLE_UTF8` and the unicode fuzzing code +is included automatically. + +## Prerequisites + +libFuzzer is part of LLVM. The compiler runtime libraries are shipped in a +separate package from the compiler itself. 
+ +**Debian/Ubuntu:** + +```sh +sudo apt-get install clang libclang-rt-dev +``` + +**Fedora:** + +```sh +sudo dnf install clang compiler-rt +``` + +## Build + +```sh +CC=clang meson setup build-fuzz \ + -Denable-fuzzing=true \ + -Denable-tests=false \ + --buildtype=plain +meson compile -C build-fuzz +``` + +## Run + +```sh +# Run against the seed corpus for 60 seconds +./build-fuzz/fuzz/fuzz_bstring fuzz/corpus/ -max_total_time=60 -max_len=260 + +# Run indefinitely, saving new corpus entries and crash inputs as they are found +mkdir -p fuzz/crashes +./build-fuzz/fuzz/fuzz_bstring fuzz/corpus/ \ + -max_len=260 \ + -artifact_prefix=fuzz/crashes/ \ + -jobs=$(nproc) +``` + +Useful libFuzzer flags: + +| Flag | Description | +| --- | --- | +| `-max_total_time=N` | Stop after N seconds | +| `-jobs=N` | Run N parallel fuzzing instances | +| `-max_len=N` | Cap input size at N bytes | +| `-artifact_prefix=DIR/` | Directory for crash and timeout inputs | +| `-runs=N` | Stop after N iterations | + +## Reproducing a crash + +If a crash input is found, reproduce it by passing the file directly to the +relevant target: + +```sh +./build-fuzz/fuzz/fuzz_bstring path/to/crash-input +``` + +## Seed corpus + +The `corpus/` directory contains initial seed inputs that guide the fuzzer +toward interesting code paths on the first run. As the fuzzer discovers new +coverage it writes additional entries to the corpus directory automatically. + +## CI (ClusterFuzzLite) + +The `.clusterfuzzlite/` directory at the repository root contains the +Dockerfile and `build.sh` used by ClusterFuzzLite. The GitHub Actions +workflow at `.github/workflows/fuzz.yml` runs two jobs: + +- **code-change** — 60 seconds on every push and pull request targeting `main` +- **batch** — 1 hour on a weekly schedule or manual dispatch + +Crash reports from the code-change job are surfaced as GitHub Security alerts via +SARIF upload.
diff --git a/fuzz/corpus/01_hello_world b/fuzz/corpus/01_hello_world new file mode 100644 index 0000000..5e34ee8 Binary files /dev/null and b/fuzz/corpus/01_hello_world differ diff --git a/fuzz/corpus/02_html_markup b/fuzz/corpus/02_html_markup new file mode 100644 index 0000000..b8fb241 Binary files /dev/null and b/fuzz/corpus/02_html_markup differ diff --git a/fuzz/corpus/03_underscore_split b/fuzz/corpus/03_underscore_split new file mode 100644 index 0000000..87fedc4 Binary files /dev/null and b/fuzz/corpus/03_underscore_split differ diff --git a/fuzz/corpus/04_base64 b/fuzz/corpus/04_base64 new file mode 100644 index 0000000..c8e395b Binary files /dev/null and b/fuzz/corpus/04_base64 differ diff --git a/fuzz/corpus/05_empty b/fuzz/corpus/05_empty new file mode 100644 index 0000000..1adea62 Binary files /dev/null and b/fuzz/corpus/05_empty differ diff --git a/fuzz/corpus/06_embedded_nul b/fuzz/corpus/06_embedded_nul new file mode 100644 index 0000000..641a13f Binary files /dev/null and b/fuzz/corpus/06_embedded_nul differ diff --git a/fuzz/corpus/07_newlines b/fuzz/corpus/07_newlines new file mode 100644 index 0000000..9401f13 Binary files /dev/null and b/fuzz/corpus/07_newlines differ diff --git a/fuzz/corpus/08_whitespace b/fuzz/corpus/08_whitespace new file mode 100644 index 0000000..c2d3c01 Binary files /dev/null and b/fuzz/corpus/08_whitespace differ diff --git a/fuzz/corpus/09_long_string b/fuzz/corpus/09_long_string new file mode 100644 index 0000000..a7193f4 --- /dev/null +++ b/fuzz/corpus/09_long_string @@ -0,0 +1 @@ +x/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij \ No newline at end of file diff --git a/fuzz/corpus/10_sgml_entities b/fuzz/corpus/10_sgml_entities new file mode 100644 index 0000000..f9c47b3 Binary files /dev/null and b/fuzz/corpus/10_sgml_entities differ diff --git 
a/fuzz/corpus/11_utf8_multibyte b/fuzz/corpus/11_utf8_multibyte new file mode 100644 index 0000000..379521f Binary files /dev/null and b/fuzz/corpus/11_utf8_multibyte differ diff --git a/fuzz/corpus/12_utf8_malformed b/fuzz/corpus/12_utf8_malformed new file mode 100644 index 0000000..05080f9 --- /dev/null +++ b/fuzz/corpus/12_utf8_malformed @@ -0,0 +1 @@ +أق‚ِŸ˜€ٍ € \ No newline at end of file diff --git a/fuzz/corpus/13_utf16le_bom b/fuzz/corpus/13_utf16le_bom new file mode 100644 index 0000000..cc085d9 Binary files /dev/null and b/fuzz/corpus/13_utf16le_bom differ diff --git a/fuzz/corpus/14_utf16be_bom b/fuzz/corpus/14_utf16be_bom new file mode 100644 index 0000000..2dfc241 Binary files /dev/null and b/fuzz/corpus/14_utf16be_bom differ diff --git a/fuzz/corpus/15_ucs4_codepoints b/fuzz/corpus/15_ucs4_codepoints new file mode 100644 index 0000000..7302516 Binary files /dev/null and b/fuzz/corpus/15_ucs4_codepoints differ diff --git a/fuzz/corpus/16_surrogate_codepoints b/fuzz/corpus/16_surrogate_codepoints new file mode 100644 index 0000000..2d7c267 Binary files /dev/null and b/fuzz/corpus/16_surrogate_codepoints differ diff --git a/fuzz/fuzz_bstring.c b/fuzz/fuzz_bstring.c new file mode 100644 index 0000000..6870758 --- /dev/null +++ b/fuzz/fuzz_bstring.c @@ -0,0 +1,532 @@ +/* Copyright (C) 2026 Daniel Markstedt + * libFuzzer fuzz target for the Better String Library + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of bstrlib nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * GNU General Public License Version 2 (the "GPL"). + */ + +/* + * Exercises the full public API of bstrlib and bstraux using raw bytes + * supplied by the fuzzer engine. + * + * Input layout + * ──────────── + * data[0] fill character (gap-fill for insert / replace) + * data[1] split character (separator for bsplit) + * data[2] position hint (used modulo string length + 1) + * data[3] length hint (used modulo a safe upper bound) + * data[4 .. 4+mid) content of b0, where mid = (size - 4) / 2 + * data[4+mid ..] content of b1 (also used as find / replace target) + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "bstraux.h" +#include "bstrlib.h" + +#ifdef BSTRING_ENABLE_UTF8 +#include "buniutil.h" +#include "utf8util.h" + +/* Maximum UCS-2 output words allocated on the stack for buGetBlkUTF16.
*/ +#define MAX_UCS2_OUT 256 + +static const cpUcs4 errch_table[] = { + 0xFFFD, /* replacement character */ + '?', /* ASCII fallback */ + 0, /* NUL */ + ~(cpUcs4)0, /* abort sentinel */ +}; +#define ERRCH_TABLE_SZ ((int)(sizeof errch_table / sizeof errch_table[0])) +#endif /* BSTRING_ENABLE_UTF8 */ + +int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + /* Need at least four control bytes to do anything interesting. */ + if (size < 4) { + return 0; + } + + unsigned char fill = data[0]; + unsigned char split_ch = data[1]; + int pos_hint = (int)data[2]; + int len_hint = (int)data[3]; + + const uint8_t *payload = data + 4; + size_t psize = size - 4; + size_t mid = psize / 2; + + /* Primary bstrings built directly from the fuzzer payload. */ + bstring b0 = blk2bstr(payload, (int)mid); + bstring b1 = blk2bstr(payload + mid, (int)(psize - mid)); + + if (!b0 || !b1) { + bdestroy(b0); + bdestroy(b1); + return 0; + } + + int slen0 = b0->slen; + int slen1 = b1->slen; + + /* Clamp positional parameters to valid ranges. */ + int safe_pos = slen0 > 0 ? pos_hint % (slen0 + 1) : 0; + int safe_len = (len_hint % 64) + 1; /* 1..64 */ + + /* ── Search and comparison (read-only) ─────────────────────────────── */ + + (void)binstr(b0, safe_pos, b1); + (void)binstrr(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)binstrcaseless(b0, safe_pos, b1); + (void)binstrrcaseless(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)bstrchrp(b0, split_ch, safe_pos); + (void)bstrrchrp(b0, split_ch, slen0 > 0 ? slen0 - 1 : 0); + (void)binchr(b0, 0, b1); + (void)binchrr(b0, slen0 > 0 ? slen0 - 1 : 0, b1); + (void)bninchr(b0, 0, b1); + (void)bninchrr(b0, slen0 > 0 ? 
slen0 - 1 : 0, b1); + (void)bstrcmp(b0, b1); + (void)bstricmp(b0, b1); + (void)bstrncmp(b0, b1, safe_len); + (void)bstrnicmp(b0, b1, safe_len); + (void)biseq(b0, b1); + (void)biseqcaseless(b0, b1); + (void)biseqblk(b0, payload, (int)mid); + (void)bisstemeqblk(b0, payload, (int)mid); + (void)bisstemeqcaselessblk(b0, payload, (int)mid); + + /* ── Substring extraction ───────────────────────────────────────────── */ + + bstring sub = bmidstr(b0, safe_pos, safe_len); + bdestroy(sub); + + bstring head = bHead(b0, safe_len); + bdestroy(head); + + bstring tail = bTail(b0, safe_len); + bdestroy(tail); + + /* ── NUL-terminated C-string round-trip ─────────────────────────────── */ + + char *cstr0 = bstr2cstr(b0, '?'); + + /* ── Encoding / decoding ────────────────────────────────────────────── */ + + bstring enc_sgml = bstrcpy(b0); + if (enc_sgml) { + (void)bSGMLEncode(enc_sgml); + bdestroy(enc_sgml); + } + + bstring enc_b64 = bBase64Encode(b0); + bdestroy(enc_b64); + + int b64_err = 0; + bstring dec_b64 = bBase64DecodeEx(b0, &b64_err); + bdestroy(dec_b64); + + bstring enc_uu = bUuEncode(b0); + bdestroy(enc_uu); + + int uu_bad = 0; + bstring dec_uu = bUuDecodeEx(b0, &uu_bad); + bdestroy(dec_uu); + + bstring enc_yenc = bYEncode(b0); + bdestroy(enc_yenc); + + bstring dec_yenc = bYDecode(b0); + bdestroy(dec_yenc); + + /* ── Netstring parser (fuzzes parser with raw bytes as input) ───────── */ + + if (psize > 0) { + char *raw_cstr = malloc(psize + 1); + if (raw_cstr) { + memcpy(raw_cstr, payload, psize); + raw_cstr[psize] = '\0'; + bstring fromnet = bNetStr2Bstr(raw_cstr); + bdestroy(fromnet); + free(raw_cstr); + } + } + + char *netstr = bStr2NetStr(b0); + if (netstr) { + bstring roundtrip = 
bNetStr2Bstr(netstr); + bdestroy(roundtrip); + bcstrfree(netstr); + } + + /* ── Split / join ───────────────────────────────────────────────────── */ + + struct bstrList *lst0 = bsplit(b0, split_ch); + if (lst0) { + bstring joined = bjoin(lst0, b1); + bdestroy(joined); + bstrListDestroy(lst0); + } + + struct bstrList *lst1 = bsplits(b0, b1); + if (lst1) { + bstrListDestroy(lst1); + } + + /* bsplitstr requires a non-empty separator. */ + if (slen1 > 0) { + struct bstrList *lst2 = bsplitstr(b0, b1); + if (lst2) { + bstring joined2 = bjoin(lst2, b1); + bdestroy(joined2); + bstrListDestroy(lst2); + } + } + + /* ── In-place mutation (each operates on a fresh copy of b0) ─────────── */ + + bstring work; + + work = bstrcpy(b0); + if (work) { + (void)bconcat(work, b1); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bcatblk(work, payload + mid, (int)(psize - mid)); + bdestroy(work); + } + + if (cstr0) { + work = bstrcpy(b0); + if (work) { + (void)bcatcstr(work, cstr0); + bdestroy(work); + } + + work = bstrcpy(b1); + if (work) { + (void)bassigncstr(work, cstr0); + bdestroy(work); + } + } + + work = bstrcpy(b0); + if (work) { + (void)bassignblk(work, payload + mid, (int)(psize - mid)); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsert(work, safe_pos, b1, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsertblk(work, safe_pos, payload, (int)mid, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)binsertch(work, safe_pos, safe_len, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bdelete(work, safe_pos, safe_len); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bsetstr(work, safe_pos, b1, fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)breplace(work, safe_pos, safe_len, b1, 
fill); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btrunc(work, safe_len); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btoupper(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btolower(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bltrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)brtrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)btrimws(work); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bReverse(work); + bdestroy(work); + } + + /* bpattern errors on empty strings; guard to avoid false crash reports. */ + if (slen0 > 0) { + work = bstrcpy(b0); + if (work) { + (void)bpattern(work, safe_len + slen0); + bdestroy(work); + } + } + + /* Cap repetitions to prevent OOM in CI. */ + work = bstrcpy(b0); + if (work) { + (void)bReplicate(work, (len_hint % 8) + 1); + bdestroy(work); + } + + /* bfindreplace: find operand must be non-empty. 
*/ + if (slen1 > 0) { + work = bstrcpy(b0); + if (work) { + (void)bfindreplace(work, b1, b0, 0); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bfindreplacecaseless(work, b1, b0, 0); + bdestroy(work); + } + } + + /* ── Justification (bstraux) ────────────────────────────────────────── */ + + int width = safe_len + slen0; + + work = bstrcpy(b0); + if (work) { + (void)bJustifyLeft(work, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyRight(work, width, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyCenter(work, width, ' '); + bdestroy(work); + } + + work = bstrcpy(b0); + if (work) { + (void)bJustifyMargin(work, width, ' '); + bdestroy(work); + } + + bcstrfree(cstr0); + bdestroy(b0); + bdestroy(b1); + +#ifdef BSTRING_ENABLE_UTF8 + /* + * Unicode fuzzing + * ──────────────── + * Reuses the same payload bytes already parsed above. + * data[0] → utf8_pos_hint (same byte as fill; not the data[2] pos_hint) + * data[1] → errch_sel (same byte as split_ch) + * payload[0..mid) → treated as raw UTF-8 + * payload[mid..) 
→ reinterpreted as UCS-2 / UCS-4 word arrays + */ + { + unsigned char utf8_pos_hint = data[0]; + unsigned char errch_sel = data[1]; + + const uint8_t *utf8_data = payload; + size_t utf8_size = mid; + const uint8_t *word_data = payload + mid; + size_t word_size = psize - mid; + + cpUcs4 errCh = errch_table[errch_sel % ERRCH_TABLE_SZ]; + + /* ── utf8util: forward iterator (GetNext) ─────────────────────── */ + { + struct utf8Iterator iter; + utf8IteratorInit(&iter, (unsigned char *)(uintptr_t)utf8_data, + (int)utf8_size); + while (!utf8IteratorNoMore(&iter)) { + (void)utf8IteratorGetNextCodePoint(&iter, errCh); + } + utf8IteratorUninit(&iter); + } + + /* ── utf8util: peek iterator (GetCurr without advancing) ──────── */ + { + struct utf8Iterator iter; + utf8IteratorInit(&iter, (unsigned char *)(uintptr_t)utf8_data, + (int)utf8_size); + if (!utf8IteratorNoMore(&iter)) { + (void)utf8IteratorGetCurrCodePoint(&iter, errCh); + } + utf8IteratorUninit(&iter); + } + + /* ── utf8util: backward scanner ───────────────────────────────── */ + if (utf8_size > 0) { + for (int i = 0; i < (int)utf8_size; i++) { + cpUcs4 out; + (void)utf8ScanBackwardsForCodePoint(utf8_data, + (int)utf8_size, i, &out); + } + int pos = (int)(utf8_pos_hint % utf8_size); + (void)utf8ScanBackwardsForCodePoint(utf8_data, + (int)utf8_size, pos, NULL); + } + + /* ── buniutil: UTF-8 validation ───────────────────────────────── */ + { + bstring bu = blk2bstr(utf8_data, (int)utf8_size); + if (bu) { + (void)buIsUTF8Content(bu); + bdestroy(bu); + } + } + + /* ── buniutil: UTF-8 bstring → UTF-16 array ───────────────────── */ + { + bstring bu = blk2bstr(utf8_data, (int)utf8_size); + if (bu) { + cpUcs2 out_buf[MAX_UCS2_OUT]; + (void)buGetBlkUTF16(out_buf, MAX_UCS2_OUT, 
errCh, bu, 0); + if (utf8_size > 0) { + int pos = (int)(utf8_pos_hint % utf8_size); + (void)buGetBlkUTF16(out_buf, MAX_UCS2_OUT, errCh, bu, pos); + } + bdestroy(bu); + } + } + + /* ── buniutil: UCS-4 array → UTF-8 bstring ───────────────────── */ + if (word_size >= sizeof(cpUcs4)) { + int n = (int)(word_size / sizeof(cpUcs4)); + cpUcs4 *ucs4_buf = malloc((size_t)n * sizeof(cpUcs4)); + if (ucs4_buf) { + memcpy(ucs4_buf, word_data, (size_t)n * sizeof(cpUcs4)); + bstring out = bfromcstr(""); + if (out) { + (void)buAppendBlkUcs4(out, ucs4_buf, n, errCh); + bdestroy(out); + } + free(ucs4_buf); + } + } + + /* ── buniutil: UTF-16 array → UTF-8 bstring ──────────────────── */ + if (word_size >= sizeof(cpUcs2)) { + int n = (int)(word_size / sizeof(cpUcs2)); + cpUcs2 *ucs2_buf = malloc((size_t)n * sizeof(cpUcs2)); + if (ucs2_buf) { + memcpy(ucs2_buf, word_data, (size_t)n * sizeof(cpUcs2)); + + bstring out; + + /* bom=NULL: no BOM tracking, native endianness assumed. */ + out = bfromcstr(""); + if (out) { + (void)buAppendBlkUTF16(out, ucs2_buf, n, NULL, errCh); + bdestroy(out); + } + + /* *bom=0: let the function detect and consume a leading BOM. */ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + /* *bom=0xFEFF: caller pre-detected a little-endian BOM. */ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0xFEFF; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + /* *bom=0xFFFE: caller pre-detected a big-endian BOM. 
*/ + out = bfromcstr(""); + if (out) { + cpUcs2 bom = 0xFFFE; + (void)buAppendBlkUTF16(out, ucs2_buf, n, &bom, errCh); + bdestroy(out); + } + + free(ucs2_buf); + } + } + } +#endif /* BSTRING_ENABLE_UTF8 */ + + return 0; +} diff --git a/fuzz/meson.build b/fuzz/meson.build new file mode 100644 index 0000000..85f91ea --- /dev/null +++ b/fuzz/meson.build @@ -0,0 +1,16 @@ +fuzzer_c_args = [] +fuzzer_link_arg = get_option('fuzz-link-arg') +fuzzer_link_args = [fuzzer_link_arg] + +if get_option('enable-utf8') + fuzzer_c_args += '-DBSTRING_ENABLE_UTF8' +endif + +executable( + 'fuzz_bstring', + 'fuzz_bstring.c', + link_with: libbstring, + include_directories: bstring_inc, + c_args: fuzzer_c_args, + link_args: fuzzer_link_args, +) diff --git a/meson.build b/meson.build index 3248ebf..be4c4c3 100644 --- a/meson.build +++ b/meson.build @@ -38,6 +38,16 @@ if get_option('reduce-namespace-pollution') add_project_arguments('-DBSTRLIB_REDUCE_NAMESPACE_POLLUTION', language: 'c') endif +if get_option('enable-fuzzing') + if cc.get_id() != 'clang' + error('Fuzz targets require clang (set CC=clang)') + endif + # Instrument the library itself for coverage so libFuzzer can track which + # code paths each input explores. The fuzz targets are linked separately + # with the full -fsanitize=fuzzer (or $LIB_FUZZING_ENGINE) driver.
+ add_project_arguments('-fsanitize=fuzzer-no-link', language: 'c') +endif + add_project_arguments(warning_flags, language: 'c') bstring_inc = include_directories(['.', 'bstring']) conf_data = configuration_data() @@ -72,6 +82,10 @@ if get_option('enable-tests') endif endif +if get_option('enable-fuzzing') + subdir('fuzz') +endif + doxygen = find_program('doxygen', required: false) if get_option('enable-docs') diff --git a/meson_options.txt b/meson_options.txt index 95c0a3b..ee6221c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -10,6 +10,18 @@ option( value: false, description: 'Generate and install documentation', ) +option( + 'enable-fuzzing', + type: 'boolean', + value: false, + description: 'Build libFuzzer fuzz targets (requires clang)', +) +option( + 'fuzz-link-arg', + type: 'string', + value: '-fsanitize=fuzzer,address', + description: 'Linker flag used for fuzz targets (ClusterFuzzLite passes LIB_FUZZING_ENGINE here)', +) option( 'enable-tests', type: 'boolean',