Skip to content

Commit 3227836

Browse files
authored
Merge pull request #246 from bgpkit/fuzz
Add benchmarking script, fuzz tests, and parser fixes
2 parents 2559b99 + f7a866a commit 3227836

30 files changed

+568
-46
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,7 @@ examples/debug-file.rs
1414
.env
1515
.claude
1616
*.pcap
17-
*.bin
17+
*.bin
18+
19+
# local benchmark results
20+
benchmarks

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## Unreleased
6+
7+
### Testing and fuzzing
8+
9+
* Add cargo-fuzz harness and initial fuzz targets (mrt_record, bgp_message, parser)
10+
* Add additional fuzz targets (BMP/OpenBMP, RIS Live, attributes, MRT header, NLRI); enable rislive feature for fuzzing
11+
12+
### Bug fixes
13+
14+
* Add bounds checks throughout parsers to avoid overread/advance/split_to panics
15+
* Handle invalid MRT BGP4MP_ET header length gracefully (reject ET records with on-wire length < 4)
16+
17+
### Tooling and benchmarking
18+
19+
* Add hyperfine benchmarking script for bgpkit-parser
20+
521
## v0.12.0 - 2025-10-17
622

723
### Examples and benchmarking

scripts/bench_hyperfine.sh

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# Benchmark bgpkit-parser parsing performance using hyperfine.
5+
# Benchmarks the local target/release binary of bgpkit-parser and compares against the system PATH binary.
6+
#
7+
# Usage:
8+
# scripts/bench_hyperfine.sh [-- input-args]
9+
# Any arguments after -- are passed to the parser binaries before the INPUT file.
10+
#
11+
# Environment variables:
12+
# BGPKIT_BENCH_INPUT Path to input MRT/BGP file to parse. If unset, defaults to a large RIB.
13+
# HYPERFINE_RUNS Number of measurement runs (default: 3 for large RIB, else 10)
14+
# HYPERFINE_WARMUP Number of warmup runs (default: 1 for large RIB, else 3)
15+
# LOCAL_BIN Override local binary path (default: target/release/bgpkit-parser)
16+
# SYSTEM_BIN Override system binary path; default is the first bgpkit-parser found in PATH
17+
# HYPERFINE_EXTRA_ARGS Extra args passed to hyperfine (e.g., "--show-output")
18+
# BGPKIT_ARGS Extra args passed to bgpkit-parser before the INPUT (e.g., "--skip-v4")
19+
# OUT_DIR Output directory for results (default: benchmarks/hyperfine)
20+
# BENCH_DATA_DIR Directory to store/download inputs when BGPKIT_BENCH_INPUT is not set (default: benchmarks/test_data)
21+
#
22+
# Notes:
23+
# - This script assumes the bgpkit-parser CLI accepts a single positional INPUT path.
24+
# - If BGPKIT_BENCH_INPUT is not set, the script will:
25+
# 1) Use benchmarks/test_data/rib-example.bz2 (download if missing from https://spaces.bgpkit.org/parser/rib-example.bz2)
26+
# 2) Otherwise fall back to rib-example-small.bz2 in repo root or target/test_data.
27+
# Defaults to runs=3, warmup=1 when using the large RIB.
28+
29+
ROOT_DIR="$(cd "$(dirname "$0")"/.. && pwd)"
30+
cd "$ROOT_DIR"
31+
32+
# Check hyperfine availability
33+
if ! command -v hyperfine >/dev/null 2>&1; then
34+
echo "ERROR: 'hyperfine' is not installed or not in PATH." >&2
35+
echo "Install: https://github.com/sharkdp/hyperfine or e.g. 'brew install hyperfine'" >&2
36+
exit 1
37+
fi
38+
39+
# Build local release binary to ensure it's up to date
40+
LOCAL_BIN_DEFAULT="target/release/bgpkit-parser"
41+
echo "Building release binary..."
42+
cargo build --release >/dev/null
43+
44+
LOCAL_BIN="${LOCAL_BIN:-$LOCAL_BIN_DEFAULT}"
45+
if [ ! -x "$LOCAL_BIN" ]; then
46+
echo "ERROR: Local binary not found or not executable: $LOCAL_BIN" >&2
47+
exit 1
48+
fi
49+
50+
# Resolve system binary (for comparison)
51+
SYSTEM_BIN="${SYSTEM_BIN:-}"
52+
if [ -z "$SYSTEM_BIN" ]; then
53+
if command -v bgpkit-parser >/dev/null 2>&1; then
54+
SYSTEM_BIN="$(command -v bgpkit-parser)"
55+
else
56+
SYSTEM_BIN=""
57+
fi
58+
fi
59+
60+
# Input file detection
61+
INPUT="${BGPKIT_BENCH_INPUT:-}"
62+
BENCH_DATA_DIR="${BENCH_DATA_DIR:-benchmarks/test_data}"
63+
LARGE_URL="https://spaces.bgpkit.org/parser/rib-example.bz2"
64+
LARGE_PATH="$BENCH_DATA_DIR/rib-example.bz2"
65+
66+
if [ -z "$INPUT" ]; then
67+
# Prefer large RIB in benchmarks/test_data; download if missing
68+
if [ -f "$LARGE_PATH" ]; then
69+
INPUT="$LARGE_PATH"
70+
else
71+
mkdir -p "$BENCH_DATA_DIR"
72+
echo "Attempting to download large RIB to $LARGE_PATH"
73+
if command -v curl >/dev/null 2>&1; then
74+
curl -fL "$LARGE_URL" -o "$LARGE_PATH"
75+
elif command -v wget >/dev/null 2>&1; then
76+
wget -O "$LARGE_PATH" "$LARGE_URL"
77+
else
78+
echo "WARN: Neither curl nor wget available; cannot auto-download large RIB." >&2
79+
fi
80+
if [ -f "$LARGE_PATH" ]; then
81+
INPUT="$LARGE_PATH"
82+
elif [ -f "rib-example-small.bz2" ]; then
83+
INPUT="rib-example-small.bz2"
84+
elif [ -f "target/test_data/rib-example-small.bz2" ]; then
85+
INPUT="target/test_data/rib-example-small.bz2"
86+
else
87+
echo "ERROR: Could not locate an input file." >&2
88+
echo "Set BGPKIT_BENCH_INPUT to a valid file path, or place 'rib-example-small.bz2' in repo root, or run 'cargo bench' once to download test data, or install curl/wget for auto-download of the large RIB." >&2
89+
exit 1
90+
fi
91+
fi
92+
fi
93+
94+
if [ ! -f "$INPUT" ]; then
95+
echo "ERROR: Input file does not exist: $INPUT" >&2
96+
exit 1
97+
fi
98+
99+
# Determine defaults for runs/warmup based on input size choice
100+
if [[ "$INPUT" == *"rib-example.bz2"* ]]; then
101+
RUNS_DEFAULT=3
102+
WARMUP_DEFAULT=1
103+
else
104+
RUNS_DEFAULT=10
105+
WARMUP_DEFAULT=3
106+
fi
107+
108+
# Optional args passed to parser binaries before INPUT
109+
FORWARDED_ARGS=()
110+
if [ $# -gt 0 ]; then
111+
# Support separator -- to pass arguments to the parser
112+
while [ $# -gt 0 ]; do
113+
if [ "$1" = "--" ]; then
114+
shift
115+
break
116+
fi
117+
shift
118+
done
119+
if [ $# -gt 0 ]; then
120+
FORWARDED_ARGS=("$@")
121+
fi
122+
fi
123+
124+
# Merge BGPKIT_ARGS from env
125+
if [ -n "${BGPKIT_ARGS:-}" ]; then
126+
# shellcheck disable=SC2206
127+
FORWARDED_ARGS=( ${BGPKIT_ARGS} "${FORWARDED_ARGS[@]}" )
128+
fi
129+
130+
RUNS="${HYPERFINE_RUNS:-$RUNS_DEFAULT}"
131+
WARMUP="${HYPERFINE_WARMUP:-$WARMUP_DEFAULT}"
132+
EXTRA="${HYPERFINE_EXTRA_ARGS:-}"
133+
134+
# Output location
135+
STAMP="$(date +%Y%m%d-%H%M%S)"
136+
SHA="$(git rev-parse --short HEAD 2>/dev/null || echo nosha)"
137+
OUT_DIR="${OUT_DIR:-benchmarks/hyperfine}"
138+
mkdir -p "$OUT_DIR"
139+
OUT_JSON="$OUT_DIR/${STAMP}-${SHA}.json"
140+
OUT_MD="$OUT_DIR/${STAMP}-${SHA}.md"
141+
142+
143+
echo "Benchmarking with input: $INPUT"
144+
echo "Local bin: $LOCAL_BIN"
145+
if [ -n "$SYSTEM_BIN" ]; then
146+
echo "System bin: $SYSTEM_BIN"
147+
else
148+
echo "System bin: (not found in PATH)"
149+
fi
150+
echo "Runs: $RUNS, Warmup: $WARMUP"
151+
152+
# Compare paths; warn if identical
153+
IDENTICAL=0
154+
if [ -n "$SYSTEM_BIN" ]; then
155+
if [ "$SYSTEM_BIN" = "$LOCAL_BIN" ]; then
156+
IDENTICAL=1
157+
fi
158+
fi
159+
if [ "$IDENTICAL" -eq 1 ]; then
160+
echo "WARN: system bgpkit-parser resolves to the same path as local: $SYSTEM_BIN" >&2
161+
fi
162+
163+
# Compose commands for hyperfine
164+
# Use explicit labels via --command-name for readability
165+
if [ -n "$SYSTEM_BIN" ] && [ -x "$SYSTEM_BIN" ] && [ "$IDENTICAL" -eq 0 ]; then
166+
hyperfine \
167+
--warmup "$WARMUP" \
168+
--runs "$RUNS" \
169+
--export-json "$OUT_JSON" \
170+
${EXTRA} \
171+
--command-name "local(target/release)" \
172+
"\"$LOCAL_BIN\" ${FORWARDED_ARGS[*]-} \"$INPUT\"" \
173+
--command-name "system(PATH)" \
174+
"\"$SYSTEM_BIN\" ${FORWARDED_ARGS[*]-} \"$INPUT\""
175+
else
176+
hyperfine \
177+
--warmup "$WARMUP" \
178+
--runs "$RUNS" \
179+
--export-json "$OUT_JSON" \
180+
${EXTRA} \
181+
--command-name "local(target/release)" \
182+
"\"$LOCAL_BIN\" ${FORWARDED_ARGS[*]-} \"$INPUT\""
183+
fi
184+
185+
# Also write a small markdown summary next to the JSON for quick viewing
186+
{
187+
echo "# Hyperfine: bgpkit-parser"
188+
echo
189+
echo "- Timestamp: $STAMP"
190+
echo "- Commit: $SHA"
191+
echo "- Input: $INPUT"
192+
echo "- Local: $LOCAL_BIN"
193+
if [ -n "$SYSTEM_BIN" ]; then
194+
echo "- System: $SYSTEM_BIN"
195+
fi
196+
echo "- Runs: $RUNS"
197+
echo "- Warmup: $WARMUP"
198+
echo
199+
echo "Command to reproduce:"
200+
echo "\n\tHYPERFINE_RUNS=$RUNS HYPERFINE_WARMUP=$WARMUP scripts/bench_hyperfine.sh -- ${FORWARDED_ARGS[*]-} \"$INPUT\"\n"
201+
} > "$OUT_MD"
202+
203+
echo "Saved results:"
204+
echo " JSON: $OUT_JSON"
205+
echo " Note: $OUT_MD"

src/parser/bgp/attributes/attr_14_15_nlri.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ pub fn parse_nlri(
3131
reachable: bool, // whether the NLRI is announcements or withdrawals
3232
additional_paths: bool, // whether the NLRI is part of an additional paths message
3333
) -> Result<AttributeValue, ParserError> {
34-
let first_byte_zero = input[0] == 0;
34+
let first_byte_zero = input.first().map(|b| *b == 0).unwrap_or(false);
3535

3636
// read address family
3737
let afi = match afi {

src/parser/bgp/attributes/attr_23_tunnel_encap.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ pub fn parse_tunnel_encapsulation_attribute(
1313
let mut attr = TunnelEncapAttribute::new();
1414

1515
while data.remaining() >= 4 {
16-
let tunnel_type = data.get_u16();
17-
let tunnel_length = data.get_u16();
16+
let tunnel_type = data.read_u16()?;
17+
let tunnel_length = data.read_u16()?;
1818

1919
if data.remaining() < tunnel_length as usize {
2020
return Err(ParserError::TruncatedMsg(format!(
@@ -44,7 +44,7 @@ fn parse_tunnel_tlv(tunnel_type: u16, mut data: Bytes) -> Result<TunnelEncapTlv,
4444
));
4545
}
4646

47-
let sub_tlv_type = data.get_u8();
47+
let sub_tlv_type = data.read_u8()?;
4848

4949
// Sub-TLV length encoding: 1 byte for types 0-127, 2 bytes for types 128-255
5050
let sub_tlv_length: u16 = if sub_tlv_type < 128 {
@@ -53,14 +53,14 @@ fn parse_tunnel_tlv(tunnel_type: u16, mut data: Bytes) -> Result<TunnelEncapTlv,
5353
"Not enough data for Sub-TLV length".to_string(),
5454
));
5555
}
56-
data.get_u8() as u16
56+
data.read_u8()? as u16
5757
} else {
5858
if data.remaining() < 2 {
5959
return Err(ParserError::TruncatedMsg(
6060
"Not enough data for extended Sub-TLV length".to_string(),
6161
));
6262
}
63-
data.get_u16()
63+
data.read_u16()?
6464
};
6565

6666
if data.remaining() < sub_tlv_length as usize {

src/parser/bgp/attributes/attr_29_linkstate.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ pub fn parse_link_state_attribute(mut data: Bytes) -> Result<AttributeValue, Par
1212
let mut attr = LinkStateAttribute::new();
1313

1414
while data.remaining() >= 4 {
15-
let tlv_type = data.get_u16();
16-
let tlv_length = data.get_u16();
15+
let tlv_type = data.read_u16()?;
16+
let tlv_length = data.read_u16()?;
1717

1818
if data.remaining() < tlv_length as usize {
1919
return Err(ParserError::TruncatedMsg(format!(
@@ -93,8 +93,8 @@ pub fn parse_link_state_nlri(
9393
let mut nlri_list = Vec::new();
9494

9595
while data.remaining() >= 4 {
96-
let nlri_type = data.get_u16();
97-
let nlri_len = data.get_u16();
96+
let nlri_type = data.read_u16()?;
97+
let nlri_len = data.read_u16()?;
9898

9999
if data.remaining() < nlri_len as usize {
100100
return Err(ParserError::TruncatedMsg(format!(
@@ -133,8 +133,8 @@ fn parse_single_link_state_nlri(
133133
)));
134134
}
135135

136-
let protocol_id = ProtocolId::from(data.get_u8());
137-
let identifier = data.get_u64();
136+
let protocol_id = ProtocolId::from(data.read_u8()?);
137+
let identifier = data.read_u64()?;
138138

139139
// Parse descriptors based on NLRI type
140140
let (local_node_descriptors, remote_node_descriptors, link_descriptors, prefix_descriptors) =
@@ -180,7 +180,7 @@ fn parse_node_descriptors(data: &mut Bytes) -> Result<NodeDescriptor, ParserErro
180180
if data.remaining() < 2 {
181181
return Ok(node_desc);
182182
}
183-
let desc_len = data.get_u16();
183+
let desc_len = data.read_u16()?;
184184

185185
if data.remaining() < desc_len as usize {
186186
return Err(ParserError::TruncatedMsg(format!(
@@ -193,8 +193,8 @@ fn parse_node_descriptors(data: &mut Bytes) -> Result<NodeDescriptor, ParserErro
193193
let mut desc_data: Bytes = data.read_n_bytes(desc_len as usize)?.into();
194194

195195
while desc_data.remaining() >= 4 {
196-
let sub_tlv_type = desc_data.get_u16();
197-
let sub_tlv_len = desc_data.get_u16();
196+
let sub_tlv_type = desc_data.read_u16()?;
197+
let sub_tlv_len = desc_data.read_u16()?;
198198

199199
if desc_data.remaining() < sub_tlv_len as usize {
200200
break;
@@ -245,7 +245,7 @@ fn parse_link_descriptors(data: &mut Bytes) -> Result<LinkDescriptor, ParserErro
245245
if data.remaining() < 2 {
246246
return Ok(link_desc);
247247
}
248-
let desc_len = data.get_u16();
248+
let desc_len = data.read_u16()?;
249249

250250
if data.remaining() < desc_len as usize {
251251
return Err(ParserError::TruncatedMsg(format!(
@@ -258,8 +258,8 @@ fn parse_link_descriptors(data: &mut Bytes) -> Result<LinkDescriptor, ParserErro
258258
let mut desc_data: Bytes = data.read_n_bytes(desc_len as usize)?.into();
259259

260260
while desc_data.remaining() >= 4 {
261-
let sub_tlv_type = desc_data.get_u16();
262-
let sub_tlv_len = desc_data.get_u16();
261+
let sub_tlv_type = desc_data.read_u16()?;
262+
let sub_tlv_len = desc_data.read_u16()?;
263263

264264
if desc_data.remaining() < sub_tlv_len as usize {
265265
break;
@@ -331,7 +331,7 @@ fn parse_prefix_descriptors(data: &mut Bytes) -> Result<PrefixDescriptor, Parser
331331
if data.remaining() < 2 {
332332
return Ok(prefix_desc);
333333
}
334-
let desc_len = data.get_u16();
334+
let desc_len = data.read_u16()?;
335335

336336
if data.remaining() < desc_len as usize {
337337
return Err(ParserError::TruncatedMsg(format!(
@@ -344,8 +344,8 @@ fn parse_prefix_descriptors(data: &mut Bytes) -> Result<PrefixDescriptor, Parser
344344
let mut desc_data: Bytes = data.read_n_bytes(desc_len as usize)?.into();
345345

346346
while desc_data.remaining() >= 4 {
347-
let sub_tlv_type = desc_data.get_u16();
348-
let sub_tlv_len = desc_data.get_u16();
347+
let sub_tlv_type = desc_data.read_u16()?;
348+
let sub_tlv_len = desc_data.read_u16()?;
349349

350350
if desc_data.remaining() < sub_tlv_len as usize {
351351
break;

src/parser/bgp/attributes/attr_32_large_communities.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@ pub fn parse_large_communities(mut input: Bytes) -> Result<AttributeValue, Parse
99

1010
let mut communities = Vec::with_capacity(num_communities);
1111
while input.remaining() > 0 {
12-
input.has_n_remaining(12)?; // 12 bytes for large community (3x 32 bits integers)
13-
let global_administrator = input.get_u32();
14-
let local_data = [input.get_u32(), input.get_u32()];
12+
let global_administrator = input.read_u32()?;
13+
let local_data = [input.read_u32()?, input.read_u32()?];
1514
communities.push(LargeCommunity::new(global_administrator, local_data));
1615
}
1716

0 commit comments

Comments
 (0)