From 2e64b43d7789bed26e97cc06498bfd701931777c Mon Sep 17 00:00:00 2001 From: joyguoguo <160556976+joyguoguo@users.noreply.github.com> Date: Sun, 20 Jul 2025 00:48:48 -0700 Subject: [PATCH 001/134] Upload the python project Fuzz test script valid_projects.txt: Python project list script_fuzz_py_final.sh: Single project test script script_fuzz_py_batch_final.sh: Batch projects test script --- script_fuzz_py_batch_final.sh | 201 +++++++++++++++++++++++++++++ script_fuzz_py_final.sh | 126 ++++++++++++++++++ valid_projects.txt | 234 ++++++++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+) create mode 100644 script_fuzz_py_batch_final.sh create mode 100644 script_fuzz_py_final.sh create mode 100644 valid_projects.txt diff --git a/script_fuzz_py_batch_final.sh b/script_fuzz_py_batch_final.sh new file mode 100644 index 0000000..1d7d9af --- /dev/null +++ b/script_fuzz_py_batch_final.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# script_fuzz_py_batch_final.sh +# 批量执行OSS-Fuzz本地测试全流程:从文件读取项目列表,依次为每个项目自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告(暂无)) +# 用法:./script_fuzz_py_batch_final.sh [项目列表文件] [sanitizer类型] +# 示例: ./script_fuzz_py_batch_final.sh valid_projects.txt address + +# --- 全局配置 --- +PROJECT_LIST_FILE="${1:-valid_projects.txt}" # 默认项目列表文件 +SANITIZER="${2:-address}" # 默认检测器类型 +OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz" # OSS-Fuzz目录 +LOG_DIR="$OSS_FUZZ_DIR/script_lz4_batch_logs" # 所有项目的总日志目录 +FAILED_PROJECTS=() # 存储失败项目列表 + +# --- 环境检查 --- +check_environment() { + if [ ! -d "$OSS_FUZZ_DIR" ]; then + echo "❌ 错误: OSS-Fuzz 目录 '$OSS_FUZZ_DIR' 不存在!" + return 1 + fi + if [ ! -f "$PROJECT_LIST_FILE" ]; then + echo "❌ 错误: 项目列表文件 '$PROJECT_LIST_FILE' 不存在!" + return 1 + fi + mkdir -p "$LOG_DIR" + chmod 777 "$LOG_DIR" 2>/dev/null || true + cd "$OSS_FUZZ_DIR" || return 1 + echo "✅ 环境检查通过。OSS-Fuzz 目录: $OSS_FUZZ_DIR" +} + +# --- 带日志记录的命令执行 --- +run_command() { + local cmd="$1" + local log_msg="$2" + local log_file="$3" # 日志文件作为参数传入 + local allowed_exit="${4:-}" + + echo "▶️ $log_msg..." | tee -a "$log_file" + set +e + { yes | eval "$cmd" ; } 2>&1 | tee -a "$log_file" + local exit_code=${PIPESTATUS[0]} + set -e + + if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then + echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$log_file" + return 0 + elif [ $exit_code -ne 0 ]; then + echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$log_file" + return 1 # 返回错误而不是退出脚本 + fi +} + +# --- 自动发现 Fuzz 目标 --- +discover_fuzz_targets() { + local project_name="$1" + local project_dir="$OSS_FUZZ_DIR/build/out/$project_name" + local project_src="$OSS_FUZZ_DIR/projects/$project_name" + local targets=() + + if [ -d "$project_dir" ]; then + while IFS= read -r -d $'\0' file; do + filename=$(basename "$file") + if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then + targets+=("$filename") + fi + done < <(find "$project_dir" -maxdepth 1 -type f -print0) + fi + + if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then + while IFS= read -r -d $'\0' file; do + if grep -q "atheris.Setup" "$file"; then + targets+=("$(basename "${file%.*}")") + fi + done < <(find "$project_src" -name 'fuzz_*.py' -print0) + fi + + echo "${targets[@]}" +} + +# --- 单个项目的完整处理流程 --- +process_project() { + local project_name="$1" + local log_file="$LOG_DIR/oss_fuzz_${project_name}_$(date +%Y%m%d%H%M%S).log" + local project_failed=0 + + echo "============================================================" | tee -a "$log_file" + echo "🚀 开始处理项目: $project_name" | tee -a "$log_file" + echo "📝 日志文件: $log_file" | tee -a "$log_file" + echo "============================================================" | tee -a "$log_file" + + #1. 构建Docker镜像 + if ! run_command \ + "python3 infra/helper.py build_image $project_name" \ + "步骤1/5: 构建 $project_name 的Docker镜像" \ + "$log_file"; then + echo "❌ 项目 $project_name 构建镜像失败,跳过后续步骤" | tee -a "$log_file" + project_failed=1 + fi + + # 2. 编译带检测器的fuzzer (仅在构建镜像成功后执行) + if [ $project_failed -eq 0 ]; then + if ! run_command \ + "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $project_name" \ + "步骤2/5: 编译 $project_name 的fuzzer (sanitizer=$SANITIZER)" \ + "$log_file"; then + echo "❌ 项目 $project_name 编译fuzzer失败,跳过后续步骤" | tee -a "$log_file" + project_failed=1 + fi + fi + + # 3. 自动发现目标 (仅在编译成功后执行) + if [ $project_failed -eq 0 ]; then + echo "🔍 正在为 $project_name 自动发现fuzz目标..." | tee -a "$log_file" + FUZZ_TARGETS=($(discover_fuzz_targets "$project_name")) + + if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then + echo "⚠️ 警告: 项目 $project_name 未找到任何fuzz目标!跳过运行步骤。" | tee -a "$log_file" + else + echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$log_file" + + # 4. 遍历运行所有目标 默认测试60秒 + for target in "${FUZZ_TARGETS[@]}"; do + if ! run_command \ + "python3 infra/helper.py run_fuzzer $project_name $target -- -max_total_time=60" \ + "步骤3/5: 运行目标 [$target] (60秒超时)" \ + "$log_file" \ + "124,1"; then # 允许超时(124)和发现崩溃(1) + echo "⚠️ 警告: 目标 [$target] 运行失败,继续下一个目标" | tee -a "$log_file" + fi + done + fi + fi + + # 5. 生成覆盖率报告 (已注释掉,与原脚本保持一致) + # [保留原有注释的覆盖率代码] + + if [ $project_failed -eq 0 ]; then + echo "✅ 项目 $project_name 处理完成!" | tee -a "$log_file" + + else + echo "❌ 项目 $project_name 处理失败!" | tee -a "$log_file" + + fi + + echo "------------------------------------------------------------" + return $project_failed +} + +# --- 主流程 --- +main() { +if ! check_environment; then + echo "❌ 环境检查失败,脚本终止" + exit 1 + fi + + # 新增:读取项目列表到数组(过滤空行和注释行) + local PROJECTS=() + while IFS= read -r line || [[ -n "$line" ]]; do + if [[ -z "$line" || "$line" =~ ^# ]]; then + continue + fi + PROJECTS+=("$line") + done < "$PROJECT_LIST_FILE" + + # 项目总数从数组长度获取(原逻辑从文件行数获取) + local total_projects=${#PROJECTS[@]} + local current_project_num=0 + local success_count=0 + local fail_count=0 + + # 新增:遍历数组处理项目(替代原while读取文件的循环) + for project_name in "${PROJECTS[@]}"; do + + current_project_num=$((current_project_num + 1)) + echo ">>> [ $current_project_num / $total_projects ] 开始处理项目: $project_name <<<" + + if process_project "$project_name"; then + echo "✅ [$current_project_num/$total_projects] 项目 $project_name 成功完成" + ( success_count=$[ $success_count + 1 ]) + else + echo "❌ [$current_project_num/$total_projects] 项目 $project_name 处理失败" + FAILED_PROJECTS+=("$project_name") + ( success_count=$[ $success_count + 1 ]) + fi + + done + + echo "============================================================" + echo "🎉 批量处理完成!" + echo "📊 总计: $total_projects 个项目" + echo "✅ 成功: $success_count" + echo "❌ 失败: $fail_count" + + if [ ${#FAILED_PROJECTS[@]} -gt 0 ]; then + echo "📛 失败项目列表:" + printf ' • %s\n' "${FAILED_PROJECTS[@]}" + echo "💡 提示: 可以重新运行失败项目,检查日志获取详细信息" + echo " 日志目录: $LOG_DIR" + fi +} + +main "$@" \ No newline at end of file diff --git a/script_fuzz_py_final.sh b/script_fuzz_py_final.sh new file mode 100644 index 0000000..c2bf00a --- /dev/null +++ b/script_fuzz_py_final.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# script_fuzz_py_final.sh +# 执行OSS-Fuzz本地测试全流程:自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告 +# 用法:script_fuzz_py_final.sh <项目名> [sanitizer类型] + +set -e # 遇到错误立即退出 + +PROJECT_NAME="${1:-abseil-py}" # 默认项目名 +SANITIZER="${2:-address}" # 默认检测器类型 +OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz" # OSS-Fuzz目录 +LOG_DIR="$OSS_FUZZ_DIR/script_lz4_logs" +LOG_FILE="$LOG_DIR/oss_fuzz_${PROJECT_NAME}_$(date +%Y%m%d%H%M%S).log" +# 验证目录有效性 +check_environment() { + if [ ! -d "$OSS_FUZZ_DIR" ]; then + echo "❌ 错误: $OSS_FUZZ_DIR 目录不存在!" + exit 1 + fi + mkdir -p "$LOG_DIR" # 关键修复:创建日志目录 + chmod 777 "$LOG_DIR" 2>/dev/null || true # 宽松权限设置 + cd "$OSS_FUZZ_DIR" || exit 1 +} + +# 带日志记录的命令执行(支持允许的退出码) +run_command() { + local cmd="$1" + local log_msg="$2" + local allowed_exit="${3:-}" # 可选:允许的退出码(逗号分隔) + + echo "▶️ $log_msg..." | tee -a "$LOG_FILE" + set +e # 临时禁用错误退出 + eval "$cmd" 2>&1 | tee -a "$LOG_FILE" + local exit_code=${PIPESTATUS[0]} + set -e # 重新启用错误退出 + + # 检查退出码是否被允许 + if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then + echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$LOG_FILE" + return 0 + elif [ $exit_code -ne 0 ]; then + echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$LOG_FILE" + exit 1 + fi +} + +# 自动发现fuzz目标 +discover_fuzz_targets() { + local project_dir="$OSS_FUZZ_DIR/build/out/$PROJECT_NAME" + local project_src="$OSS_FUZZ_DIR/projects/$PROJECT_NAME" + local targets=() + + # 编译目录扫描:仅匹配"fuzz_"开头的可执行文件 + if [ -d "$project_dir" ]; then + while IFS= read -r -d $'\0' file; do + filename=$(basename "$file") + if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then + targets+=("$filename") + fi + done < <(find "$project_dir" -maxdepth 1 -type f -print0) + fi + + # 源码目录扫描:仅匹配"fuzz_*.py"且含Atheris标识 + if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then + while IFS= read -r -d $'\0' file; do + if grep -q "atheris.Setup" "$file"; then + targets+=("$(basename "${file%.*}")") + fi + done < <(find "$project_src" -name 'fuzz_*.py' -print0) + fi + + echo "${targets[@]}" +} + +# 主流程 +main() { + check_environment + echo "==============================" + echo "🚀 开始OSS-Fuzz测试 - 项目: $PROJECT_NAME" + echo "📝 日志文件: $LOG_FILE" + echo "==============================" + + #1. 构建Docker镜像 + run_command \ + "python3 infra/helper.py build_image $PROJECT_NAME" \ + "步骤1/5: 构建Docker镜像" + + # 2. 编译带检测器的fuzzer + run_command \ + "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $PROJECT_NAME" \ + "步骤2/5: 编译fuzzer (sanitizer=$SANITIZER)" + + # 3. 自动发现目标 + echo "🔍 自动发现fuzz目标..." + FUZZ_TARGETS=($(discover_fuzz_targets)) + + if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then + echo "❌ 未找到任何fuzz目标!检查项目配置" | tee -a "$LOG_FILE" + exit 1 + fi + + echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$LOG_FILE" + + # 4. 遍历运行所有目标 + for target in "${FUZZ_TARGETS[@]}"; do + run_command \ + "python3 infra/helper.py run_fuzzer $PROJECT_NAME $target -- -max_total_time=180" \ + "步骤3/5: 运行目标 [$target] (120秒超时)" \ + "124,1" # 允许超时(124)和发现崩溃(1) +done + + # 5. 生成覆盖率报告 + # run_command \ + # "python3 infra/helper.py build_fuzzers --sanitizer coverage $PROJECT_NAME" \ + # "步骤4/5: 编译覆盖率版本" + + # run_command \ + # "python3 infra/helper.py coverage --no-serve $PROJECT_NAME" \ + # "步骤5/5: 生成覆盖率报告" + + echo "✅ 所有步骤完成!结果查看:" + echo "🔍 测试日志: $LOG_FILE" + echo "📊 覆盖率报告(暂无): $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/report/coverage/index.html" + echo "💥 崩溃报告: $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/crashes/" +} + +main "$@" \ No newline at end of file diff --git a/valid_projects.txt b/valid_projects.txt new file mode 100644 index 0000000..94fdcd5 --- /dev/null +++ b/valid_projects.txt @@ -0,0 +1,234 @@ +abseil-py +adal +aiohttp +aniso8601 +ansible +argcomplete +arrow-py +asn1crypto +asteval +astroid +asttokens +attrs +autoflake +autopep8 +azure-sdk-for-python +babel +black +botocore +bottleneck +bz2file +cachetools +cffi +chardet +charset_normalizer +click +cloud-custodian +configparser +connexion +coveragepy +croniter +cryptography +cssselect +dask +decorator +defusedxml +digest +dill +distlib +dnspython +docutils +ecdsa-python +et-xmlfile +face +filelock +filesystem_spec +flask +flask-jwt-extended +flask-restx +flask-wtf +fonttools +ftfy +g-api-auth-httplib2 +g-api-auth-library-python +g-api-pubsub +g-api-py-api-common-protos +g-api-py-oauthlib +g-api-python-bigquery-storage +g-api-python-client +g-api-python-cloud-core +g-api-python-firestore +g-api-python-tasks +g-api-resource-manager +g-api-resumable-media-python +g-api-secret-manager +g-apis-py-api-core +gast +gc-iam +gcloud-error-py +g-cloud-logging-py +gcp-python-cloud-storage +genshi +gitdb +glom +gprof2dot +g-py-bigquery +g-py-crc32c +grpc-py +gunicorn +h11 +h5py +hiredis-py +html2text +html5lib-python +httpcore +httpretty +httpx +idna +ijson +importlib_metadata +iniconfig +ipaddress +ipykernel +ipython +isodate +itsdangerous +jedi +jinja2 +jmespathpy +joblib +jsmin +jupyter-nbconvert +jupyter_server +kafka +keras +kiwisolver +lark-parser +libcst +looker-sdk +lxml +mako +markupsafe +matplotlib +mccabe +mdit-py-plugins +mdurl +more-itertools +mrab-regex +msal +msgpack-python +multidict +mutagen +nbclassic +nbformat +netaddr-py +networkx +ntlm2 +ntlm-auth +numexpr +numpy +oauth2 +oauthlib +olefile +openapi-schema-validator +opencensus-python +openpyxl +opt_einsum +oracle-py-cx +orjson +oscrypto +packaging +pandas +paramiko +parse +parsimonious +pasta +pathlib2 +pdoc +pem +pendulum +pip +ply +protobuf-python +proto-plus-python +psqlparse +psutil +psycopg2 +pyasn1 +pyasn1-modules +pycparser +pycrypto +pydantic +pydateutil +pygments +pyjson5 +pyjwt +pymysql +pynacl +pyodbc +pyparsing +pyrsistent +py-serde +pytables +pytest-py +python3-openid +python-ecdsa +python-email-validator +python-fastjsonschema +python-future +python-graphviz +python-hyperlink +python-jose +python-lz4 +python-markdown +python-markdownify +python-nameparser +python-nvd3 +python-pathspec +python-prompt-toolkit +python-pypdf +python-rison +python-rsa +python-tabulate +pytz +pyxdg +pyyaml +pyzmq +redis-py +requests +retry +rfc3967 +rich +sacremoses +scikit-learn +scipy +setuptools +sigstore-python +simplejson +six +smart_open +soupsieve +sqlalchemy_jsonfield +sqlalchemy-utils +sqlparse +stack_data +tensorflow-addons +tinycss2 +toml +tomlkit +toolbelt +toolz +tqdm +typing_extensions +underscore +uritemplate +urlextract +urllib3 +validators +w3lib +websocket-client +wheel +wtforms +xlrd +yarl +zipp From 16338dcbe8c04209badcf123d23eef86a64517fe Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sun, 20 Jul 2025 23:03:36 +0000 Subject: [PATCH 002/134] feat: Add OSS-Fuzz submodule tracking main branch --- .gitmodules | 4 ++++ fuzz/oss-fuzz | 1 + 2 files changed, 5 insertions(+) create mode 160000 fuzz/oss-fuzz diff --git a/.gitmodules b/.gitmodules index 5b761d3..1683a3c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "UniTSyn"] path = UniTSyn url = https://github.com/SecurityLab-UCD/UniTSyn.git +[submodule "fuzz/oss-fuzz"] + path = fuzz/oss-fuzz + url = https://github.com/google/oss-fuzz.git + branch = main diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz new file mode 160000 index 0000000..1351fe0 --- /dev/null +++ b/fuzz/oss-fuzz @@ -0,0 +1 @@ +Subproject commit 1351fe0fbefb2965b64d51411562fa6df86bcc86 From ca103e9263e14c51cff07edece6da79b638a683e Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 06:46:24 +0000 Subject: [PATCH 003/134] chore: Switch oss-fuzz submodule to personal fork --- fuzz/oss-fuzz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz index 1351fe0..c3d3fb7 160000 --- a/fuzz/oss-fuzz +++ b/fuzz/oss-fuzz @@ -1 +1 @@ -Subproject commit 1351fe0fbefb2965b64d51411562fa6df86bcc86 +Subproject commit c3d3fb70f9ece4d31e623ce0f29bc0dfd34e0af6 From f39e72744df2d08f3c3349bb8ee80520dd87c77d Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 06:47:24 +0000 Subject: [PATCH 004/134] Switch oss-fuzz submodule to personal fork --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 1683a3c..82926c7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,5 +3,5 @@ url = https://github.com/SecurityLab-UCD/UniTSyn.git [submodule "fuzz/oss-fuzz"] path = fuzz/oss-fuzz - url = https://github.com/google/oss-fuzz.git + url = https://github.com/joyguoguo/oss-fuzz.git branch = main From d691eeabc9996e62824814d22caee45ef8eed2ff Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 23:48:08 +0000 Subject: [PATCH 005/134] move the valid_project file --- data/valid_projects.txt | 234 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 data/valid_projects.txt diff --git a/data/valid_projects.txt b/data/valid_projects.txt new file mode 100644 index 0000000..94fdcd5 --- /dev/null +++ b/data/valid_projects.txt @@ -0,0 +1,234 @@ +abseil-py +adal +aiohttp +aniso8601 +ansible +argcomplete +arrow-py +asn1crypto +asteval +astroid +asttokens +attrs +autoflake +autopep8 +azure-sdk-for-python +babel +black +botocore +bottleneck +bz2file +cachetools +cffi +chardet +charset_normalizer +click +cloud-custodian +configparser +connexion +coveragepy +croniter +cryptography +cssselect +dask +decorator +defusedxml +digest +dill +distlib +dnspython +docutils +ecdsa-python +et-xmlfile +face +filelock +filesystem_spec +flask +flask-jwt-extended +flask-restx +flask-wtf +fonttools +ftfy +g-api-auth-httplib2 +g-api-auth-library-python +g-api-pubsub +g-api-py-api-common-protos +g-api-py-oauthlib +g-api-python-bigquery-storage +g-api-python-client +g-api-python-cloud-core +g-api-python-firestore +g-api-python-tasks +g-api-resource-manager +g-api-resumable-media-python +g-api-secret-manager +g-apis-py-api-core +gast +gc-iam +gcloud-error-py +g-cloud-logging-py +gcp-python-cloud-storage +genshi +gitdb +glom +gprof2dot +g-py-bigquery +g-py-crc32c +grpc-py +gunicorn +h11 +h5py +hiredis-py +html2text +html5lib-python +httpcore +httpretty +httpx +idna +ijson +importlib_metadata +iniconfig +ipaddress +ipykernel +ipython +isodate +itsdangerous +jedi +jinja2 +jmespathpy +joblib +jsmin +jupyter-nbconvert +jupyter_server +kafka +keras +kiwisolver +lark-parser +libcst +looker-sdk +lxml +mako +markupsafe +matplotlib +mccabe +mdit-py-plugins +mdurl +more-itertools +mrab-regex +msal +msgpack-python +multidict +mutagen +nbclassic +nbformat +netaddr-py +networkx +ntlm2 +ntlm-auth +numexpr +numpy +oauth2 +oauthlib +olefile +openapi-schema-validator +opencensus-python +openpyxl +opt_einsum +oracle-py-cx +orjson +oscrypto +packaging +pandas +paramiko +parse +parsimonious +pasta +pathlib2 +pdoc +pem +pendulum +pip +ply +protobuf-python +proto-plus-python +psqlparse +psutil +psycopg2 +pyasn1 +pyasn1-modules +pycparser +pycrypto +pydantic +pydateutil +pygments +pyjson5 +pyjwt +pymysql +pynacl +pyodbc +pyparsing +pyrsistent +py-serde +pytables +pytest-py +python3-openid +python-ecdsa +python-email-validator +python-fastjsonschema +python-future +python-graphviz +python-hyperlink +python-jose +python-lz4 +python-markdown +python-markdownify +python-nameparser +python-nvd3 +python-pathspec +python-prompt-toolkit +python-pypdf +python-rison +python-rsa +python-tabulate +pytz +pyxdg +pyyaml +pyzmq +redis-py +requests +retry +rfc3967 +rich +sacremoses +scikit-learn +scipy +setuptools +sigstore-python +simplejson +six +smart_open +soupsieve +sqlalchemy_jsonfield +sqlalchemy-utils +sqlparse +stack_data +tensorflow-addons +tinycss2 +toml +tomlkit +toolbelt +toolz +tqdm +typing_extensions +underscore +uritemplate +urlextract +urllib3 +validators +w3lib +websocket-client +wheel +wtforms +xlrd +yarl +zipp From 25b01915000c45531be9e07a71cfe6d45963441f Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 23:48:36 +0000 Subject: [PATCH 006/134] move the .py file --- fuzz/fuzz_runner_pool.py | 270 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 fuzz/fuzz_runner_pool.py diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py new file mode 100644 index 0000000..abd4b30 --- /dev/null +++ b/fuzz/fuzz_runner_pool.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +fuzz_runner_pool.py + +并行批量执行 OSS-Fuzz 本地测试全流程。使用 multiprocessing.Pool 将项目 +分发到多个 CPU核心上同时处理。 + +用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N] +示例: python3 fuzz_runner_pool.py /home/jiayiguo/FuzzAug/data/valid_projects.txt --workers 4 +""" + +import os +import sys +import subprocess +import argparse +from datetime import datetime +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +# --- 全局配置 (可通过命令行参数覆盖) --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs" + +def setup_logging(project_name: str) -> Path: + """为单个项目创建带时间戳的日志文件.""" + LOG_DIR.mkdir(parents=True, exist_ok=True) + try: + LOG_DIR.chmod(0o777) + except PermissionError: + # 在并行环境中,这里可能会有多个进程同时尝试,打印一次警告即可 + pass + + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + log_file_path = LOG_DIR / f"oss_fuzz_{project_name}_{timestamp}.log" + return log_file_path + +def log_and_print(message: str, log_file: Path, to_stdout: bool = True): + """将消息写入日志文件,并根据需要打印到控制台。""" + if to_stdout: + # 添加进程ID,以便区分并行输出 + print(f"[PID:{os.getpid()}] {message}") + with open(log_file, "a", encoding="utf-8") as f: + f.write(message + "\n") + +def run_command( + cmd: str, + log_msg: str, + log_file: Path, + allowed_exit_codes: Optional[List[int]] = None, + auto_confirm: bool = True # 新增自动确认参数 +) -> bool: + """执行一个 shell 命令,并将输出实时流式传输到日志文件。""" + if allowed_exit_codes is None: + allowed_exit_codes = [] + + log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) + log_and_print(f" $ {cmd}", log_file, to_stdout=False) + + try: + # 添加自动确认机制 + if auto_confirm: + cmd = f"yes | {cmd}" + + process = subprocess.Popen( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, encoding='utf-8', errors='replace', bufsize=1 + ) + + with open(log_file, "a", encoding="utf-8") as f: + for line in iter(process.stdout.readline, ''): + f.write(line) # 只写入日志,避免控制台输出混乱 + + process.wait() + exit_code = process.returncode + + if exit_code == 0: + log_and_print(f"✅ 命令成功完成。", log_file, to_stdout=False) + return True + elif exit_code in allowed_exit_codes: + log_and_print(f"ℹ️ 命令以预期状态退出: {exit_code}", log_file, to_stdout=False) + return True + else: + log_and_print(f"❌ 命令执行失败 (退出码: {exit_code})", log_file) + return False + except Exception as e: + log_and_print(f"💥 执行命令时发生异常: {e}", log_file) + return False + +def discover_fuzz_targets(project_name: str) -> List[str]: + """自动发现 Fuzz 目标。""" + project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + project_src_dir = OSS_FUZZ_DIR / "projects" / project_name + targets = [] + + if project_out_dir.is_dir(): + for f in project_out_dir.iterdir(): + if (f.is_file() and os.access(f, os.X_OK) and + f.name.startswith("fuzz_") and '.' not in f.name): + targets.append(f.name) + + if not targets and project_src_dir.is_dir(): + for py_file in project_src_dir.glob("fuzz_*.py"): + try: + with open(py_file, "r", encoding="utf-8") as f: + if "atheris.Setup" in f.read(): + targets.append(py_file.stem) + except Exception as e: + # 在worker进程中,只打印到自己的日志 + print(f"⚠️ 警告: 读取文件 {py_file} 失败: {e}") + return targets + +def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]: + """ + 处理单个项目的完整工作流 (Worker Function)。 + 此函数由进程池中的每个工作进程独立执行。 + + Returns: + 一个元组 (is_success: bool, project_name: str) + """ + log_file = setup_logging(project_name) + + # 在 worker 的开头打印,以便追踪 + log_and_print("=" * 60, log_file) + log_and_print(f"🚀 开始处理项目: {project_name}", log_file) + log_and_print(f"📝 日志文件: {log_file}", log_file) + log_and_print("=" * 60, log_file) + + # 每个进程都需要设置自己的工作目录 + try: + os.chdir(OSS_FUZZ_DIR) + except FileNotFoundError: + log_and_print(f"❌ 严重错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在!", log_file) + return (False, project_name) + + # 步骤 1: 构建Docker镜像(启用自动确认) + if not run_command( + f"python3 infra/helper.py build_image {project_name}", + f"步骤1/5: 构建 {project_name} 的Docker镜像", log_file, + auto_confirm=True # 自动确认所有提示 + ): + log_and_print(f"❌ 项目 {project_name} 构建镜像失败", log_file) + return (False, project_name) + + # 步骤 2: 编译带检测器的fuzzer(启用自动确认) + if not run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + f"步骤2/5: 编译 {project_name} 的fuzzer (sanitizer={sanitizer})", log_file, + auto_confirm=True # 自动确认所有提示 + ): + log_and_print(f"❌ 项目 {project_name} 编译fuzzer失败", log_file) + return (False, project_name) + + # 步骤 3: 自动发现目标 + log_and_print(f"🔍 正在为 {project_name} 自动发现fuzz目标...", log_file) + fuzz_targets = discover_fuzz_targets(project_name) + + if not fuzz_targets: + log_and_print(f"⚠️ 警告: {project_name} 未找到任何fuzz目标!跳过运行步骤。", log_file) + return (True, project_name) + + log_and_print(f"✅ 发现目标: {', '.join(fuzz_targets)}", log_file) + + # 步骤 4: 遍历运行所有目标(启用自动确认) + for i, target in enumerate(fuzz_targets, 1): + run_command( + f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60", + f"步骤4/{len(fuzz_targets)}: 运行目标 [{target}] (60秒)", log_file, + allowed_exit_codes=[1, 124], + auto_confirm=True # 自动确认所有提示 + ) + + # 步骤 5: 生成覆盖率报告 (暂无) + log_and_print("步骤5/5: 生成覆盖率报告 (当前版本暂未实现)", log_file) + log_and_print(f"✅ 项目 {project_name} 处理完成!", log_file) + return (True, project_name) + +def main(): + """ + 主流程函数:设置进程池并分发任务。 + """ + parser = argparse.ArgumentParser( + description="OSS-Fuzz 并行批量测试工具", + formatter_class=argparse.RawTextHelpFormatter, + epilog="示例:\n python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined" + ) + parser.add_argument( + "project_list_file", nargs="?", default="valid_projects.txt", + help="包含待测试项目列表的文本文件。(默认: valid_projects.txt)" + ) + parser.add_argument( + "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"], + help="要使用的 sanitizer 类型。(默认: address)" + ) + parser.add_argument( + "--workers", type=int, default=cpu_count(), + help=f"并发执行的工作进程数。(默认: 系统CPU核心数, 即 {cpu_count()})" + ) + args = parser.parse_args() + + # --- 环境检查 --- + if not OSS_FUZZ_DIR.is_dir(): + print(f"❌ 错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在!") + sys.exit(1) + + project_list_path = Path(args.project_list_file) + if not project_list_path.is_file(): + print(f"❌ 错误: 项目列表文件 '{project_list_path}' 不存在!") + sys.exit(1) + + print(f"✅ 环境检查通过。将使用 {args.workers} 个并行工作进程。") + + # --- 读取和准备任务 --- + try: + with open(project_list_path, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")] + except Exception as e: + print(f"❌ 读取项目列表文件时出错: {e}") + sys.exit(1) + + if not projects: + print("⚠️ 警告: 项目列表为空,无可执行任务。") + sys.exit(0) + + # 为 starmap 准备任务参数列表,每个项目都是一个 (project_name, sanitizer) 元组 + tasks = [(project, args.sanitizer) for project in projects] + total_projects = len(tasks) + + print(f"\n🚀 即将并行处理 {total_projects} 个项目...") + + # --- 执行并行处理 --- + # 使用 with 语句确保进程池被正确关闭 + with Pool(processes=args.workers) as pool: + # starmap 会阻塞直到所有任务完成 + # 它将 tasks 列表中的每个元组解包作为参数传递给 worker 函数 + results = pool.starmap(run_project_workflow, tasks) + + # --- 收集并打印结果 --- + failed_projects = [] + for success, project_name in results: + if success: + print(f"✅ 项目 {project_name} 成功完成") + else: + print(f"❌ 项目 {project_name} 处理失败") + failed_projects.append(project_name) + + # --- 最终总结 --- + fail_count = len(failed_projects) + success_count = total_projects - fail_count + + print("\n" + "=" * 60) + print("🎉 批量处理完成!") + print(f"📊 总计: {total_projects} 个项目") + print(f"✅ 成功: {success_count}") + print(f"❌ 失败: {fail_count}") + + if failed_projects: + print("📛 失败项目列表:") + for proj in sorted(failed_projects): + print(f" • {proj}") + print("\n💡 提示: 失败项目的详细信息请查看对应的日志文件。") + print(f" 日志目录: {LOG_DIR}") + +if __name__ == "__main__": + # 在 Windows 或 macOS 的某些 Python 版本上,需要将 main 调用放在这个保护块中 + # 以防止子进程重新导入和执行主模块代码,导致无限递归。 + main() \ No newline at end of file From fcf80a94403de8b1f995b56907b410ebc4537cfd Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 23:50:42 +0000 Subject: [PATCH 007/134] create build_oss_fuzz.py --- fuzz/build_oss_fuzz.py | 131 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 fuzz/build_oss_fuzz.py diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py new file mode 100644 index 0000000..525c556 --- /dev/null +++ b/fuzz/build_oss_fuzz.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +build_oss_fuzz.py + +并行构建 OSS-Fuzz 项目(Docker 镜像和 Fuzzer 编译)。 +使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。 + +用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N] +示例: python3 build_oss_fuzz.py valid_projects.txt --sanitizer address --workers 4 +""" + +import os +import sys +import subprocess +import argparse +from datetime import datetime +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +# --- 全局配置 --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "build_logs" + +def setup_logging(project_name: str) -> Path: + """为单个项目创建带时间戳的日志文件""" + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + return LOG_DIR / f"build_{project_name}_{timestamp}.log" + +def log_and_print(message: str, log_file: Path, to_stdout: bool = True): + """将消息写入日志并打印到控制台""" + if to_stdout: + print(f"[PID:{os.getpid()}] {message}") + with open(log_file, "a", encoding="utf-8") as f: + f.write(f"{datetime.now().isoformat()} {message}\n") + +def run_command( + cmd: str, + log_msg: str, + log_file: Path, + allowed_exit_codes: Optional[List[int]] = None +) -> bool: + """执行 shell 命令并实时记录输出""" + allowed_exit_codes = allowed_exit_codes or [] + log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) + log_and_print(f" $ {cmd}", log_file, to_stdout=False) + + try: + process = subprocess.Popen( + f"yes | {cmd}", # 自动确认所有提示 + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace" + ) + with open(log_file, "a", encoding="utf-8") as f: + for line in iter(process.stdout.readline, ""): + f.write(line) + process.wait() + exit_code = process.returncode + if exit_code in [0, *allowed_exit_codes]: + log_and_print(f"✅ 命令成功完成", log_file, to_stdout=False) + return True + log_and_print(f"❌ 命令失败 (退出码: {exit_code})", log_file) + return False + except Exception as e: + log_and_print(f"💥 执行异常: {e}", log_file) + return False + +def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: + """单个项目的构建工作流""" + log_file = setup_logging(project_name) + os.chdir(OSS_FUZZ_DIR) + + log_and_print("="*60, log_file) + log_and_print(f"🔨 开始构建项目: {project_name}", log_file) + log_and_print(f"📝 日志路径: {log_file}", log_file) + log_and_print("="*60, log_file) + + # 1. 构建 Docker 镜像 + if not run_command( + f"python3 infra/helper.py build_image {project_name}", + "步骤1/2: 构建 Docker 镜像", + log_file + ): + return (False, project_name) + + # 2. 编译 Fuzzer + if not run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + f"步骤2/2: 编译 Fuzzer (sanitizer={sanitizer})", + log_file + ): + return (False, project_name) + + log_and_print(f"✅ 项目 {project_name} 构建完成", log_file) + return (True, project_name) + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz 并行构建工具") + parser.add_argument("project_list", help="项目列表文件路径") + parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"]) + parser.add_argument("--workers", type=int, default=cpu_count()) + args = parser.parse_args() + + # 读取项目列表 + try: + with open(args.project_list, "r") as f: + projects = [line.strip() for line in f if line.strip()] + except Exception as e: + print(f"❌ 读取项目列表失败: {e}") + sys.exit(1) + + # 并行构建 + with Pool(args.workers) as pool: + results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) + + # 输出结果 + failed = [p for success, p in results if not success] + print(f"\n📊 构建完成: 成功 {len(projects)-len(failed)}/{len(projects)}") + if failed: + print("❌ 失败项目: " + ", ".join(failed)) + +if __name__ == "__main__": + main() \ No newline at end of file From 3e8e7f4ca758b2f42ab8df7f2c371fadf4b6c06c Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 21 Jul 2025 23:53:27 +0000 Subject: [PATCH 008/134] create run_fuzz_target.py --- fuzz/oss-fuzz | 2 +- fuzz/run_fuzz_target.py | 140 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 fuzz/run_fuzz_target.py diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz index c3d3fb7..0b81ba5 160000 --- a/fuzz/oss-fuzz +++ b/fuzz/oss-fuzz @@ -1 +1 @@ -Subproject commit c3d3fb70f9ece4d31e623ce0f29bc0dfd34e0af6 +Subproject commit 0b81ba5d97ae3d1402744e00b1d9075fed7b7f1e diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py new file mode 100644 index 0000000..ae7b036 --- /dev/null +++ b/fuzz/run_fuzz_target.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +run_fuzz_target.py + +并行运行 OSS-Fuzz 测试目标。 +使用 multiprocessing.Pool 分发任务到多个 CPU 核心。 + +用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N] +示例: python3 run_fuzz_target.py valid_projects.txt --timeout 120 --workers 4 +""" + +import os +import sys +import subprocess +import argparse +from datetime import datetime +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +# --- 全局配置 --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "run_logs" + +def setup_logging(project_name: str) -> Path: + """创建带时间戳的运行日志""" + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + return LOG_DIR / f"run_{project_name}_{timestamp}.log" + +def log_and_print(message: str, log_file: Path, to_stdout: bool = True): + """日志和控制台输出""" + if to_stdout: + print(f"[PID:{os.getpid()}] {message}") + with open(log_file, "a", encoding="utf-8") as f: + f.write(f"{datetime.now().isoformat()} {message}\n") + +def run_command( + cmd: str, + log_msg: str, + log_file: Path, + allowed_exit_codes: Optional[List[int]] = None +) -> bool: + """执行命令并实时记录输出""" + allowed_exit_codes = allowed_exit_codes or [] + log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) + log_and_print(f" $ {cmd}", log_file, to_stdout=False) + + try: + process = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace" + ) + with open(log_file, "a", encoding="utf-8") as f: + for line in iter(process.stdout.readline, ""): + f.write(line) + process.wait() + return process.returncode in [0, *allowed_exit_codes] + except Exception as e: + log_and_print(f"💥 执行异常: {e}", log_file) + return False + +def discover_targets(project_name: str) -> List[str]: + """发现可用的 Fuzz 目标""" + out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + targets = [] + if out_dir.exists(): + for f in out_dir.iterdir(): + if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK): + targets.append(f.name) + return targets + +def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: + """单个项目的测试工作流""" + log_file = setup_logging(project_name) + os.chdir(OSS_FUZZ_DIR) + + log_and_print("="*60, log_file) + log_and_print(f"🚀 开始测试项目: {project_name}", log_file) + log_and_print(f"📝 日志路径: {log_file}", log_file) + log_and_print("="*60, log_file) + + # 1. 发现测试目标 + targets = discover_targets(project_name) + if not targets: + log_and_print("⚠️ 未发现测试目标", log_file) + return (False, project_name) + log_and_print(f"🔍 发现 {len(targets)} 个测试目标", log_file) + + # 2. 运行所有目标 + all_success = True + for i, target in enumerate(targets, 1): + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}" + success = run_command( + cmd, + f"运行目标 [{i}/{len(targets)}] {target} (超时={timeout}s)", + log_file, + allowed_exit_codes=[1, 124] # 允许超时退出 + ) + all_success &= success + + # 3. 生成报告(占位) + log_and_print("📊 覆盖率报告生成 (当前版本暂未实现)", log_file) + return (all_success, project_name) + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz 并行测试工具") + parser.add_argument("project_list", help="项目列表文件路径") + parser.add_argument("--timeout", type=int, default=60, help="单目标测试超时时间(秒)") + parser.add_argument("--workers", type=int, default=cpu_count()) + args = parser.parse_args() + + # 读取项目列表 + try: + with open(args.project_list) as f: + projects = [line.strip() for line in f if line.strip()] + except Exception as e: + print(f"❌ 读取项目列表失败: {e}") + sys.exit(1) + + # 并行运行 + with Pool(args.workers) as pool: + results = pool.starmap(run_project, [(p, args.timeout) for p in projects]) + + # 输出结果 + failed = [p for success, p in results if not success] + print(f"\n📊 测试完成: 成功 {len(projects)-len(failed)}/{len(projects)}") + if failed: + print("❌ 失败项目: " + ", ".join(failed)) + +if __name__ == "__main__": + main() \ No newline at end of file From def645e95f0040ea862ce635651ada09780d37f4 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 00:15:31 +0000 Subject: [PATCH 009/134] split the pool.py into build_oss_fuzz and run_fuzz_target --- fuzz/build_oss_fuzz.py | 4 +++- fuzz/fuzz_runner_pool.py | 2 +- fuzz/run_fuzz_target.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 525c556..7c60d56 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -8,7 +8,9 @@ 使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。 用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N] -示例: python3 build_oss_fuzz.py valid_projects.txt --sanitizer address --workers 4 +示例: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ + --sanitizer address \ + --workers 8 """ import os diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py index abd4b30..7c40d61 100644 --- a/fuzz/fuzz_runner_pool.py +++ b/fuzz/fuzz_runner_pool.py @@ -8,7 +8,7 @@ 分发到多个 CPU核心上同时处理。 用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N] -示例: python3 fuzz_runner_pool.py /home/jiayiguo/FuzzAug/data/valid_projects.txt --workers 4 +示例: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4 """ import os diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index ae7b036..9765dbb 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -8,7 +8,7 @@ 使用 multiprocessing.Pool 分发任务到多个 CPU 核心。 用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N] -示例: python3 run_fuzz_target.py valid_projects.txt --timeout 120 --workers 4 +示例: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4 """ import os From 1251bcd45a1d3ae84741d9217d439bc55b14756f Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 00:15:52 +0000 Subject: [PATCH 010/134] delete the .sh files --- script_fuzz_py_batch_final.sh | 201 ----------------------------- script_fuzz_py_final.sh | 126 ------------------ valid_projects.txt | 234 ---------------------------------- 3 files changed, 561 deletions(-) delete mode 100644 script_fuzz_py_batch_final.sh delete mode 100644 script_fuzz_py_final.sh delete mode 100644 valid_projects.txt diff --git a/script_fuzz_py_batch_final.sh b/script_fuzz_py_batch_final.sh deleted file mode 100644 index 1d7d9af..0000000 --- a/script_fuzz_py_batch_final.sh +++ /dev/null @@ -1,201 +0,0 @@ -#!/bin/bash -# script_fuzz_py_batch_final.sh -# 批量执行OSS-Fuzz本地测试全流程:从文件读取项目列表,依次为每个项目自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告(暂无)) -# 用法:./script_fuzz_py_batch_final.sh [项目列表文件] [sanitizer类型] -# 示例: ./script_fuzz_py_batch_final.sh valid_projects.txt address - -# --- 全局配置 --- -PROJECT_LIST_FILE="${1:-valid_projects.txt}" # 默认项目列表文件 -SANITIZER="${2:-address}" # 默认检测器类型 -OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz" # OSS-Fuzz目录 -LOG_DIR="$OSS_FUZZ_DIR/script_lz4_batch_logs" # 所有项目的总日志目录 -FAILED_PROJECTS=() # 存储失败项目列表 - -# --- 环境检查 --- -check_environment() { - if [ ! -d "$OSS_FUZZ_DIR" ]; then - echo "❌ 错误: OSS-Fuzz 目录 '$OSS_FUZZ_DIR' 不存在!" - return 1 - fi - if [ ! -f "$PROJECT_LIST_FILE" ]; then - echo "❌ 错误: 项目列表文件 '$PROJECT_LIST_FILE' 不存在!" - return 1 - fi - mkdir -p "$LOG_DIR" - chmod 777 "$LOG_DIR" 2>/dev/null || true - cd "$OSS_FUZZ_DIR" || return 1 - echo "✅ 环境检查通过。OSS-Fuzz 目录: $OSS_FUZZ_DIR" -} - -# --- 带日志记录的命令执行 --- -run_command() { - local cmd="$1" - local log_msg="$2" - local log_file="$3" # 日志文件作为参数传入 - local allowed_exit="${4:-}" - - echo "▶️ $log_msg..." | tee -a "$log_file" - set +e - { yes | eval "$cmd" ; } 2>&1 | tee -a "$log_file" - local exit_code=${PIPESTATUS[0]} - set -e - - if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then - echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$log_file" - return 0 - elif [ $exit_code -ne 0 ]; then - echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$log_file" - return 1 # 返回错误而不是退出脚本 - fi -} - -# --- 自动发现 Fuzz 目标 --- -discover_fuzz_targets() { - local project_name="$1" - local project_dir="$OSS_FUZZ_DIR/build/out/$project_name" - local project_src="$OSS_FUZZ_DIR/projects/$project_name" - local targets=() - - if [ -d "$project_dir" ]; then - while IFS= read -r -d $'\0' file; do - filename=$(basename "$file") - if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then - targets+=("$filename") - fi - done < <(find "$project_dir" -maxdepth 1 -type f -print0) - fi - - if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then - while IFS= read -r -d $'\0' file; do - if grep -q "atheris.Setup" "$file"; then - targets+=("$(basename "${file%.*}")") - fi - done < <(find "$project_src" -name 'fuzz_*.py' -print0) - fi - - echo "${targets[@]}" -} - -# --- 单个项目的完整处理流程 --- -process_project() { - local project_name="$1" - local log_file="$LOG_DIR/oss_fuzz_${project_name}_$(date +%Y%m%d%H%M%S).log" - local project_failed=0 - - echo "============================================================" | tee -a "$log_file" - echo "🚀 开始处理项目: $project_name" | tee -a "$log_file" - echo "📝 日志文件: $log_file" | tee -a "$log_file" - echo "============================================================" | tee -a "$log_file" - - #1. 构建Docker镜像 - if ! run_command \ - "python3 infra/helper.py build_image $project_name" \ - "步骤1/5: 构建 $project_name 的Docker镜像" \ - "$log_file"; then - echo "❌ 项目 $project_name 构建镜像失败,跳过后续步骤" | tee -a "$log_file" - project_failed=1 - fi - - # 2. 编译带检测器的fuzzer (仅在构建镜像成功后执行) - if [ $project_failed -eq 0 ]; then - if ! run_command \ - "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $project_name" \ - "步骤2/5: 编译 $project_name 的fuzzer (sanitizer=$SANITIZER)" \ - "$log_file"; then - echo "❌ 项目 $project_name 编译fuzzer失败,跳过后续步骤" | tee -a "$log_file" - project_failed=1 - fi - fi - - # 3. 自动发现目标 (仅在编译成功后执行) - if [ $project_failed -eq 0 ]; then - echo "🔍 正在为 $project_name 自动发现fuzz目标..." | tee -a "$log_file" - FUZZ_TARGETS=($(discover_fuzz_targets "$project_name")) - - if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then - echo "⚠️ 警告: 项目 $project_name 未找到任何fuzz目标!跳过运行步骤。" | tee -a "$log_file" - else - echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$log_file" - - # 4. 遍历运行所有目标 默认测试60秒 - for target in "${FUZZ_TARGETS[@]}"; do - if ! run_command \ - "python3 infra/helper.py run_fuzzer $project_name $target -- -max_total_time=60" \ - "步骤3/5: 运行目标 [$target] (60秒超时)" \ - "$log_file" \ - "124,1"; then # 允许超时(124)和发现崩溃(1) - echo "⚠️ 警告: 目标 [$target] 运行失败,继续下一个目标" | tee -a "$log_file" - fi - done - fi - fi - - # 5. 生成覆盖率报告 (已注释掉,与原脚本保持一致) - # [保留原有注释的覆盖率代码] - - if [ $project_failed -eq 0 ]; then - echo "✅ 项目 $project_name 处理完成!" | tee -a "$log_file" - - else - echo "❌ 项目 $project_name 处理失败!" | tee -a "$log_file" - - fi - - echo "------------------------------------------------------------" - return $project_failed -} - -# --- 主流程 --- -main() { -if ! check_environment; then - echo "❌ 环境检查失败,脚本终止" - exit 1 - fi - - # 新增:读取项目列表到数组(过滤空行和注释行) - local PROJECTS=() - while IFS= read -r line || [[ -n "$line" ]]; do - if [[ -z "$line" || "$line" =~ ^# ]]; then - continue - fi - PROJECTS+=("$line") - done < "$PROJECT_LIST_FILE" - - # 项目总数从数组长度获取(原逻辑从文件行数获取) - local total_projects=${#PROJECTS[@]} - local current_project_num=0 - local success_count=0 - local fail_count=0 - - # 新增:遍历数组处理项目(替代原while读取文件的循环) - for project_name in "${PROJECTS[@]}"; do - - current_project_num=$((current_project_num + 1)) - echo ">>> [ $current_project_num / $total_projects ] 开始处理项目: $project_name <<<" - - if process_project "$project_name"; then - echo "✅ [$current_project_num/$total_projects] 项目 $project_name 成功完成" - ( success_count=$[ $success_count + 1 ]) - else - echo "❌ [$current_project_num/$total_projects] 项目 $project_name 处理失败" - FAILED_PROJECTS+=("$project_name") - ( success_count=$[ $success_count + 1 ]) - fi - - done - - echo "============================================================" - echo "🎉 批量处理完成!" - echo "📊 总计: $total_projects 个项目" - echo "✅ 成功: $success_count" - echo "❌ 失败: $fail_count" - - if [ ${#FAILED_PROJECTS[@]} -gt 0 ]; then - echo "📛 失败项目列表:" - printf ' • %s\n' "${FAILED_PROJECTS[@]}" - echo "💡 提示: 可以重新运行失败项目,检查日志获取详细信息" - echo " 日志目录: $LOG_DIR" - fi -} - -main "$@" \ No newline at end of file diff --git a/script_fuzz_py_final.sh b/script_fuzz_py_final.sh deleted file mode 100644 index c2bf00a..0000000 --- a/script_fuzz_py_final.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -# script_fuzz_py_final.sh -# 执行OSS-Fuzz本地测试全流程:自动发现目标 -> 构建镜像 -> 编译fuzzer -> 运行测试 -> 生成覆盖率报告 -# 用法:script_fuzz_py_final.sh <项目名> [sanitizer类型] - -set -e # 遇到错误立即退出 - -PROJECT_NAME="${1:-abseil-py}" # 默认项目名 -SANITIZER="${2:-address}" # 默认检测器类型 -OSS_FUZZ_DIR="$HOME/oss-fuzz-py/oss-fuzz" # OSS-Fuzz目录 -LOG_DIR="$OSS_FUZZ_DIR/script_lz4_logs" -LOG_FILE="$LOG_DIR/oss_fuzz_${PROJECT_NAME}_$(date +%Y%m%d%H%M%S).log" -# 验证目录有效性 -check_environment() { - if [ ! -d "$OSS_FUZZ_DIR" ]; then - echo "❌ 错误: $OSS_FUZZ_DIR 目录不存在!" - exit 1 - fi - mkdir -p "$LOG_DIR" # 关键修复:创建日志目录 - chmod 777 "$LOG_DIR" 2>/dev/null || true # 宽松权限设置 - cd "$OSS_FUZZ_DIR" || exit 1 -} - -# 带日志记录的命令执行(支持允许的退出码) -run_command() { - local cmd="$1" - local log_msg="$2" - local allowed_exit="${3:-}" # 可选:允许的退出码(逗号分隔) - - echo "▶️ $log_msg..." | tee -a "$LOG_FILE" - set +e # 临时禁用错误退出 - eval "$cmd" 2>&1 | tee -a "$LOG_FILE" - local exit_code=${PIPESTATUS[0]} - set -e # 重新启用错误退出 - - # 检查退出码是否被允许 - if [[ -n "$allowed_exit" && ",$allowed_exit," =~ ",$exit_code," ]]; then - echo "ℹ️ 命令以预期状态退出: $exit_code" | tee -a "$LOG_FILE" - return 0 - elif [ $exit_code -ne 0 ]; then - echo "❌ 命令执行失败: $cmd (退出码: $exit_code)" | tee -a "$LOG_FILE" - exit 1 - fi -} - -# 自动发现fuzz目标 -discover_fuzz_targets() { - local project_dir="$OSS_FUZZ_DIR/build/out/$PROJECT_NAME" - local project_src="$OSS_FUZZ_DIR/projects/$PROJECT_NAME" - local targets=() - - # 编译目录扫描:仅匹配"fuzz_"开头的可执行文件 - if [ -d "$project_dir" ]; then - while IFS= read -r -d $'\0' file; do - filename=$(basename "$file") - if [[ -x "$file" && "$filename" =~ ^fuzz_ && ! "$file" =~ \..*$ ]]; then - targets+=("$filename") - fi - done < <(find "$project_dir" -maxdepth 1 -type f -print0) - fi - - # 源码目录扫描:仅匹配"fuzz_*.py"且含Atheris标识 - if [ ${#targets[@]} -eq 0 ] && [ -d "$project_src" ]; then - while IFS= read -r -d $'\0' file; do - if grep -q "atheris.Setup" "$file"; then - targets+=("$(basename "${file%.*}")") - fi - done < <(find "$project_src" -name 'fuzz_*.py' -print0) - fi - - echo "${targets[@]}" -} - -# 主流程 -main() { - check_environment - echo "==============================" - echo "🚀 开始OSS-Fuzz测试 - 项目: $PROJECT_NAME" - echo "📝 日志文件: $LOG_FILE" - echo "==============================" - - #1. 构建Docker镜像 - run_command \ - "python3 infra/helper.py build_image $PROJECT_NAME" \ - "步骤1/5: 构建Docker镜像" - - # 2. 编译带检测器的fuzzer - run_command \ - "python3 infra/helper.py build_fuzzers --sanitizer $SANITIZER $PROJECT_NAME" \ - "步骤2/5: 编译fuzzer (sanitizer=$SANITIZER)" - - # 3. 自动发现目标 - echo "🔍 自动发现fuzz目标..." - FUZZ_TARGETS=($(discover_fuzz_targets)) - - if [ ${#FUZZ_TARGETS[@]} -eq 0 ]; then - echo "❌ 未找到任何fuzz目标!检查项目配置" | tee -a "$LOG_FILE" - exit 1 - fi - - echo "✅ 发现目标: ${FUZZ_TARGETS[*]}" | tee -a "$LOG_FILE" - - # 4. 遍历运行所有目标 - for target in "${FUZZ_TARGETS[@]}"; do - run_command \ - "python3 infra/helper.py run_fuzzer $PROJECT_NAME $target -- -max_total_time=180" \ - "步骤3/5: 运行目标 [$target] (120秒超时)" \ - "124,1" # 允许超时(124)和发现崩溃(1) -done - - # 5. 生成覆盖率报告 - # run_command \ - # "python3 infra/helper.py build_fuzzers --sanitizer coverage $PROJECT_NAME" \ - # "步骤4/5: 编译覆盖率版本" - - # run_command \ - # "python3 infra/helper.py coverage --no-serve $PROJECT_NAME" \ - # "步骤5/5: 生成覆盖率报告" - - echo "✅ 所有步骤完成!结果查看:" - echo "🔍 测试日志: $LOG_FILE" - echo "📊 覆盖率报告(暂无): $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/report/coverage/index.html" - echo "💥 崩溃报告: $OSS_FUZZ_DIR/build/out/$PROJECT_NAME/crashes/" -} - -main "$@" \ No newline at end of file diff --git a/valid_projects.txt b/valid_projects.txt deleted file mode 100644 index 94fdcd5..0000000 --- a/valid_projects.txt +++ /dev/null @@ -1,234 +0,0 @@ -abseil-py -adal -aiohttp -aniso8601 -ansible -argcomplete -arrow-py -asn1crypto -asteval -astroid -asttokens -attrs -autoflake -autopep8 -azure-sdk-for-python -babel -black -botocore -bottleneck -bz2file -cachetools -cffi -chardet -charset_normalizer -click -cloud-custodian -configparser -connexion -coveragepy -croniter -cryptography -cssselect -dask -decorator -defusedxml -digest -dill -distlib -dnspython -docutils -ecdsa-python -et-xmlfile -face -filelock -filesystem_spec -flask -flask-jwt-extended -flask-restx -flask-wtf -fonttools -ftfy -g-api-auth-httplib2 -g-api-auth-library-python -g-api-pubsub -g-api-py-api-common-protos -g-api-py-oauthlib -g-api-python-bigquery-storage -g-api-python-client -g-api-python-cloud-core -g-api-python-firestore -g-api-python-tasks -g-api-resource-manager -g-api-resumable-media-python -g-api-secret-manager -g-apis-py-api-core -gast -gc-iam -gcloud-error-py -g-cloud-logging-py -gcp-python-cloud-storage -genshi -gitdb -glom -gprof2dot -g-py-bigquery -g-py-crc32c -grpc-py -gunicorn -h11 -h5py -hiredis-py -html2text -html5lib-python -httpcore -httpretty -httpx -idna -ijson -importlib_metadata -iniconfig -ipaddress -ipykernel -ipython -isodate -itsdangerous -jedi -jinja2 -jmespathpy -joblib -jsmin -jupyter-nbconvert -jupyter_server -kafka -keras -kiwisolver -lark-parser -libcst -looker-sdk -lxml -mako -markupsafe -matplotlib -mccabe -mdit-py-plugins -mdurl -more-itertools -mrab-regex -msal -msgpack-python -multidict -mutagen -nbclassic -nbformat -netaddr-py -networkx -ntlm2 -ntlm-auth -numexpr -numpy -oauth2 -oauthlib -olefile -openapi-schema-validator -opencensus-python -openpyxl -opt_einsum -oracle-py-cx -orjson -oscrypto -packaging -pandas -paramiko -parse -parsimonious -pasta -pathlib2 -pdoc -pem -pendulum -pip -ply -protobuf-python -proto-plus-python -psqlparse -psutil -psycopg2 -pyasn1 -pyasn1-modules -pycparser -pycrypto -pydantic -pydateutil -pygments -pyjson5 -pyjwt -pymysql -pynacl -pyodbc -pyparsing -pyrsistent -py-serde -pytables -pytest-py -python3-openid -python-ecdsa -python-email-validator -python-fastjsonschema -python-future -python-graphviz -python-hyperlink -python-jose -python-lz4 -python-markdown -python-markdownify -python-nameparser -python-nvd3 -python-pathspec -python-prompt-toolkit -python-pypdf -python-rison -python-rsa -python-tabulate -pytz -pyxdg -pyyaml -pyzmq -redis-py -requests -retry -rfc3967 -rich -sacremoses -scikit-learn -scipy -setuptools -sigstore-python -simplejson -six -smart_open -soupsieve -sqlalchemy_jsonfield -sqlalchemy-utils -sqlparse -stack_data -tensorflow-addons -tinycss2 -toml -tomlkit -toolbelt -toolz -tqdm -typing_extensions -underscore -uritemplate -urlextract -urllib3 -validators -w3lib -websocket-client -wheel -wtforms -xlrd -yarl -zipp From 21017f1c2cf08f1929775b3480bf01320ec0315b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 00:45:28 +0000 Subject: [PATCH 011/134] translate to english --- fuzz/build_oss_fuzz.py | 56 +++++++------- fuzz/fuzz_runner_pool.py | 158 +++++++++++++++++++-------------------- fuzz/run_fuzz_target.py | 62 +++++++-------- 3 files changed, 138 insertions(+), 138 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 7c60d56..1c78a0d 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -4,11 +4,11 @@ """ build_oss_fuzz.py -并行构建 OSS-Fuzz 项目(Docker 镜像和 Fuzzer 编译)。 -使用 multiprocessing.Pool 将项目分发到多个 CPU 核心同时处理。 +Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation). +Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing. -用法: python3 build_oss_fuzz.py [项目列表文件] [--sanitizer 类型] [--workers N] -示例: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ +Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N] +Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ --sanitizer address \ --workers 8 """ @@ -22,19 +22,19 @@ from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -# --- 全局配置 --- +# --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "build_logs" def setup_logging(project_name: str) -> Path: - """为单个项目创建带时间戳的日志文件""" + """Create a timestamped log file for a single project""" LOG_DIR.mkdir(parents=True, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d%H%M%S") return LOG_DIR / f"build_{project_name}_{timestamp}.log" def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """将消息写入日志并打印到控制台""" + """Write message to log and print to console""" if to_stdout: print(f"[PID:{os.getpid()}] {message}") with open(log_file, "a", encoding="utf-8") as f: @@ -46,14 +46,14 @@ def run_command( log_file: Path, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """执行 shell 命令并实时记录输出""" + """Execute a shell command and stream output to log in real-time""" allowed_exit_codes = allowed_exit_codes or [] log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) log_and_print(f" $ {cmd}", log_file, to_stdout=False) try: process = subprocess.Popen( - f"yes | {cmd}", # 自动确认所有提示 + f"yes | {cmd}", # Auto-confirm all prompts shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -67,67 +67,67 @@ def run_command( process.wait() exit_code = process.returncode if exit_code in [0, *allowed_exit_codes]: - log_and_print(f"✅ 命令成功完成", log_file, to_stdout=False) + log_and_print(f"✅ Command completed successfully", log_file, to_stdout=False) return True - log_and_print(f"❌ 命令失败 (退出码: {exit_code})", log_file) + log_and_print(f"❌ Command failed (exit code: {exit_code})", log_file) return False except Exception as e: - log_and_print(f"💥 执行异常: {e}", log_file) + log_and_print(f"💥 Execution exception: {e}", log_file) return False def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: - """单个项目的构建工作流""" + """Build workflow for a single project""" log_file = setup_logging(project_name) os.chdir(OSS_FUZZ_DIR) log_and_print("="*60, log_file) - log_and_print(f"🔨 开始构建项目: {project_name}", log_file) - log_and_print(f"📝 日志路径: {log_file}", log_file) + log_and_print(f"🔨 Starting build for project: {project_name}", log_file) + log_and_print(f"📝 Log path: {log_file}", log_file) log_and_print("="*60, log_file) - # 1. 构建 Docker 镜像 + # 1. Build Docker image if not run_command( f"python3 infra/helper.py build_image {project_name}", - "步骤1/2: 构建 Docker 镜像", + "Step 1/2: Building Docker image", log_file ): return (False, project_name) - # 2. 编译 Fuzzer + # 2. Compile Fuzzers if not run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - f"步骤2/2: 编译 Fuzzer (sanitizer={sanitizer})", + f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})", log_file ): return (False, project_name) - log_and_print(f"✅ 项目 {project_name} 构建完成", log_file) + log_and_print(f"✅ Project {project_name} build completed", log_file) return (True, project_name) def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz 并行构建工具") - parser.add_argument("project_list", help="项目列表文件路径") + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool") + parser.add_argument("project_list", help="Project list file path") parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"]) parser.add_argument("--workers", type=int, default=cpu_count()) args = parser.parse_args() - # 读取项目列表 + # Read project list try: with open(args.project_list, "r") as f: projects = [line.strip() for line in f if line.strip()] except Exception as e: - print(f"❌ 读取项目列表失败: {e}") + print(f"❌ Failed to read project list: {e}") sys.exit(1) - # 并行构建 + # Parallel build with Pool(args.workers) as pool: results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) - # 输出结果 + # Output results failed = [p for success, p in results if not success] - print(f"\n📊 构建完成: 成功 {len(projects)-len(failed)}/{len(projects)}") + print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: - print("❌ 失败项目: " + ", ".join(failed)) + print("❌ Failed projects: " + ", ".join(failed)) if __name__ == "__main__": main() \ No newline at end of file diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py index 7c40d61..996b940 100644 --- a/fuzz/fuzz_runner_pool.py +++ b/fuzz/fuzz_runner_pool.py @@ -4,11 +4,11 @@ """ fuzz_runner_pool.py -并行批量执行 OSS-Fuzz 本地测试全流程。使用 multiprocessing.Pool 将项目 -分发到多个 CPU核心上同时处理。 +Parallel batch execution of the entire OSS-Fuzz local testing process. +Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing. -用法: python3 fuzz_runner_pool.py [项目列表文件] [--sanitizer 类型] [--workers N] -示例: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4 +Usage: python3 fuzz_runner_pool.py [project_list_file] [--sanitizer type] [--workers N] +Example: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4 """ import os @@ -20,18 +20,18 @@ from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -# --- 全局配置 (可通过命令行参数覆盖) --- +# --- Global configuration (can be overridden by command line arguments) --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs" def setup_logging(project_name: str) -> Path: - """为单个项目创建带时间戳的日志文件.""" + """Create a timestamped log file for a single project.""" LOG_DIR.mkdir(parents=True, exist_ok=True) try: LOG_DIR.chmod(0o777) except PermissionError: - # 在并行环境中,这里可能会有多个进程同时尝试,打印一次警告即可 + # In a parallel environment, multiple processes may try simultaneously, printing a warning once is sufficient pass timestamp = datetime.now().strftime("%Y%m%d%H%M%S") @@ -39,9 +39,9 @@ def setup_logging(project_name: str) -> Path: return log_file_path def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """将消息写入日志文件,并根据需要打印到控制台。""" + """Write message to log file and optionally print to console.""" if to_stdout: - # 添加进程ID,以便区分并行输出 + # Add process ID to distinguish parallel outputs print(f"[PID:{os.getpid()}] {message}") with open(log_file, "a", encoding="utf-8") as f: f.write(message + "\n") @@ -51,9 +51,9 @@ def run_command( log_msg: str, log_file: Path, allowed_exit_codes: Optional[List[int]] = None, - auto_confirm: bool = True # 新增自动确认参数 + auto_confirm: bool = True # New auto-confirm parameter ) -> bool: - """执行一个 shell 命令,并将输出实时流式传输到日志文件。""" + """Execute a shell command and stream output to log file in real-time.""" if allowed_exit_codes is None: allowed_exit_codes = [] @@ -61,7 +61,7 @@ def run_command( log_and_print(f" $ {cmd}", log_file, to_stdout=False) try: - # 添加自动确认机制 + # Add auto-confirm mechanism if auto_confirm: cmd = f"yes | {cmd}" @@ -72,26 +72,26 @@ def run_command( with open(log_file, "a", encoding="utf-8") as f: for line in iter(process.stdout.readline, ''): - f.write(line) # 只写入日志,避免控制台输出混乱 + f.write(line) # Write to log only to avoid console clutter process.wait() exit_code = process.returncode if exit_code == 0: - log_and_print(f"✅ 命令成功完成。", log_file, to_stdout=False) + log_and_print(f"✅ Command completed successfully.", log_file, to_stdout=False) return True elif exit_code in allowed_exit_codes: - log_and_print(f"ℹ️ 命令以预期状态退出: {exit_code}", log_file, to_stdout=False) + log_and_print(f"ℹ️ Command exited with expected status: {exit_code}", log_file, to_stdout=False) return True else: - log_and_print(f"❌ 命令执行失败 (退出码: {exit_code})", log_file) + log_and_print(f"❌ Command execution failed (exit code: {exit_code})", log_file) return False except Exception as e: - log_and_print(f"💥 执行命令时发生异常: {e}", log_file) + log_and_print(f"💥 Exception occurred while executing command: {e}", log_file) return False def discover_fuzz_targets(project_name: str) -> List[str]: - """自动发现 Fuzz 目标。""" + """Automatically discover Fuzz targets.""" project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name project_src_dir = OSS_FUZZ_DIR / "projects" / project_name targets = [] @@ -109,162 +109,162 @@ def discover_fuzz_targets(project_name: str) -> List[str]: if "atheris.Setup" in f.read(): targets.append(py_file.stem) except Exception as e: - # 在worker进程中,只打印到自己的日志 - print(f"⚠️ 警告: 读取文件 {py_file} 失败: {e}") + # In worker processes, only print to own log + print(f"⚠️ Warning: Failed to read file {py_file}: {e}") return targets def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]: """ - 处理单个项目的完整工作流 (Worker Function)。 - 此函数由进程池中的每个工作进程独立执行。 + Handle the complete workflow for a single project (Worker Function). + This function is executed independently by each worker process in the process pool. Returns: - 一个元组 (is_success: bool, project_name: str) + A tuple (is_success: bool, project_name: str) """ log_file = setup_logging(project_name) - # 在 worker 的开头打印,以便追踪 + # Print at the beginning of worker for tracking log_and_print("=" * 60, log_file) - log_and_print(f"🚀 开始处理项目: {project_name}", log_file) - log_and_print(f"📝 日志文件: {log_file}", log_file) + log_and_print(f"🚀 Starting processing for project: {project_name}", log_file) + log_and_print(f"📝 Log file: {log_file}", log_file) log_and_print("=" * 60, log_file) - # 每个进程都需要设置自己的工作目录 + # Each process needs to set its own working directory try: os.chdir(OSS_FUZZ_DIR) except FileNotFoundError: - log_and_print(f"❌ 严重错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在!", log_file) + log_and_print(f"❌ Critical error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!", log_file) return (False, project_name) - # 步骤 1: 构建Docker镜像(启用自动确认) + # Step 1: Build Docker image (with auto-confirm enabled) if not run_command( f"python3 infra/helper.py build_image {project_name}", - f"步骤1/5: 构建 {project_name} 的Docker镜像", log_file, - auto_confirm=True # 自动确认所有提示 + f"Step 1/5: Building Docker image for {project_name}", log_file, + auto_confirm=True # Auto-confirm all prompts ): - log_and_print(f"❌ 项目 {project_name} 构建镜像失败", log_file) + log_and_print(f"❌ Project {project_name} failed to build image", log_file) return (False, project_name) - # 步骤 2: 编译带检测器的fuzzer(启用自动确认) + # Step 2: Compile fuzzers with sanitizer (with auto-confirm enabled) if not run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - f"步骤2/5: 编译 {project_name} 的fuzzer (sanitizer={sanitizer})", log_file, - auto_confirm=True # 自动确认所有提示 + f"Step 2/5: Compiling fuzzers for {project_name} (sanitizer={sanitizer})", log_file, + auto_confirm=True # Auto-confirm all prompts ): - log_and_print(f"❌ 项目 {project_name} 编译fuzzer失败", log_file) + log_and_print(f"❌ Project {project_name} failed to compile fuzzers", log_file) return (False, project_name) - # 步骤 3: 自动发现目标 - log_and_print(f"🔍 正在为 {project_name} 自动发现fuzz目标...", log_file) + # Step 3: Automatically discover targets + log_and_print(f"🔍 Automatically discovering fuzz targets for {project_name}...", log_file) fuzz_targets = discover_fuzz_targets(project_name) if not fuzz_targets: - log_and_print(f"⚠️ 警告: {project_name} 未找到任何fuzz目标!跳过运行步骤。", log_file) + log_and_print(f"⚠️ Warning: No fuzz targets found for {project_name}! Skipping run step.", log_file) return (True, project_name) - log_and_print(f"✅ 发现目标: {', '.join(fuzz_targets)}", log_file) + log_and_print(f"✅ Targets discovered: {', '.join(fuzz_targets)}", log_file) - # 步骤 4: 遍历运行所有目标(启用自动确认) + # Step 4: Run all targets (with auto-confirm enabled) for i, target in enumerate(fuzz_targets, 1): run_command( f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60", - f"步骤4/{len(fuzz_targets)}: 运行目标 [{target}] (60秒)", log_file, + f"Step 4/{len(fuzz_targets)}: Running target [{target}] (60 seconds)", log_file, allowed_exit_codes=[1, 124], - auto_confirm=True # 自动确认所有提示 + auto_confirm=True # Auto-confirm all prompts ) - # 步骤 5: 生成覆盖率报告 (暂无) - log_and_print("步骤5/5: 生成覆盖率报告 (当前版本暂未实现)", log_file) - log_and_print(f"✅ 项目 {project_name} 处理完成!", log_file) + # Step 5: Generate coverage report (not implemented yet) + log_and_print("Step 5/5: Generating coverage report (not implemented in current version)", log_file) + log_and_print(f"✅ Project {project_name} processing completed!", log_file) return (True, project_name) def main(): """ - 主流程函数:设置进程池并分发任务。 + Main workflow function: Set up process pool and distribute tasks. """ parser = argparse.ArgumentParser( - description="OSS-Fuzz 并行批量测试工具", + description="OSS-Fuzz Parallel Batch Testing Tool", formatter_class=argparse.RawTextHelpFormatter, - epilog="示例:\n python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined" + epilog="Examples:\n python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined" ) parser.add_argument( "project_list_file", nargs="?", default="valid_projects.txt", - help="包含待测试项目列表的文本文件。(默认: valid_projects.txt)" + help="Text file containing list of projects to test. (Default: valid_projects.txt)" ) parser.add_argument( "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"], - help="要使用的 sanitizer 类型。(默认: address)" + help="Type of sanitizer to use. (Default: address)" ) parser.add_argument( "--workers", type=int, default=cpu_count(), - help=f"并发执行的工作进程数。(默认: 系统CPU核心数, 即 {cpu_count()})" + help=f"Number of concurrent worker processes. (Default: system CPU count, currently {cpu_count()})" ) args = parser.parse_args() - # --- 环境检查 --- + # --- Environment checks --- if not OSS_FUZZ_DIR.is_dir(): - print(f"❌ 错误: OSS-Fuzz 目录 '{OSS_FUZZ_DIR}' 不存在!") + print(f"❌ Error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!") sys.exit(1) project_list_path = Path(args.project_list_file) if not project_list_path.is_file(): - print(f"❌ 错误: 项目列表文件 '{project_list_path}' 不存在!") + print(f"❌ Error: Project list file '{project_list_path}' does not exist!") sys.exit(1) - print(f"✅ 环境检查通过。将使用 {args.workers} 个并行工作进程。") + print(f"✅ Environment checks passed. Will use {args.workers} parallel worker processes.") - # --- 读取和准备任务 --- + # --- Read and prepare tasks --- try: with open(project_list_path, "r", encoding="utf-8") as f: projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")] except Exception as e: - print(f"❌ 读取项目列表文件时出错: {e}") + print(f"❌ Error reading project list file: {e}") sys.exit(1) if not projects: - print("⚠️ 警告: 项目列表为空,无可执行任务。") + print("⚠️ Warning: Project list is empty, no tasks to execute.") sys.exit(0) - # 为 starmap 准备任务参数列表,每个项目都是一个 (project_name, sanitizer) 元组 + # Prepare task parameters for starmap, each project is a (project_name, sanitizer) tuple tasks = [(project, args.sanitizer) for project in projects] total_projects = len(tasks) - print(f"\n🚀 即将并行处理 {total_projects} 个项目...") + print(f"\n🚀 About to process {total_projects} projects in parallel...") - # --- 执行并行处理 --- - # 使用 with 语句确保进程池被正确关闭 + # --- Execute parallel processing --- + # Use with statement to ensure proper pool cleanup with Pool(processes=args.workers) as pool: - # starmap 会阻塞直到所有任务完成 - # 它将 tasks 列表中的每个元组解包作为参数传递给 worker 函数 + # starmap blocks until all tasks complete + # Unpacks each tuple in tasks as arguments to worker function results = pool.starmap(run_project_workflow, tasks) - # --- 收集并打印结果 --- + # --- Collect and print results --- failed_projects = [] for success, project_name in results: if success: - print(f"✅ 项目 {project_name} 成功完成") + print(f"✅ Project {project_name} completed successfully") else: - print(f"❌ 项目 {project_name} 处理失败") + print(f"❌ Project {project_name} processing failed") failed_projects.append(project_name) - # --- 最终总结 --- + # --- Final summary --- fail_count = len(failed_projects) success_count = total_projects - fail_count print("\n" + "=" * 60) - print("🎉 批量处理完成!") - print(f"📊 总计: {total_projects} 个项目") - print(f"✅ 成功: {success_count}") - print(f"❌ 失败: {fail_count}") + print("🎉 Batch processing completed!") + print(f"📊 Total: {total_projects} projects") + print(f"✅ Success: {success_count}") + print(f"❌ Failures: {fail_count}") if failed_projects: - print("📛 失败项目列表:") + print("📛 Failed projects list:") for proj in sorted(failed_projects): print(f" • {proj}") - print("\n💡 提示: 失败项目的详细信息请查看对应的日志文件。") - print(f" 日志目录: {LOG_DIR}") + print("\n💡 Tip: Detailed information for failed projects can be found in corresponding log files.") + print(f" Log directory: {LOG_DIR}") if __name__ == "__main__": - # 在 Windows 或 macOS 的某些 Python 版本上,需要将 main 调用放在这个保护块中 - # 以防止子进程重新导入和执行主模块代码,导致无限递归。 - main() \ No newline at end of file + # On some Python versions for Windows or macOS, main call needs to be in this guard + # To prevent child processes from re-importing and executing main module code, causing infinite recursion. + main() diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 9765dbb..5b43a9e 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -4,11 +4,11 @@ """ run_fuzz_target.py -并行运行 OSS-Fuzz 测试目标。 -使用 multiprocessing.Pool 分发任务到多个 CPU 核心。 +Run OSS-Fuzz test targets in parallel. +Uses multiprocessing.Pool to distribute tasks to multiple CPU cores. -用法: python3 run_fuzz_target.py [项目列表文件] [--timeout 秒] [--workers N] -示例: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4 +Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4 """ import os @@ -20,19 +20,19 @@ from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -# --- 全局配置 --- +# --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs" def setup_logging(project_name: str) -> Path: - """创建带时间戳的运行日志""" + """Create a timestamped run log""" LOG_DIR.mkdir(parents=True, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d%H%M%S") return LOG_DIR / f"run_{project_name}_{timestamp}.log" def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """日志和控制台输出""" + """Log and console output""" if to_stdout: print(f"[PID:{os.getpid()}] {message}") with open(log_file, "a", encoding="utf-8") as f: @@ -44,7 +44,7 @@ def run_command( log_file: Path, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """执行命令并实时记录输出""" + """Execute command and log output in real-time""" allowed_exit_codes = allowed_exit_codes or [] log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) log_and_print(f" $ {cmd}", log_file, to_stdout=False) @@ -65,11 +65,11 @@ def run_command( process.wait() return process.returncode in [0, *allowed_exit_codes] except Exception as e: - log_and_print(f"💥 执行异常: {e}", log_file) + log_and_print(f"💥 Execution exception: {e}", log_file) return False def discover_targets(project_name: str) -> List[str]: - """发现可用的 Fuzz 目标""" + """Discover available Fuzz targets""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name targets = [] if out_dir.exists(): @@ -79,62 +79,62 @@ def discover_targets(project_name: str) -> List[str]: return targets def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: - """单个项目的测试工作流""" + """Testing workflow for a single project""" log_file = setup_logging(project_name) os.chdir(OSS_FUZZ_DIR) log_and_print("="*60, log_file) - log_and_print(f"🚀 开始测试项目: {project_name}", log_file) - log_and_print(f"📝 日志路径: {log_file}", log_file) + log_and_print(f"🚀 Starting testing for project: {project_name}", log_file) + log_and_print(f"📝 Log path: {log_file}", log_file) log_and_print("="*60, log_file) - # 1. 发现测试目标 + # 1. Discover test targets targets = discover_targets(project_name) if not targets: - log_and_print("⚠️ 未发现测试目标", log_file) + log_and_print("⚠️ No test targets found", log_file) return (False, project_name) - log_and_print(f"🔍 发现 {len(targets)} 个测试目标", log_file) + log_and_print(f"🔍 Discovered {len(targets)} test targets", log_file) - # 2. 运行所有目标 + # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}" success = run_command( cmd, - f"运行目标 [{i}/{len(targets)}] {target} (超时={timeout}s)", + f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", log_file, - allowed_exit_codes=[1, 124] # 允许超时退出 + allowed_exit_codes=[1, 124] # Allow timeout exit codes ) all_success &= success - # 3. 生成报告(占位) - log_and_print("📊 覆盖率报告生成 (当前版本暂未实现)", log_file) + # 3. Generate report (placeholder) + log_and_print("📊 Coverage report generation (not implemented in current version)", log_file) return (all_success, project_name) def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz 并行测试工具") - parser.add_argument("project_list", help="项目列表文件路径") - parser.add_argument("--timeout", type=int, default=60, help="单目标测试超时时间(秒)") + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool") + parser.add_argument("project_list", help="Project list file path") + parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)") parser.add_argument("--workers", type=int, default=cpu_count()) args = parser.parse_args() - # 读取项目列表 + # Read project list try: with open(args.project_list) as f: projects = [line.strip() for line in f if line.strip()] except Exception as e: - print(f"❌ 读取项目列表失败: {e}") + print(f"❌ Failed to read project list: {e}") sys.exit(1) - # 并行运行 + # Parallel execution with Pool(args.workers) as pool: results = pool.starmap(run_project, [(p, args.timeout) for p in projects]) - # 输出结果 + # Output results failed = [p for success, p in results if not success] - print(f"\n📊 测试完成: 成功 {len(projects)-len(failed)}/{len(projects)}") + print(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: - print("❌ 失败项目: " + ", ".join(failed)) + print("❌ Failed projects: " + ", ".join(failed)) if __name__ == "__main__": - main() \ No newline at end of file + main() From bb5f14aa0f1c8956f2ab19d9e2bacb0a0a59d2d6 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 03:15:29 +0000 Subject: [PATCH 012/134] fuzz_runner_pool.py:74 --- fuzz/fuzz_runner_pool.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py index 996b940..3580930 100644 --- a/fuzz/fuzz_runner_pool.py +++ b/fuzz/fuzz_runner_pool.py @@ -71,8 +71,9 @@ def run_command( ) with open(log_file, "a", encoding="utf-8") as f: - for line in iter(process.stdout.readline, ''): - f.write(line) # Write to log only to avoid console clutter + if process.stdout is not None: # 显式检查 + for line in iter(process.stdout.readline, ""): + f.write(line) # Write to log only to avoid console clutter process.wait() exit_code = process.returncode From 1b9b0101071faf8b66a06051adfdb16d68644e44 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 03:19:18 +0000 Subject: [PATCH 013/134] edit stdout --- fuzz/fuzz_runner_pool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py index 3580930..6a2e4dd 100644 --- a/fuzz/fuzz_runner_pool.py +++ b/fuzz/fuzz_runner_pool.py @@ -49,7 +49,7 @@ def log_and_print(message: str, log_file: Path, to_stdout: bool = True): def run_command( cmd: str, log_msg: str, - log_file: Path, + log_file: TextIO , allowed_exit_codes: Optional[List[int]] = None, auto_confirm: bool = True # New auto-confirm parameter ) -> bool: From 49e9dddf1d45fc0ddd240197b92aa6e910b98d67 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 03:23:41 +0000 Subject: [PATCH 014/134] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=A9=BA=E5=80=BC?= =?UTF-8?q?=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/build_oss_fuzz.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 1c78a0d..fdd894e 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -62,8 +62,11 @@ def run_command( errors="replace" ) with open(log_file, "a", encoding="utf-8") as f: - for line in iter(process.stdout.readline, ""): - f.write(line) + if process.stdout is not None: + for line in iter(process.stdout.readline, ""): + f.write(line) + else: + log_and_print("⚠️ Warning: process.stdout is None", log_file) process.wait() exit_code = process.returncode if exit_code in [0, *allowed_exit_codes]: From 6e5221dc52b5234b59e2fe42665e92347071a815 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 03:27:29 +0000 Subject: [PATCH 015/134] modify stdout, delete pool.py --- fuzz/fuzz_runner_pool.py | 271 --------------------------------------- fuzz/run_fuzz_target.py | 7 +- 2 files changed, 5 insertions(+), 273 deletions(-) delete mode 100644 fuzz/fuzz_runner_pool.py diff --git a/fuzz/fuzz_runner_pool.py b/fuzz/fuzz_runner_pool.py deleted file mode 100644 index 6a2e4dd..0000000 --- a/fuzz/fuzz_runner_pool.py +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -fuzz_runner_pool.py - -Parallel batch execution of the entire OSS-Fuzz local testing process. -Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing. - -Usage: python3 fuzz_runner_pool.py [project_list_file] [--sanitizer type] [--workers N] -Example: python3 fuzz/fuzz_runner_pool.py data/valid_projects.txt --workers 4 -""" - -import os -import sys -import subprocess -import argparse -from datetime import datetime -from pathlib import Path -from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count - -# --- Global configuration (can be overridden by command line arguments) --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz"/"oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "script_pool_batch_logs" - -def setup_logging(project_name: str) -> Path: - """Create a timestamped log file for a single project.""" - LOG_DIR.mkdir(parents=True, exist_ok=True) - try: - LOG_DIR.chmod(0o777) - except PermissionError: - # In a parallel environment, multiple processes may try simultaneously, printing a warning once is sufficient - pass - - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - log_file_path = LOG_DIR / f"oss_fuzz_{project_name}_{timestamp}.log" - return log_file_path - -def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """Write message to log file and optionally print to console.""" - if to_stdout: - # Add process ID to distinguish parallel outputs - print(f"[PID:{os.getpid()}] {message}") - with open(log_file, "a", encoding="utf-8") as f: - f.write(message + "\n") - -def run_command( - cmd: str, - log_msg: str, - log_file: TextIO , - allowed_exit_codes: Optional[List[int]] = None, - auto_confirm: bool = True # New auto-confirm parameter -) -> bool: - """Execute a shell command and stream output to log file in real-time.""" - if allowed_exit_codes is None: - allowed_exit_codes = [] - - log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) - log_and_print(f" $ {cmd}", log_file, to_stdout=False) - - try: - # Add auto-confirm mechanism - if auto_confirm: - cmd = f"yes | {cmd}" - - process = subprocess.Popen( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, encoding='utf-8', errors='replace', bufsize=1 - ) - - with open(log_file, "a", encoding="utf-8") as f: - if process.stdout is not None: # 显式检查 - for line in iter(process.stdout.readline, ""): - f.write(line) # Write to log only to avoid console clutter - - process.wait() - exit_code = process.returncode - - if exit_code == 0: - log_and_print(f"✅ Command completed successfully.", log_file, to_stdout=False) - return True - elif exit_code in allowed_exit_codes: - log_and_print(f"ℹ️ Command exited with expected status: {exit_code}", log_file, to_stdout=False) - return True - else: - log_and_print(f"❌ Command execution failed (exit code: {exit_code})", log_file) - return False - except Exception as e: - log_and_print(f"💥 Exception occurred while executing command: {e}", log_file) - return False - -def discover_fuzz_targets(project_name: str) -> List[str]: - """Automatically discover Fuzz targets.""" - project_out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - project_src_dir = OSS_FUZZ_DIR / "projects" / project_name - targets = [] - - if project_out_dir.is_dir(): - for f in project_out_dir.iterdir(): - if (f.is_file() and os.access(f, os.X_OK) and - f.name.startswith("fuzz_") and '.' not in f.name): - targets.append(f.name) - - if not targets and project_src_dir.is_dir(): - for py_file in project_src_dir.glob("fuzz_*.py"): - try: - with open(py_file, "r", encoding="utf-8") as f: - if "atheris.Setup" in f.read(): - targets.append(py_file.stem) - except Exception as e: - # In worker processes, only print to own log - print(f"⚠️ Warning: Failed to read file {py_file}: {e}") - return targets - -def run_project_workflow(project_name: str, sanitizer: str) -> Tuple[bool, str]: - """ - Handle the complete workflow for a single project (Worker Function). - This function is executed independently by each worker process in the process pool. - - Returns: - A tuple (is_success: bool, project_name: str) - """ - log_file = setup_logging(project_name) - - # Print at the beginning of worker for tracking - log_and_print("=" * 60, log_file) - log_and_print(f"🚀 Starting processing for project: {project_name}", log_file) - log_and_print(f"📝 Log file: {log_file}", log_file) - log_and_print("=" * 60, log_file) - - # Each process needs to set its own working directory - try: - os.chdir(OSS_FUZZ_DIR) - except FileNotFoundError: - log_and_print(f"❌ Critical error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!", log_file) - return (False, project_name) - - # Step 1: Build Docker image (with auto-confirm enabled) - if not run_command( - f"python3 infra/helper.py build_image {project_name}", - f"Step 1/5: Building Docker image for {project_name}", log_file, - auto_confirm=True # Auto-confirm all prompts - ): - log_and_print(f"❌ Project {project_name} failed to build image", log_file) - return (False, project_name) - - # Step 2: Compile fuzzers with sanitizer (with auto-confirm enabled) - if not run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - f"Step 2/5: Compiling fuzzers for {project_name} (sanitizer={sanitizer})", log_file, - auto_confirm=True # Auto-confirm all prompts - ): - log_and_print(f"❌ Project {project_name} failed to compile fuzzers", log_file) - return (False, project_name) - - # Step 3: Automatically discover targets - log_and_print(f"🔍 Automatically discovering fuzz targets for {project_name}...", log_file) - fuzz_targets = discover_fuzz_targets(project_name) - - if not fuzz_targets: - log_and_print(f"⚠️ Warning: No fuzz targets found for {project_name}! Skipping run step.", log_file) - return (True, project_name) - - log_and_print(f"✅ Targets discovered: {', '.join(fuzz_targets)}", log_file) - - # Step 4: Run all targets (with auto-confirm enabled) - for i, target in enumerate(fuzz_targets, 1): - run_command( - f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time=60", - f"Step 4/{len(fuzz_targets)}: Running target [{target}] (60 seconds)", log_file, - allowed_exit_codes=[1, 124], - auto_confirm=True # Auto-confirm all prompts - ) - - # Step 5: Generate coverage report (not implemented yet) - log_and_print("Step 5/5: Generating coverage report (not implemented in current version)", log_file) - log_and_print(f"✅ Project {project_name} processing completed!", log_file) - return (True, project_name) - -def main(): - """ - Main workflow function: Set up process pool and distribute tasks. - """ - parser = argparse.ArgumentParser( - description="OSS-Fuzz Parallel Batch Testing Tool", - formatter_class=argparse.RawTextHelpFormatter, - epilog="Examples:\n python3 fuzz_runner_parallel.py valid_projects.txt --workers 4\n python3 fuzz_runner_parallel.py my_projects.txt --sanitizer undefined" - ) - parser.add_argument( - "project_list_file", nargs="?", default="valid_projects.txt", - help="Text file containing list of projects to test. (Default: valid_projects.txt)" - ) - parser.add_argument( - "--sanitizer", default="address", choices=["address", "memory", "undefined", "coverage"], - help="Type of sanitizer to use. (Default: address)" - ) - parser.add_argument( - "--workers", type=int, default=cpu_count(), - help=f"Number of concurrent worker processes. (Default: system CPU count, currently {cpu_count()})" - ) - args = parser.parse_args() - - # --- Environment checks --- - if not OSS_FUZZ_DIR.is_dir(): - print(f"❌ Error: OSS-Fuzz directory '{OSS_FUZZ_DIR}' does not exist!") - sys.exit(1) - - project_list_path = Path(args.project_list_file) - if not project_list_path.is_file(): - print(f"❌ Error: Project list file '{project_list_path}' does not exist!") - sys.exit(1) - - print(f"✅ Environment checks passed. Will use {args.workers} parallel worker processes.") - - # --- Read and prepare tasks --- - try: - with open(project_list_path, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")] - except Exception as e: - print(f"❌ Error reading project list file: {e}") - sys.exit(1) - - if not projects: - print("⚠️ Warning: Project list is empty, no tasks to execute.") - sys.exit(0) - - # Prepare task parameters for starmap, each project is a (project_name, sanitizer) tuple - tasks = [(project, args.sanitizer) for project in projects] - total_projects = len(tasks) - - print(f"\n🚀 About to process {total_projects} projects in parallel...") - - # --- Execute parallel processing --- - # Use with statement to ensure proper pool cleanup - with Pool(processes=args.workers) as pool: - # starmap blocks until all tasks complete - # Unpacks each tuple in tasks as arguments to worker function - results = pool.starmap(run_project_workflow, tasks) - - # --- Collect and print results --- - failed_projects = [] - for success, project_name in results: - if success: - print(f"✅ Project {project_name} completed successfully") - else: - print(f"❌ Project {project_name} processing failed") - failed_projects.append(project_name) - - # --- Final summary --- - fail_count = len(failed_projects) - success_count = total_projects - fail_count - - print("\n" + "=" * 60) - print("🎉 Batch processing completed!") - print(f"📊 Total: {total_projects} projects") - print(f"✅ Success: {success_count}") - print(f"❌ Failures: {fail_count}") - - if failed_projects: - print("📛 Failed projects list:") - for proj in sorted(failed_projects): - print(f" • {proj}") - print("\n💡 Tip: Detailed information for failed projects can be found in corresponding log files.") - print(f" Log directory: {LOG_DIR}") - -if __name__ == "__main__": - # On some Python versions for Windows or macOS, main call needs to be in this guard - # To prevent child processes from re-importing and executing main module code, causing infinite recursion. - main() diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 5b43a9e..aa6bafb 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -60,8 +60,11 @@ def run_command( errors="replace" ) with open(log_file, "a", encoding="utf-8") as f: - for line in iter(process.stdout.readline, ""): - f.write(line) + if process.stdout is not None: + for line in iter(process.stdout.readline, ""): + f.write(line) + else: + log_and_print("⚠️ Warning: process.stdout is None", log_file) process.wait() return process.returncode in [0, *allowed_exit_codes] except Exception as e: From 4a5befabedf3cc425b7bc78fbb0d07a51ba62c1e Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 03:32:03 +0000 Subject: [PATCH 016/134] indentation level check --- fuzz/oss-fuzz | 2 +- fuzz/run_fuzz_target.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz index 0b81ba5..f73b405 160000 --- a/fuzz/oss-fuzz +++ b/fuzz/oss-fuzz @@ -1 +1 @@ -Subproject commit 0b81ba5d97ae3d1402744e00b1d9075fed7b7f1e +Subproject commit f73b405d84e886bac90f8b15200230f08a2709c9 diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index aa6bafb..4a9f15a 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -60,7 +60,7 @@ def run_command( errors="replace" ) with open(log_file, "a", encoding="utf-8") as f: - if process.stdout is not None: + if process.stdout is not None: for line in iter(process.stdout.readline, ""): f.write(line) else: From 7e6c1a2dd4d100ef8e3d65be015ce36e2f33e0a6 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 17:59:07 +0000 Subject: [PATCH 017/134] Remove build log write files --- fuzz/build_oss_fuzz.py | 92 +++++++++++------------------------------- 1 file changed, 24 insertions(+), 68 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index fdd894e..3e44315 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -10,14 +10,13 @@ Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N] Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ --sanitizer address \ - --workers 8 + --workers 4 """ import os import sys import subprocess import argparse -from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count @@ -25,35 +24,24 @@ # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "build_logs" -def setup_logging(project_name: str) -> Path: - """Create a timestamped log file for a single project""" - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - return LOG_DIR / f"build_{project_name}_{timestamp}.log" - -def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """Write message to log and print to console""" - if to_stdout: - print(f"[PID:{os.getpid()}] {message}") - with open(log_file, "a", encoding="utf-8") as f: - f.write(f"{datetime.now().isoformat()} {message}\n") +def log_and_print(message: str): + """Print to console with process info""" + print(f"[PID:{os.getpid()}] {message}") def run_command( - cmd: str, - log_msg: str, - log_file: Path, + cmd: str, + log_msg: str, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """Execute a shell command and stream output to log in real-time""" + """Execute a shell command and stream output to console in real-time""" allowed_exit_codes = allowed_exit_codes or [] - log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) - log_and_print(f" $ {cmd}", log_file, to_stdout=False) + log_and_print(f"▶️ {log_msg}...") + log_and_print(f" $ {cmd}") try: process = subprocess.Popen( - f"yes | {cmd}", # Auto-confirm all prompts + f"yes | {cmd}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -61,76 +49,44 @@ def run_command( encoding="utf-8", errors="replace" ) - with open(log_file, "a", encoding="utf-8") as f: - if process.stdout is not None: - for line in iter(process.stdout.readline, ""): - f.write(line) - else: - log_and_print("⚠️ Warning: process.stdout is None", log_file) + if process.stdout is not None: + for line in iter(process.stdout.readline, ""): + print(line, end="") # real-time output to console process.wait() exit_code = process.returncode if exit_code in [0, *allowed_exit_codes]: - log_and_print(f"✅ Command completed successfully", log_file, to_stdout=False) + log_and_print(f"✅ Command completed successfully") return True - log_and_print(f"❌ Command failed (exit code: {exit_code})", log_file) + log_and_print(f"❌ Command failed (exit code: {exit_code})") return False except Exception as e: - log_and_print(f"💥 Execution exception: {e}", log_file) + log_and_print(f"💥 Execution exception: {e}") return False def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: """Build workflow for a single project""" - log_file = setup_logging(project_name) os.chdir(OSS_FUZZ_DIR) - - log_and_print("="*60, log_file) - log_and_print(f"🔨 Starting build for project: {project_name}", log_file) - log_and_print(f"📝 Log path: {log_file}", log_file) - log_and_print("="*60, log_file) - # 1. Build Docker image + log_and_print("="*60) + log_and_print(f"🔨 Starting build for project: {project_name}") + log_and_print("="*60) + if not run_command( f"python3 infra/helper.py build_image {project_name}", - "Step 1/2: Building Docker image", - log_file + "Step 1/2: Building Docker image" ): return (False, project_name) - # 2. Compile Fuzzers if not run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})", - log_file + f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})" ): return (False, project_name) - log_and_print(f"✅ Project {project_name} build completed", log_file) + log_and_print(f"✅ Project {project_name} build completed") return (True, project_name) def main(): parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool") parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"]) - parser.add_argument("--workers", type=int, default=cpu_count()) - args = parser.parse_args() - - # Read project list - try: - with open(args.project_list, "r") as f: - projects = [line.strip() for line in f if line.strip()] - except Exception as e: - print(f"❌ Failed to read project list: {e}") - sys.exit(1) - - # Parallel build - with Pool(args.workers) as pool: - results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) - - # Output results - failed = [p for success, p in results if not success] - print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}") - if failed: - print("❌ Failed projects: " + ", ".join(failed)) - -if __name__ == "__main__": - main() \ No newline at end of file + parser.add_argument("--sanitizer", default="addres_ From a7e447e5ae31aea0b432c5458ef87612651cd28b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 18:00:36 +0000 Subject: [PATCH 018/134] Remove build log write files --- fuzz/build_oss_fuzz.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 3e44315..9553e35 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -10,7 +10,7 @@ Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N] Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ --sanitizer address \ - --workers 4 + --workers 8 """ import os @@ -89,4 +89,24 @@ def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: def main(): parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool") parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--sanitizer", default="addres_ + parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"]) + parser.add_argument("--workers", type=int, default=cpu_count()) + args = parser.parse_args() + + try: + with open(args.project_list, "r") as f: + projects = [line.strip() for line in f if line.strip()] + except Exception as e: + print(f"❌ Failed to read project list: {e}") + sys.exit(1) + + with Pool(args.workers) as pool: + results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) + + failed = [p for success, p in results if not success] + print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}") + if failed: + print("❌ Failed projects: " + ", ".join(failed)) + +if __name__ == "__main__": + main() From fb4da2a3e75dfa33c49b4f3fc2af4dbf6c53e662 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 18:18:41 +0000 Subject: [PATCH 019/134] use logging mdule --- fuzz/build_oss_fuzz.py | 56 ++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 9553e35..89cbc25 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -12,7 +12,6 @@ --sanitizer address \ --workers 8 """ - import os import sys import subprocess @@ -20,24 +19,22 @@ from pathlib import Path from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count +import logging # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" - -def log_and_print(message: str): - """Print to console with process info""" - print(f"[PID:{os.getpid()}] {message}") - def run_command( cmd: str, log_msg: str, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """Execute a shell command and stream output to console in real-time""" + + """Execute a shell command and stream output to console""" allowed_exit_codes = allowed_exit_codes or [] - log_and_print(f"▶️ {log_msg}...") - log_and_print(f" $ {cmd}") + + logging.info(f"▶️ {log_msg}") + logging.debug(f"$ {cmd}") try: process = subprocess.Popen( @@ -49,27 +46,28 @@ def run_command( encoding="utf-8", errors="replace" ) - if process.stdout is not None: + if process.stdout: for line in iter(process.stdout.readline, ""): - print(line, end="") # real-time output to console + sys.stdout.write(line) + sys.stdout.flush() process.wait() exit_code = process.returncode if exit_code in [0, *allowed_exit_codes]: - log_and_print(f"✅ Command completed successfully") + logging.info("✅ Command completed successfully") return True - log_and_print(f"❌ Command failed (exit code: {exit_code})") + logging.error(f"❌ Command failed (exit code: {exit_code})") return False except Exception as e: - log_and_print(f"💥 Execution exception: {e}") + logging.exception(f"💥 Execution exception: {e}") return False def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: """Build workflow for a single project""" os.chdir(OSS_FUZZ_DIR) - log_and_print("="*60) - log_and_print(f"🔨 Starting build for project: {project_name}") - log_and_print("="*60) + logging.info("=" * 60) + logging.info(f"🔨 Starting build for project: {project_name}") + logging.info("=" * 60) if not run_command( f"python3 infra/helper.py build_image {project_name}", @@ -83,7 +81,7 @@ def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: ): return (False, project_name) - log_and_print(f"✅ Project {project_name} build completed") + logging.info(f"✅ Project {project_name} build completed") return (True, project_name) def main(): @@ -93,7 +91,29 @@ def main(): parser.add_argument("--workers", type=int, default=cpu_count()) args = parser.parse_args() + logging.basicConfig( + level=logging.INFO, + format='[%(levelname)s] [PID:%(process)d] %(message)s' + ) + try: + with open(args.project_list, "r") as f: + projects = [line.strip() for line in f if line.strip()] + except Exception as e: + logging.error(f"Failed to read project list: {e}") + sys.exit(1) + + with Pool(args.workers) as pool: + results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) + + failed = [p for success, p in results if not success] + logging.info(f"\n📊 Build completed: Success {len(projects) - len(failed)}/{len(projects)}") + if failed: + logging.warning("❌ Failed projects: " + ", ".join(failed)) + +if __name__ == "__main__": + main() + with open(args.project_list, "r") as f: projects = [line.strip() for line in f if line.strip()] except Exception as e: From e582af60ebd584b73eafc3533f5792ea7148b388 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 18:32:37 +0000 Subject: [PATCH 020/134] use precise logging --- fuzz/build_oss_fuzz.py | 48 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index 89cbc25..e4d2b3b 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -29,7 +29,6 @@ def run_command( log_msg: str, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """Execute a shell command and stream output to console""" allowed_exit_codes = allowed_exit_codes or [] @@ -46,6 +45,17 @@ def run_command( encoding="utf-8", errors="replace" ) + except FileNotFoundError: + logging.error(f"Command not found: {cmd}") + return False + except OSError as e: + logging.error(f"OS error while executing command: {e}") + return False + except ValueError as e: + logging.error(f"Invalid arguments to Popen: {e}") + return False + + try: if process.stdout: for line in iter(process.stdout.readline, ""): sys.stdout.write(line) @@ -57,10 +67,15 @@ def run_command( return True logging.error(f"❌ Command failed (exit code: {exit_code})") return False + except KeyboardInterrupt: + logging.warning("⛔️ Command interrupted by user") + process.terminate() + return False except Exception as e: - logging.exception(f"💥 Execution exception: {e}") + logging.exception(f"Unexpected error during process execution: {e}") return False + def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: """Build workflow for a single project""" os.chdir(OSS_FUZZ_DIR) @@ -96,11 +111,18 @@ def main(): format='[%(levelname)s] [PID:%(process)d] %(message)s' ) + if not os.path.isfile(args.project_list): + logging.error(f"Project list file not found: {args.project_list}") + sys.exit(1) + try: - with open(args.project_list, "r") as f: + with open(args.project_list, "r", encoding="utf-8") as f: projects = [line.strip() for line in f if line.strip()] - except Exception as e: - logging.error(f"Failed to read project list: {e}") + except OSError as e: + logging.error(f"OS error while reading project list: {e}") + sys.exit(1) + except UnicodeDecodeError as e: + logging.error(f"Encoding error while reading file: {e}") sys.exit(1) with Pool(args.workers) as pool: @@ -114,19 +136,3 @@ def main(): if __name__ == "__main__": main() - with open(args.project_list, "r") as f: - projects = [line.strip() for line in f if line.strip()] - except Exception as e: - print(f"❌ Failed to read project list: {e}") - sys.exit(1) - - with Pool(args.workers) as pool: - results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) - - failed = [p for success, p in results if not success] - print(f"\n📊 Build completed: Success {len(projects)-len(failed)}/{len(projects)}") - if failed: - print("❌ Failed projects: " + ", ".join(failed)) - -if __name__ == "__main__": - main() From 02f9269d720b75759619f3021520b26a97c28e54 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 18:50:34 +0000 Subject: [PATCH 021/134] use logging --- fuzz/run_fuzz_target.py | 154 +++++++++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 51 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 4a9f15a..b13afb6 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -4,8 +4,8 @@ """ run_fuzz_target.py -Run OSS-Fuzz test targets in parallel. -Uses multiprocessing.Pool to distribute tasks to multiple CPU cores. +Run OSS-Fuzz test targets in parallel with enhanced logging. +Uses multiprocessing.Pool and logging module for robust task management. Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N] Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4 @@ -15,39 +15,64 @@ import sys import subprocess import argparse +import logging from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count +from multiprocessing import Pool, cpu_count, current_process # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs" -def setup_logging(project_name: str) -> Path: - """Create a timestamped run log""" +def setup_logging(project_name: str) -> logging.Logger: + """Configure hierarchical logger with file and console handlers""" LOG_DIR.mkdir(parents=True, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - return LOG_DIR / f"run_{project_name}_{timestamp}.log" - -def log_and_print(message: str, log_file: Path, to_stdout: bool = True): - """Log and console output""" - if to_stdout: - print(f"[PID:{os.getpid()}] {message}") - with open(log_file, "a", encoding="utf-8") as f: - f.write(f"{datetime.now().isoformat()} {message}\n") + log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" + + # Create process-specific logger + logger = logging.getLogger(f"{project_name}.{current_process().name}") + logger.setLevel(logging.DEBUG) + + # File handler (all levels) + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(file_formatter) + + # Console handler (INFO+ only) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_formatter = logging.Formatter( + "[%(levelname)s] %(message)s" + ) + console_handler.setFormatter(console_formatter) + + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # Capture uncaught exceptions + sys.excepthook = lambda exc_type, exc_value, exc_traceback: ( + logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback)) + ) + + return logger def run_command( cmd: str, log_msg: str, - log_file: Path, + logger: logging.Logger, allowed_exit_codes: Optional[List[int]] = None ) -> bool: - """Execute command and log output in real-time""" + """Execute command with real-time logging""" allowed_exit_codes = allowed_exit_codes or [] - log_and_print(f"▶️ {log_msg}...", log_file, to_stdout=False) - log_and_print(f" $ {cmd}", log_file, to_stdout=False) + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") try: process = subprocess.Popen( @@ -59,45 +84,56 @@ def run_command( encoding="utf-8", errors="replace" ) - with open(log_file, "a", encoding="utf-8") as f: - if process.stdout is not None: - for line in iter(process.stdout.readline, ""): - f.write(line) - else: - log_and_print("⚠️ Warning: process.stdout is None", log_file) + + # Stream output to logger + if process.stdout: + for line in iter(process.stdout.readline, ""): + logger.debug(line.strip()) + else: + logger.warning("Process stdout is None") + process.wait() - return process.returncode in [0, *allowed_exit_codes] + + if process.returncode not in [0, *allowed_exit_codes]: + logger.error(f"Command failed with exit code: {process.returncode}") + return False + return True except Exception as e: - log_and_print(f"💥 Execution exception: {e}", log_file) + logger.exception(f"💥 Execution exception: {e}") return False -def discover_targets(project_name: str) -> List[str]: - """Discover available Fuzz targets""" +def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: + """Discover available Fuzz targets with error handling""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name targets = [] - if out_dir.exists(): - for f in out_dir.iterdir(): - if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK): - targets.append(f.name) + try: + if out_dir.exists(): + for f in out_dir.iterdir(): + if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK): + targets.append(f.name) + else: + logger.warning(f"Build directory not found: {out_dir}") + except Exception as e: + logger.exception(f"Target discovery failed: {e}") return targets def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: - """Testing workflow for a single project""" - log_file = setup_logging(project_name) + """Testing workflow for a single project with logging""" + logger = setup_logging(project_name) os.chdir(OSS_FUZZ_DIR) - - log_and_print("="*60, log_file) - log_and_print(f"🚀 Starting testing for project: {project_name}", log_file) - log_and_print(f"📝 Log path: {log_file}", log_file) - log_and_print("="*60, log_file) - + + logger.info("=" * 60) + logger.info(f"🚀 Starting testing for project: {project_name}") + logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}") + logger.info("=" * 60) + # 1. Discover test targets - targets = discover_targets(project_name) + targets = discover_targets(project_name, logger) if not targets: - log_and_print("⚠️ No test targets found", log_file) + logger.error("⚠️ No test targets found") return (False, project_name) - log_and_print(f"🔍 Discovered {len(targets)} test targets", log_file) - + logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}") + # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): @@ -105,28 +141,44 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: success = run_command( cmd, f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", - log_file, + logger, allowed_exit_codes=[1, 124] # Allow timeout exit codes ) all_success &= success - - # 3. Generate report (placeholder) - log_and_print("📊 Coverage report generation (not implemented in current version)", log_file) + if not success: + logger.error(f"❌ Target failed: {target}") + + # 3. Final status + if all_success: + logger.info(f"✅ All targets completed successfully for {project_name}") + else: + logger.error(f"❌ One or more targets failed for {project_name}") + return (all_success, project_name) def main(): + # Root logger config for main process + logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(message)s" + ) + logger = logging.getLogger("Main") + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool") parser.add_argument("project_list", help="Project list file path") parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)") parser.add_argument("--workers", type=int, default=cpu_count()) args = parser.parse_args() + + logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") # Read project list try: with open(args.project_list) as f: projects = [line.strip() for line in f if line.strip()] + logger.info(f"📋 Loaded {len(projects)} projects from {args.project_list}") except Exception as e: - print(f"❌ Failed to read project list: {e}") + logger.exception(f"❌ Failed to read project list: {e}") sys.exit(1) # Parallel execution @@ -135,9 +187,9 @@ def main(): # Output results failed = [p for success, p in results if not success] - print(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") + logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: - print("❌ Failed projects: " + ", ".join(failed)) + logger.error("❌ Failed projects: " + ", ".join(failed)) if __name__ == "__main__": - main() + main() \ No newline at end of file From 15a7095da4563ad4b7f3072a62eafdb619d90428 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 18:57:47 +0000 Subject: [PATCH 022/134] use precise exception log info --- fuzz/run_fuzz_target.py | 268 ++++++++++++++++++++++++++++------------ 1 file changed, 186 insertions(+), 82 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index b13afb6..7b6b893 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -4,7 +4,7 @@ """ run_fuzz_target.py -Run OSS-Fuzz test targets in parallel with enhanced logging. +Run OSS-Fuzz test targets in parallel with enhanced logging and precise exception handling. Uses multiprocessing.Pool and logging module for robust task management. Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N] @@ -16,6 +16,8 @@ import subprocess import argparse import logging +import time +import shutil from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple @@ -28,53 +30,60 @@ def setup_logging(project_name: str) -> logging.Logger: """Configure hierarchical logger with file and console handlers""" - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" - - # Create process-specific logger - logger = logging.getLogger(f"{project_name}.{current_process().name}") - logger.setLevel(logging.DEBUG) - - # File handler (all levels) - file_handler = logging.FileHandler(log_file, encoding="utf-8") - file_handler.setLevel(logging.DEBUG) - file_formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(file_formatter) - - # Console handler (INFO+ only) - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_formatter = logging.Formatter( - "[%(levelname)s] %(message)s" - ) - console_handler.setFormatter(console_formatter) - - logger.addHandler(file_handler) - logger.addHandler(console_handler) - - # Capture uncaught exceptions - sys.excepthook = lambda exc_type, exc_value, exc_traceback: ( - logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback)) - ) - - return logger + try: + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" + + # Create process-specific logger + logger = logging.getLogger(f"{project_name}.{current_process().name}") + logger.setLevel(logging.DEBUG) + + # File handler (all levels) + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(file_formatter) + + # Console handler (INFO+ only) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_formatter = logging.Formatter( + "[%(levelname)s] %(message)s" + ) + console_handler.setFormatter(console_formatter) + + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # Capture uncaught exceptions + sys.excepthook = lambda exc_type, exc_value, exc_traceback: ( + logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback)) + ) + + return logger + except (PermissionError, OSError) as e: + print(f"❌ Critical logging setup error: {e}") + sys.exit(1) def run_command( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Optional[List[int]] = None + allowed_exit_codes: Optional[List[int]] = None, + timeout: int = 3600 # 1 hour default timeout ) -> bool: - """Execute command with real-time logging""" + """Execute command with real-time logging and precise error handling""" allowed_exit_codes = allowed_exit_codes or [] logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") + process = None try: + # Start process with explicit timeout handling process = subprocess.Popen( cmd, shell=True, @@ -85,42 +94,89 @@ def run_command( errors="replace" ) - # Stream output to logger - if process.stdout: - for line in iter(process.stdout.readline, ""): - logger.debug(line.strip()) - else: - logger.warning("Process stdout is None") - - process.wait() + # Stream output to logger with timeout control + start_time = time.time() + while process.poll() is None: + if time.time() - start_time > timeout: + logger.error(f"⌛ Command timed out after {timeout} seconds") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + return False + + # Read available output + if process.stdout: + line = process.stdout.readline() + if line: + logger.debug(line.strip()) + else: + logger.warning("Process stdout is None") + time.sleep(0.1) - if process.returncode not in [0, *allowed_exit_codes]: - logger.error(f"Command failed with exit code: {process.returncode}") + # Check exit code + exit_code = process.returncode + if exit_code not in [0, *allowed_exit_codes]: + logger.error(f"❌ Command failed with exit code: {exit_code}") return False return True - except Exception as e: - logger.exception(f"💥 Execution exception: {e}") + + except FileNotFoundError as e: + logger.error(f"🔍 Command not found: {cmd.split()[0]}") return False + except PermissionError as e: + logger.error(f"🔒 Permission denied for command: {cmd}") + return False + except subprocess.SubprocessError as e: + logger.exception(f"💥 Subprocess error: {e}") + return False + except OSError as e: + logger.exception(f"💥 OS error during command execution: {e}") + return False + finally: + # Ensure process is cleaned up + if process and process.poll() is None: + try: + process.terminate() + process.wait(timeout=5) + except: + pass def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: - """Discover available Fuzz targets with error handling""" + """Discover available Fuzz targets with precise error handling""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name targets = [] + try: - if out_dir.exists(): - for f in out_dir.iterdir(): + if not out_dir.exists(): + logger.warning(f"⚠️ Build directory not found: {out_dir}") + return targets + + for f in out_dir.iterdir(): + try: if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK): targets.append(f.name) - else: - logger.warning(f"Build directory not found: {out_dir}") - except Exception as e: - logger.exception(f"Target discovery failed: {e}") + except OSError as e: + logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}") + + except FileNotFoundError: + logger.error(f"❌ Directory not found: {out_dir}") + except PermissionError: + logger.error(f"🔒 Permission denied accessing: {out_dir}") + except OSError as e: + logger.exception(f"💥 OS error during target discovery: {e}") + return targets def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: - """Testing workflow for a single project with logging""" - logger = setup_logging(project_name) - os.chdir(OSS_FUZZ_DIR) + """Testing workflow for a single project with precise error handling""" + try: + logger = setup_logging(project_name) + os.chdir(OSS_FUZZ_DIR) + except (OSError, PermissionError) as e: + print(f"❌ Critical error initializing project {project_name}: {e}") + return (False, project_name) logger.info("=" * 60) logger.info(f"🚀 Starting testing for project: {project_name}") @@ -128,25 +184,34 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.info("=" * 60) # 1. Discover test targets - targets = discover_targets(project_name, logger) - if not targets: - logger.error("⚠️ No test targets found") + try: + targets = discover_targets(project_name, logger) + if not targets: + logger.error("⚠️ No test targets found") + return (False, project_name) + logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}") + except Exception as e: + logger.exception(f"💥 Target discovery failed unexpectedly: {e}") return (False, project_name) - logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}") # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", - logger, - allowed_exit_codes=[1, 124] # Allow timeout exit codes - ) - all_success &= success - if not success: - logger.error(f"❌ Target failed: {target}") + try: + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}" + success = run_command( + cmd, + f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", + logger, + allowed_exit_codes=[1, 124], # Allow timeout exit codes + timeout=timeout + 300 # Add buffer for setup/teardown + ) + all_success &= success + if not success: + logger.error(f"❌ Target failed: {target}") + except Exception as e: + logger.exception(f"💥 Unexpected error running target {target}: {e}") + all_success = False # 3. Final status if all_success: @@ -172,24 +237,63 @@ def main(): logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") - # Read project list + # Read project list with precise error handling try: - with open(args.project_list) as f: + project_path = Path(args.project_list) + if not project_path.exists(): + raise FileNotFoundError(f"Project list file not found: {project_path}") + + if not project_path.is_file(): + raise ValueError(f"Path is not a file: {project_path}") + + with open(project_path, "r") as f: projects = [line.strip() for line in f if line.strip()] - logger.info(f"📋 Loaded {len(projects)} projects from {args.project_list}") - except Exception as e: - logger.exception(f"❌ Failed to read project list: {e}") + + logger.info(f"📋 Loaded {len(projects)} projects from {project_path}") + except FileNotFoundError as e: + logger.error(f"❌ {e}") + sys.exit(1) + except PermissionError as e: + logger.error(f"🔒 Permission denied: {e}") + sys.exit(1) + except (OSError, ValueError) as e: + logger.exception(f"💥 Error reading project list: {e}") sys.exit(1) - # Parallel execution + # Parallel execution with error isolation with Pool(args.workers) as pool: - results = pool.starmap(run_project, [(p, args.timeout) for p in projects]) + results = [] + for p in projects: + try: + results.append(pool.apply_async(run_project, (p, args.timeout))) + except Exception as e: + logger.error(f"💥 Failed to schedule project {p}: {e}") + results.append((False, p)) + + # Collect results with timeout + final_results = [] + for res in results: + try: + final_results.append(res.get(timeout=args.timeout * 2)) + except TimeoutError: + logger.error("⌛ Project execution timed out") + final_results.append((False, "unknown")) + except Exception as e: + logger.error(f"💥 Error collecting result: {e}") + final_results.append((False, "unknown")) # Output results - failed = [p for success, p in results if not success] + failed = [p for success, p in final_results if not success] logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: logger.error("❌ Failed projects: " + ", ".join(failed)) if __name__ == "__main__": - main() \ No newline at end of file + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation cancelled by user") + sys.exit(1) + except Exception as e: + print(f"💥 Critical error in main: {e}") + sys.exit(1) \ No newline at end of file From 9a471aa4a2f5be51675249be15f81ed7a236c714 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 20:04:17 +0000 Subject: [PATCH 023/134] correct type problems --- fuzz/run_fuzz_target.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 7b6b893..7324d3c 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -21,7 +21,7 @@ from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count, current_process +from multiprocessing import Pool, cpu_count, current_process, ApplyResult # --- Global configuration --- HOME_DIR = Path.home() @@ -146,7 +146,7 @@ def run_command( def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: """Discover available Fuzz targets with precise error handling""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets = [] + targets: List[str] = [] # 添加类型注解 try: if not out_dir.exists(): @@ -221,6 +221,16 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: return (all_success, project_name) +def _create_fake_async_result(result: bool, project_name: str) -> ApplyResult: + """创建兼容的异步结果包装器""" + from multiprocessing.pool import ApplyResult + from functools import partial + + def _wrapper(): + return (result, project_name) + + return ApplyResult(None, _wrapper, ()) + def main(): # Root logger config for main process logging.basicConfig( @@ -260,19 +270,20 @@ def main(): logger.exception(f"💥 Error reading project list: {e}") sys.exit(1) - # Parallel execution with error isolation + # 修改后的并行执行部分 with Pool(args.workers) as pool: - results = [] + async_results = [] for p in projects: try: - results.append(pool.apply_async(run_project, (p, args.timeout))) + async_results.append(pool.apply_async(run_project, (p, args.timeout))) except Exception as e: logger.error(f"💥 Failed to schedule project {p}: {e}") - results.append((False, p)) + # 使用包装器保持类型一致 + async_results.append(_create_fake_async_result(False, p)) - # Collect results with timeout - final_results = [] - for res in results: + # 收集结果 + final_results: List[Tuple[bool, str]] = [] + for res in async_results: try: final_results.append(res.get(timeout=args.timeout * 2)) except TimeoutError: From 026614bafc20d06d0508de3f31a6258f36416555 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 20:10:20 +0000 Subject: [PATCH 024/134] correct some mistakes --- fuzz/run_fuzz_target.py | 45 +++++++++++++---------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 7324d3c..402089d 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @@ -21,7 +21,7 @@ from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count, current_process, ApplyResult +from multiprocessing import Pool, cpu_count, current_process # --- Global configuration --- HOME_DIR = Path.home() @@ -83,7 +83,6 @@ def run_command( process = None try: - # Start process with explicit timeout handling process = subprocess.Popen( cmd, shell=True, @@ -94,7 +93,6 @@ def run_command( errors="replace" ) - # Stream output to logger with timeout control start_time = time.time() while process.poll() is None: if time.time() - start_time > timeout: @@ -106,7 +104,6 @@ def run_command( process.kill() return False - # Read available output if process.stdout: line = process.stdout.readline() if line: @@ -115,17 +112,16 @@ def run_command( logger.warning("Process stdout is None") time.sleep(0.1) - # Check exit code exit_code = process.returncode if exit_code not in [0, *allowed_exit_codes]: logger.error(f"❌ Command failed with exit code: {exit_code}") return False return True - except FileNotFoundError as e: + except FileNotFoundError: logger.error(f"🔍 Command not found: {cmd.split()[0]}") return False - except PermissionError as e: + except PermissionError: logger.error(f"🔒 Permission denied for command: {cmd}") return False except subprocess.SubprocessError as e: @@ -135,7 +131,6 @@ def run_command( logger.exception(f"💥 OS error during command execution: {e}") return False finally: - # Ensure process is cleaned up if process and process.poll() is None: try: process.terminate() @@ -146,7 +141,7 @@ def run_command( def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: """Discover available Fuzz targets with precise error handling""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets: List[str] = [] # 添加类型注解 + targets: List[str] = [] try: if not out_dir.exists(): @@ -183,7 +178,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}") logger.info("=" * 60) - # 1. Discover test targets try: targets = discover_targets(project_name, logger) if not targets: @@ -194,7 +188,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Target discovery failed unexpectedly: {e}") return (False, project_name) - # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): try: @@ -203,8 +196,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: cmd, f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", logger, - allowed_exit_codes=[1, 124], # Allow timeout exit codes - timeout=timeout + 300 # Add buffer for setup/teardown + allowed_exit_codes=[1, 124], + timeout=timeout + 300 ) all_success &= success if not success: @@ -213,7 +206,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Unexpected error running target {target}: {e}") all_success = False - # 3. Final status if all_success: logger.info(f"✅ All targets completed successfully for {project_name}") else: @@ -221,18 +213,14 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: return (all_success, project_name) -def _create_fake_async_result(result: bool, project_name: str) -> ApplyResult: - """创建兼容的异步结果包装器""" - from multiprocessing.pool import ApplyResult - from functools import partial - - def _wrapper(): - return (result, project_name) - - return ApplyResult(None, _wrapper, ()) +def _create_fake_async_result(result: bool, project_name: str): + """模拟 Pool.apply_async 返回值,便于错误恢复""" + class FakeApplyResult: + def get(self, timeout=None): + return (result, project_name) + return FakeApplyResult() def main(): - # Root logger config for main process logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s" @@ -247,7 +235,6 @@ def main(): logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") - # Read project list with precise error handling try: project_path = Path(args.project_list) if not project_path.exists(): @@ -270,7 +257,6 @@ def main(): logger.exception(f"💥 Error reading project list: {e}") sys.exit(1) - # 修改后的并行执行部分 with Pool(args.workers) as pool: async_results = [] for p in projects: @@ -278,10 +264,8 @@ def main(): async_results.append(pool.apply_async(run_project, (p, args.timeout))) except Exception as e: logger.error(f"💥 Failed to schedule project {p}: {e}") - # 使用包装器保持类型一致 async_results.append(_create_fake_async_result(False, p)) - # 收集结果 final_results: List[Tuple[bool, str]] = [] for res in async_results: try: @@ -293,7 +277,6 @@ def main(): logger.error(f"💥 Error collecting result: {e}") final_results.append((False, "unknown")) - # Output results failed = [p for success, p in final_results if not success] logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: @@ -307,4 +290,4 @@ def main(): sys.exit(1) except Exception as e: print(f"💥 Critical error in main: {e}") - sys.exit(1) \ No newline at end of file + sys.exit(1) From 74f44dc97660aa881991721b2494e056a2cc410a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 20:32:46 +0000 Subject: [PATCH 025/134] correct some mistakes --- fuzz/run_fuzz_target.py | 51 ++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 402089d..326971c 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @@ -17,7 +17,6 @@ import argparse import logging import time -import shutil from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple @@ -28,6 +27,14 @@ OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs" +class FakeResult: + """模拟ApplyResult的对象,用于在任务调度失败时返回结果""" + def __init__(self, result: bool, project_name: str): + self.result = (result, project_name) + + def get(self, timeout=None) -> Tuple[bool, str]: + return self.result + def setup_logging(project_name: str) -> logging.Logger: """Configure hierarchical logger with file and console handlers""" try: @@ -83,6 +90,7 @@ def run_command( process = None try: + # Start process with explicit timeout handling process = subprocess.Popen( cmd, shell=True, @@ -93,9 +101,11 @@ def run_command( errors="replace" ) + # Stream output to logger with timeout control start_time = time.time() while process.poll() is None: - if time.time() - start_time > timeout: + elapsed = time.time() - start_time + if elapsed > timeout: logger.error(f"⌛ Command timed out after {timeout} seconds") process.terminate() try: @@ -104,24 +114,25 @@ def run_command( process.kill() return False + # Read available output if process.stdout: line = process.stdout.readline() if line: logger.debug(line.strip()) else: - logger.warning("Process stdout is None") - time.sleep(0.1) + time.sleep(0.1) # 减少日志噪音 + # Check exit code exit_code = process.returncode if exit_code not in [0, *allowed_exit_codes]: logger.error(f"❌ Command failed with exit code: {exit_code}") return False return True - except FileNotFoundError: + except FileNotFoundError as e: logger.error(f"🔍 Command not found: {cmd.split()[0]}") return False - except PermissionError: + except PermissionError as e: logger.error(f"🔒 Permission denied for command: {cmd}") return False except subprocess.SubprocessError as e: @@ -131,6 +142,7 @@ def run_command( logger.exception(f"💥 OS error during command execution: {e}") return False finally: + # Ensure process is cleaned up if process and process.poll() is None: try: process.terminate() @@ -178,6 +190,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}") logger.info("=" * 60) + # 1. Discover test targets try: targets = discover_targets(project_name, logger) if not targets: @@ -188,6 +201,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Target discovery failed unexpectedly: {e}") return (False, project_name) + # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): try: @@ -196,8 +210,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: cmd, f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", logger, - allowed_exit_codes=[1, 124], - timeout=timeout + 300 + allowed_exit_codes=[1, 124], # Allow timeout exit codes + timeout=timeout + 300 # Add buffer for setup/teardown ) all_success &= success if not success: @@ -206,6 +220,7 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Unexpected error running target {target}: {e}") all_success = False + # 3. Final status if all_success: logger.info(f"✅ All targets completed successfully for {project_name}") else: @@ -213,14 +228,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: return (all_success, project_name) -def _create_fake_async_result(result: bool, project_name: str): - """模拟 Pool.apply_async 返回值,便于错误恢复""" - class FakeApplyResult: - def get(self, timeout=None): - return (result, project_name) - return FakeApplyResult() - def main(): + # Root logger config for main process logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s" @@ -235,6 +244,7 @@ def main(): logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") + # Read project list with precise error handling try: project_path = Path(args.project_list) if not project_path.exists(): @@ -257,6 +267,7 @@ def main(): logger.exception(f"💥 Error reading project list: {e}") sys.exit(1) + # Parallel execution with error isolation with Pool(args.workers) as pool: async_results = [] for p in projects: @@ -264,11 +275,14 @@ def main(): async_results.append(pool.apply_async(run_project, (p, args.timeout))) except Exception as e: logger.error(f"💥 Failed to schedule project {p}: {e}") - async_results.append(_create_fake_async_result(False, p)) + # 使用自定义的FakeResult替代ApplyResult + async_results.append(FakeResult(False, p)) + # Collect results with timeout final_results: List[Tuple[bool, str]] = [] for res in async_results: try: + # 使用双倍超时时间确保结果收集 final_results.append(res.get(timeout=args.timeout * 2)) except TimeoutError: logger.error("⌛ Project execution timed out") @@ -277,6 +291,7 @@ def main(): logger.error(f"💥 Error collecting result: {e}") final_results.append((False, "unknown")) + # Output results failed = [p for success, p in final_results if not success] logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: @@ -290,4 +305,4 @@ def main(): sys.exit(1) except Exception as e: print(f"💥 Critical error in main: {e}") - sys.exit(1) + sys.exit(1) \ No newline at end of file From 413c2dd78ba6b63c1ae5a34c9dd7fca36b65f2f2 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 20:45:17 +0000 Subject: [PATCH 026/134] correct --- fuzz/run_fuzz_target.py | 49 ++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 326971c..2432289 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @@ -17,6 +17,7 @@ import argparse import logging import time +import shutil from datetime import datetime from pathlib import Path from typing import List, Optional, Tuple @@ -27,14 +28,6 @@ OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs" -class FakeResult: - """模拟ApplyResult的对象,用于在任务调度失败时返回结果""" - def __init__(self, result: bool, project_name: str): - self.result = (result, project_name) - - def get(self, timeout=None) -> Tuple[bool, str]: - return self.result - def setup_logging(project_name: str) -> logging.Logger: """Configure hierarchical logger with file and console handlers""" try: @@ -90,7 +83,6 @@ def run_command( process = None try: - # Start process with explicit timeout handling process = subprocess.Popen( cmd, shell=True, @@ -101,11 +93,9 @@ def run_command( errors="replace" ) - # Stream output to logger with timeout control start_time = time.time() while process.poll() is None: - elapsed = time.time() - start_time - if elapsed > timeout: + if time.time() - start_time > timeout: logger.error(f"⌛ Command timed out after {timeout} seconds") process.terminate() try: @@ -114,25 +104,24 @@ def run_command( process.kill() return False - # Read available output if process.stdout: line = process.stdout.readline() if line: logger.debug(line.strip()) else: - time.sleep(0.1) # 减少日志噪音 + logger.warning("Process stdout is None") + time.sleep(0.1) - # Check exit code exit_code = process.returncode if exit_code not in [0, *allowed_exit_codes]: logger.error(f"❌ Command failed with exit code: {exit_code}") return False return True - except FileNotFoundError as e: + except FileNotFoundError: logger.error(f"🔍 Command not found: {cmd.split()[0]}") return False - except PermissionError as e: + except PermissionError: logger.error(f"🔒 Permission denied for command: {cmd}") return False except subprocess.SubprocessError as e: @@ -142,7 +131,6 @@ def run_command( logger.exception(f"💥 OS error during command execution: {e}") return False finally: - # Ensure process is cleaned up if process and process.poll() is None: try: process.terminate() @@ -190,7 +178,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}") logger.info("=" * 60) - # 1. Discover test targets try: targets = discover_targets(project_name, logger) if not targets: @@ -201,7 +188,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Target discovery failed unexpectedly: {e}") return (False, project_name) - # 2. Run all targets all_success = True for i, target in enumerate(targets, 1): try: @@ -210,8 +196,8 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: cmd, f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", logger, - allowed_exit_codes=[1, 124], # Allow timeout exit codes - timeout=timeout + 300 # Add buffer for setup/teardown + allowed_exit_codes=[1, 124], + timeout=timeout + 300 ) all_success &= success if not success: @@ -220,7 +206,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.exception(f"💥 Unexpected error running target {target}: {e}") all_success = False - # 3. Final status if all_success: logger.info(f"✅ All targets completed successfully for {project_name}") else: @@ -228,8 +213,14 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: return (all_success, project_name) +def _create_fake_async_result(result: bool, project_name: str): + """模拟 Pool.apply_async 返回值,便于错误恢复""" + class FakeApplyResult: + def get(self, timeout=None): + return (result, project_name) + return FakeApplyResult() + def main(): - # Root logger config for main process logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s" @@ -244,7 +235,6 @@ def main(): logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") - # Read project list with precise error handling try: project_path = Path(args.project_list) if not project_path.exists(): @@ -267,7 +257,6 @@ def main(): logger.exception(f"💥 Error reading project list: {e}") sys.exit(1) - # Parallel execution with error isolation with Pool(args.workers) as pool: async_results = [] for p in projects: @@ -275,14 +264,11 @@ def main(): async_results.append(pool.apply_async(run_project, (p, args.timeout))) except Exception as e: logger.error(f"💥 Failed to schedule project {p}: {e}") - # 使用自定义的FakeResult替代ApplyResult - async_results.append(FakeResult(False, p)) + async_results.append(_create_fake_async_result(False, p)) - # Collect results with timeout final_results: List[Tuple[bool, str]] = [] for res in async_results: try: - # 使用双倍超时时间确保结果收集 final_results.append(res.get(timeout=args.timeout * 2)) except TimeoutError: logger.error("⌛ Project execution timed out") @@ -291,7 +277,6 @@ def main(): logger.error(f"💥 Error collecting result: {e}") final_results.append((False, "unknown")) - # Output results failed = [p for success, p in final_results if not success] logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: From c8b755dba39b5fa640e9d3c82c4b46e06fdbede1 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 21:53:48 +0000 Subject: [PATCH 027/134] modify discover fuzz target --- fuzz/run_fuzz_target.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 2432289..20f3e67 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -139,7 +139,7 @@ def run_command( pass def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: - """Discover available Fuzz targets with precise error handling""" + """Discover fuzz targets (fuzz_ prefix, no extension, executable)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name targets: List[str] = [] @@ -150,7 +150,11 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: for f in out_dir.iterdir(): try: - if f.is_file() and f.name.startswith("fuzz_") and os.access(f, os.X_OK): + # 核心修改:检查无后缀的可执行文件 + if (f.is_file() and + f.name.startswith("fuzz_") and + '.' not in f.name and # 确保无文件后缀 + os.access(f, os.X_OK)): # 确保可执行权限 targets.append(f.name) except OSError as e: logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}") From 4b92185c6cb84d4226ed862f6e188eb4afd7ec2b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 22:44:06 +0000 Subject: [PATCH 028/134] modify the oss-fuzz dir --- fuzz/build_oss_fuzz.py | 178 +++++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 78 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index e4d2b3b..ba70a62 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -5,105 +5,117 @@ build_oss_fuzz.py Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation). -Uses multiprocessing.Pool to distribute projects across multiple CPU cores for concurrent processing. +Uses multiprocessing.Pool to distribute projects across multiple CPU cores. -Usage: python3 build_oss_fuzz.py [project_list_file] [--sanitizer type] [--workers N] +Usage: python3 build_oss_fuzz.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ + [--sanitizer type] [--workers N] Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ + --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 """ + import os import sys import subprocess import argparse +import logging from pathlib import Path from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -import logging -# --- Global configuration --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +class CommandExecutionError(Exception): + """Custom command execution exception""" + def __init__(self, message: str, exit_code: Optional[int] = None): + super().__init__(message) + self.exit_code = exit_code + def run_command( cmd: str, - log_msg: str, + oss_fuzz_dir: Path, allowed_exit_codes: Optional[List[int]] = None -) -> bool: - """Execute a shell command and stream output to console""" - allowed_exit_codes = allowed_exit_codes or [] - - logging.info(f"▶️ {log_msg}") - logging.debug(f"$ {cmd}") - +) -> int: + """Execute a command and return the exit code, throws CommandExecutionError on failure""" + allowed_exit_codes = allowed_exit_codes or [0] + logging.info(f"▶️ Executing command: {cmd}") + try: process = subprocess.Popen( f"yes | {cmd}", shell=True, + cwd=str(oss_fuzz_dir), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, encoding="utf-8", errors="replace" ) - except FileNotFoundError: - logging.error(f"Command not found: {cmd}") - return False - except OSError as e: - logging.error(f"OS error while executing command: {e}") - return False - except ValueError as e: - logging.error(f"Invalid arguments to Popen: {e}") - return False - - try: - if process.stdout: - for line in iter(process.stdout.readline, ""): - sys.stdout.write(line) - sys.stdout.flush() + + # Real-time streaming output processing + for line in iter(process.stdout.readline, ''): + sys.stdout.write(line) + sys.stdout.flush() + process.wait() exit_code = process.returncode - if exit_code in [0, *allowed_exit_codes]: - logging.info("✅ Command completed successfully") - return True - logging.error(f"❌ Command failed (exit code: {exit_code})") - return False - except KeyboardInterrupt: - logging.warning("⛔️ Command interrupted by user") - process.terminate() - return False - except Exception as e: - logging.exception(f"Unexpected error during process execution: {e}") - return False - - -def build_project(project_name: str, sanitizer: str) -> Tuple[bool, str]: - """Build workflow for a single project""" - os.chdir(OSS_FUZZ_DIR) - - logging.info("=" * 60) - logging.info(f"🔨 Starting build for project: {project_name}") - logging.info("=" * 60) + + if exit_code in allowed_exit_codes: + return exit_code + raise CommandExecutionError( + f"Command failed (exit code: {exit_code})", + exit_code=exit_code + ) + + except FileNotFoundError as e: + raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e + except OSError as e: + raise CommandExecutionError(f"System error: {e}") from e + except subprocess.SubprocessError as e: + raise CommandExecutionError(f"Subprocess error: {e}") from e - if not run_command( - f"python3 infra/helper.py build_image {project_name}", - "Step 1/2: Building Docker image" - ): +def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """Project build workflow""" + try: + logging.info("=" * 60) + logging.info(f"🔨 Starting build for project: {project_name}") + logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") + logging.info("=" * 60) + + # Validate paths + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise FileNotFoundError(f"Critical script missing: {helper_script}") + + # Execute build commands + run_command( + f"python3 infra/helper.py build_image {project_name}", + oss_fuzz_dir + ) + run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + oss_fuzz_dir + ) + + logging.info(f"✅ Project {project_name} built successfully") + return (True, project_name) + + except CommandExecutionError as e: + logging.error(f"❌ Project {project_name} build failed: {str(e)}") return (False, project_name) - - if not run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - f"Step 2/2: Compiling Fuzzers (sanitizer={sanitizer})" - ): + except Exception as e: + logging.exception(f"🔥 Unhandled exception: {e}") return (False, project_name) - logging.info(f"✅ Project {project_name} build completed") - return (True, project_name) - def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Build Tool") + parser = argparse.ArgumentParser(description="OSS-Fuzz parallel build tool") parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"]) - parser.add_argument("--workers", type=int, default=cpu_count()) + parser.add_argument("--oss-fuzz-dir", required=True, type=str, + help="OSS-Fuzz directory path") + parser.add_argument("--sanitizer", default="address", + choices=["address", "memory", "undefined"], + help="Fuzzer sanitizer type") + parser.add_argument("--workers", type=int, default=cpu_count(), + help="Number of parallel worker processes") args = parser.parse_args() logging.basicConfig( @@ -111,28 +123,38 @@ def main(): format='[%(levelname)s] [PID:%(process)d] %(message)s' ) - if not os.path.isfile(args.project_list): - logging.error(f"Project list file not found: {args.project_list}") - sys.exit(1) + # Process paths + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() + logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") + # Read project list try: with open(args.project_list, "r", encoding="utf-8") as f: projects = [line.strip() for line in f if line.strip()] - except OSError as e: - logging.error(f"OS error while reading project list: {e}") - sys.exit(1) - except UnicodeDecodeError as e: - logging.error(f"Encoding error while reading file: {e}") + logging.info(f"📋 Loaded {len(projects)} projects") + except Exception as e: + logging.error(f"❌ Failed to read project list: {e}") sys.exit(1) + # Parallel build with Pool(args.workers) as pool: - results = pool.starmap(build_project, [(p, args.sanitizer) for p in projects]) + results = pool.starmap( + build_project, + [(p, args.sanitizer, oss_fuzz_dir) for p in projects] + ) + # Output results failed = [p for success, p in results if not success] - logging.info(f"\n📊 Build completed: Success {len(projects) - len(failed)}/{len(projects)}") + logging.info(f"\n📊 Build completed: Successful {len(projects)-len(failed)}/{len(projects)}") if failed: - logging.warning("❌ Failed projects: " + ", ".join(failed)) + logging.error("❌ Failed projects: " + ", ".join(failed)) if __name__ == "__main__": - main() - + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted by user") + sys.exit(1) + except Exception as e: + print(f"💥 Critical error: {e}") + sys.exit(1) \ No newline at end of file From a8e58e89d2d85832268c0bd0184b048d600ef8fd Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 22:54:07 +0000 Subject: [PATCH 029/134] Redirect the output to an empty device without retaining any output --- fuzz/build_oss_fuzz.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz.py index ba70a62..14b79e7 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz.py @@ -40,22 +40,16 @@ def run_command( logging.info(f"▶️ Executing command: {cmd}") try: + # Remove all stdout/stderr capture logic and execute the command directly process = subprocess.Popen( f"yes | {cmd}", shell=True, cwd=str(oss_fuzz_dir), - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace" + # Redirect the output to an empty device without retaining any output + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL ) - # Real-time streaming output processing - for line in iter(process.stdout.readline, ''): - sys.stdout.write(line) - sys.stdout.flush() - process.wait() exit_code = process.returncode @@ -86,7 +80,7 @@ def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl if not helper_script.exists(): raise FileNotFoundError(f"Critical script missing: {helper_script}") - # Execute build commands + # Execute build commands (The output has been disabled) run_command( f"python3 infra/helper.py build_image {project_name}", oss_fuzz_dir From ae7e7c126da402dcf899c8cdf8053639e0d30b61 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 22 Jul 2025 23:10:12 +0000 Subject: [PATCH 030/134] add always yes --- fuzz/build_images.py | 158 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 fuzz/build_images.py diff --git a/fuzz/build_images.py b/fuzz/build_images.py new file mode 100644 index 0000000..17c7bfc --- /dev/null +++ b/fuzz/build_images.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +build_images.py + +Parallel build of OSS-Fuzz Docker images. +Uses multiprocessing.Pool to distribute projects across multiple CPU cores. + +Usage: python3 build_images.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz [--workers N] +Example: python3 fuzz/build_images.py data/valid_projects.txt \ + --oss-fuzz-dir ./fuzz/oss-fuzz \ + --workers 4 +""" + +import os +import sys +import subprocess +import argparse +import logging +import json +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +class CommandExecutionError(Exception): + """Custom command execution exception""" + def __init__(self, message: str, exit_code: Optional[int] = None): + super().__init__(message) + self.exit_code = exit_code + +def run_command( + cmd: str, + oss_fuzz_dir: Path, + allowed_exit_codes: Optional[List[int]] = None +) -> int: + """Execute a command and return the exit code, throws CommandExecutionError on failure""" + allowed_exit_codes = allowed_exit_codes or [0] + logging.info(f"▶️ Executing command: {cmd}") + + try: + process = subprocess.Popen( + f"yes | {cmd}", + shell=True, + cwd=str(oss_fuzz_dir), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) + + process.wait() + exit_code = process.returncode + + if exit_code in allowed_exit_codes: + return exit_code + raise CommandExecutionError( + f"Command failed (exit code: {exit_code})", + exit_code=exit_code + ) + + except FileNotFoundError as e: + raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e + except OSError as e: + raise CommandExecutionError(f"System error: {e}") from e + except subprocess.SubprocessError as e: + raise CommandExecutionError(f"Subprocess error: {e}") from e + +def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """Docker image build workflow""" + try: + logging.info("=" * 60) + logging.info(f"🔨 Starting Docker build for project: {project_name}") + logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") + logging.info("=" * 60) + + # Validate paths + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise FileNotFoundError(f"Critical script missing: {helper_script}") + + # Execute image build command + run_command( + f"python3 infra/helper.py build_image {project_name}", + oss_fuzz_dir + ) + + logging.info(f"✅ Docker image for {project_name} built successfully") + return (True, project_name) + + except CommandExecutionError as e: + logging.error(f"❌ Docker build for {project_name} failed: {str(e)}") + return (False, project_name) + except Exception as e: + logging.exception(f"🔥 Unhandled exception: {e}") + return (False, project_name) + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz Docker Image Builder") + parser.add_argument("project_list", help="Project list file path") + parser.add_argument("--oss-fuzz-dir", required=True, type=str, + help="OSS-Fuzz directory path") + parser.add_argument("--workers", type=int, default=cpu_count(), + help="Number of parallel worker processes") + parser.add_argument("--output", default="image_build_results.json", + help="Output file for build results") + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, + format='[%(levelname)s] [PID:%(process)d] %(message)s' + ) + + # Process paths + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() + logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") + + # Read project list + try: + with open(args.project_list, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + logging.info(f"📋 Loaded {len(projects)} projects") + except Exception as e: + logging.error(f"❌ Failed to read project list: {e}") + sys.exit(1) + + # Parallel image builds + with Pool(args.workers) as pool: + results = pool.starmap( + build_image, + [(p, oss_fuzz_dir) for p in projects] + ) + + # Output results + build_results = {project: success for success, project in results} + failed = [p for p in projects if not build_results[p]] + + logging.info(f"\n📊 Docker image builds completed: " + f"Successful {len(projects)-len(failed)}/{len(projects)}") + + if failed: + logging.error("❌ Failed projects: " + ", ".join(failed)) + + # Save build results to JSON file + try: + with open(args.output, "w") as f: + json.dump(build_results, f) + logging.info(f"💾 Build results saved to: {args.output}") + except Exception as e: + logging.error(f"❌ Failed to save build results: {e}") + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted by user") + sys.exit(1) + except Exception as e: + print(f"💥 Critical error: {e}") + sys.exit(1) \ No newline at end of file From 2dc8f97914ac65e326ee5ad7a54f8907185d6412 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 00:19:51 +0000 Subject: [PATCH 031/134] split the build script --- fuzz/build_fuzzers.py | 160 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 fuzz/build_fuzzers.py diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py new file mode 100644 index 0000000..e8d2806 --- /dev/null +++ b/fuzz/build_fuzzers.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +build_fuzzers.py + +OSS-Fuzz模糊测试器并行构建工具 + +用法: python3 build_fuzzers.py [项目列表文件] --oss-fuzz-dir /path/to/oss-fuzz \ + [--sanitizer type] [--workers N] +示例: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ + --oss-fuzz-dir ./fuzz/oss-fuzz \ + --sanitizer address \ + --workers 8 +""" + +import os +import sys +import subprocess +import argparse +import logging +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +class BuildError(Exception): + """基础构建异常""" + def __init__(self, message: str, project: str = "", exit_code: int = None): + super().__init__(message) + self.project = project + self.exit_code = exit_code + +class CommandError(BuildError): + """命令执行异常""" + pass + +class PathError(BuildError): + """路径相关异常""" + pass + +def run_command( + cmd: str, + oss_fuzz_dir: Path, + allowed_exit_codes: Optional[List[int]] = None +) -> int: + """执行命令并返回退出码""" + allowed_exit_codes = allowed_exit_codes or [0] + logging.info(f"▶️ 执行命令: {cmd}") + + try: + process = subprocess.Popen( + cmd, + shell=True, + cwd=str(oss_fuzz_dir), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) + + process.wait() + exit_code = process.returncode + + if exit_code in allowed_exit_codes: + return exit_code + raise CommandError(f"命令失败", exit_code=exit_code) + + except FileNotFoundError as e: + raise CommandError(f"命令不存在: {cmd.split()[0]}") from e + except OSError as e: + raise CommandError(f"系统错误: {e}") from e + except subprocess.SubprocessError as e: + raise CommandError(f"子进程错误: {e}") from e + +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """模糊测试器构建工作流""" + try: + logging.info(f"🔧 开始构建模糊测试器: {project_name}") + + # 验证路径 + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise PathError(f"关键脚本缺失: {helper_script}") + + # 执行模糊测试器构建命令 + run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + oss_fuzz_dir + ) + + logging.info(f"✅ 模糊测试器构建成功: {project_name}") + return (True, project_name) + + except BuildError as e: + logging.error(f"❌ 模糊测试器构建失败: {project_name} - {str(e)}") + return (False, project_name) + except Exception as e: + logging.exception(f"🔥 未处理异常: {project_name}") + return (False, project_name) + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz模糊测试器构建工具") + parser.add_argument("project_list", help="项目列表文件路径") + parser.add_argument("--oss-fuzz-dir", required=True, type=str, + help="OSS-Fuzz目录路径") + parser.add_argument("--sanitizer", default="address", + choices=["address", "memory", "undefined"], + help="模糊测试器检测器类型") + parser.add_argument("--workers", type=int, default=cpu_count(), + help="并行工作进程数") + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, + format='[%(levelname)s] %(message)s' + ) + + # 处理路径 + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() + logging.info(f"📁 OSS-Fuzz目录: {oss_fuzz_dir}") + + # 读取项目列表 + try: + with open(args.project_list, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + logging.info(f"📋 加载项目数: {len(projects)}") + except Exception as e: + logging.error(f"❌ 读取项目列表失败: {e}") + sys.exit(1) + + # 并行模糊测试器构建 + with Pool(args.workers) as pool: + results = pool.starmap( + build_fuzzers, + [(p, args.sanitizer, oss_fuzz_dir) for p in projects] + ) + + # 输出结果 + fuzzer_results = {project: success for success, project in results} + failed = [p for p in projects if not fuzzer_results[p]] + + success_count = len(projects) - len(failed) + logging.info(f"\n📊 构建完成: {success_count}/{len(projects)}") + + if failed: + logging.error("❌ 失败项目: " + ", ".join(failed)) + + # 生成整体状态报告 + logging.info("\n📊 整体构建状态:") + for project in projects: + status = "✅" if fuzzer_results[project] else "❌" + logging.info(f" {project}: {status}") + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 操作被中断") + sys.exit(1) + except Exception as e: + print(f"💥 严重错误: {e}") + sys.exit(1) From ebb68c902ffc60ec108b4c6acb8aca4f8557733c Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 01:00:25 +0000 Subject: [PATCH 032/134] split the build script --- fuzz/build_fuzzers.py | 148 ++++++++++++++++++++++++--------------- image_build_results.json | 1 + 2 files changed, 93 insertions(+), 56 deletions(-) create mode 100644 image_build_results.json diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index e8d2806..c02b436 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -4,12 +4,15 @@ """ build_fuzzers.py -OSS-Fuzz模糊测试器并行构建工具 +Parallel build of OSS-Fuzz fuzzers. +Requires Docker images to be built first (using build_images.py). -用法: python3 build_fuzzers.py [项目列表文件] --oss-fuzz-dir /path/to/oss-fuzz \ +Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ + --image-results image_build_results.json \ [--sanitizer type] [--workers N] -示例: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ +Example: python3 build_fuzzers.py data/valid_projects.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ + --image-results image_build_results.json \ --sanitizer address \ --workers 8 """ @@ -19,33 +22,25 @@ import subprocess import argparse import logging +import json from pathlib import Path from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -class BuildError(Exception): - """基础构建异常""" - def __init__(self, message: str, project: str = "", exit_code: int = None): +class CommandExecutionError(Exception): + """Custom command execution exception""" + def __init__(self, message: str, exit_code: Optional[int] = None): super().__init__(message) - self.project = project self.exit_code = exit_code -class CommandError(BuildError): - """命令执行异常""" - pass - -class PathError(BuildError): - """路径相关异常""" - pass - def run_command( cmd: str, oss_fuzz_dir: Path, allowed_exit_codes: Optional[List[int]] = None ) -> int: - """执行命令并返回退出码""" + """Execute a command and return the exit code, throws CommandExecutionError on failure""" allowed_exit_codes = allowed_exit_codes or [0] - logging.info(f"▶️ 执行命令: {cmd}") + logging.info(f"▶️ Executing command: {cmd}") try: process = subprocess.Popen( @@ -61,100 +56,141 @@ def run_command( if exit_code in allowed_exit_codes: return exit_code - raise CommandError(f"命令失败", exit_code=exit_code) + raise CommandExecutionError( + f"Command failed (exit code: {exit_code})", + exit_code=exit_code + ) except FileNotFoundError as e: - raise CommandError(f"命令不存在: {cmd.split()[0]}") from e + raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e except OSError as e: - raise CommandError(f"系统错误: {e}") from e + raise CommandExecutionError(f"System error: {e}") from e except subprocess.SubprocessError as e: - raise CommandError(f"子进程错误: {e}") from e + raise CommandExecutionError(f"Subprocess error: {e}") from e def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: - """模糊测试器构建工作流""" + """Fuzzer build workflow""" try: - logging.info(f"🔧 开始构建模糊测试器: {project_name}") + logging.info("=" * 60) + logging.info(f"🔧 Starting fuzzer build for project: {project_name}") + logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") + logging.info("=" * 60) - # 验证路径 + # Validate paths helper_script = oss_fuzz_dir / "infra" / "helper.py" if not helper_script.exists(): - raise PathError(f"关键脚本缺失: {helper_script}") + raise FileNotFoundError(f"Critical script missing: {helper_script}") - # 执行模糊测试器构建命令 + # Execute fuzzer build command run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", oss_fuzz_dir ) - logging.info(f"✅ 模糊测试器构建成功: {project_name}") + logging.info(f"✅ Fuzzers for {project_name} built successfully") return (True, project_name) - except BuildError as e: - logging.error(f"❌ 模糊测试器构建失败: {project_name} - {str(e)}") + except CommandExecutionError as e: + logging.error(f"❌ Fuzzer build for {project_name} failed: {str(e)}") return (False, project_name) except Exception as e: - logging.exception(f"🔥 未处理异常: {project_name}") + logging.exception(f"🔥 Unhandled exception: {e}") return (False, project_name) def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz模糊测试器构建工具") - parser.add_argument("project_list", help="项目列表文件路径") + parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder") + parser.add_argument("project_list", help="Project list file path") parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz目录路径") + help="OSS-Fuzz directory path") parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"], - help="模糊测试器检测器类型") + help="Fuzzer sanitizer type") parser.add_argument("--workers", type=int, default=cpu_count(), - help="并行工作进程数") + help="Number of parallel worker processes") + parser.add_argument("--image-results", required=True, + help="JSON file with image build results from build_images.py") args = parser.parse_args() logging.basicConfig( level=logging.INFO, - format='[%(levelname)s] %(message)s' + format='[%(levelname)s] [PID:%(process)d] %(message)s' ) - # 处理路径 + # Process paths oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - logging.info(f"📁 OSS-Fuzz目录: {oss_fuzz_dir}") + logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") - # 读取项目列表 + # Read project list try: with open(args.project_list, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logging.info(f"📋 加载项目数: {len(projects)}") + all_projects = [line.strip() for line in f if line.strip()] + logging.info(f"📋 Loaded {len(all_projects)} projects from list") + except Exception as e: + logging.error(f"❌ Failed to read project list: {e}") + sys.exit(1) + + # Load image build results + try: + with open(args.image_results, "r") as f: + image_results = json.load(f) + logging.info(f"📋 Loaded image build results from: {args.image_results}") except Exception as e: - logging.error(f"❌ 读取项目列表失败: {e}") + logging.error(f"❌ Failed to load image build results: {e}") + sys.exit(1) + + # Filter projects with successful image builds + projects_to_build = [p for p in all_projects if p in image_results and image_results[p]] + + if not projects_to_build: + logging.error("❌ No projects with successful image builds found") sys.exit(1) + + skipped = len(all_projects) - len(projects_to_build) + logging.info(f"🔍 Found {len(projects_to_build)} projects with successful image builds " + f"({skipped} skipped due to image build failures)") - # 并行模糊测试器构建 + # Parallel fuzzer builds with Pool(args.workers) as pool: results = pool.starmap( build_fuzzers, - [(p, args.sanitizer, oss_fuzz_dir) for p in projects] + [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build] ) - # 输出结果 + # Output results fuzzer_results = {project: success for success, project in results} - failed = [p for p in projects if not fuzzer_results[p]] + failed = [p for p in projects_to_build if not fuzzer_results[p]] - success_count = len(projects) - len(failed) - logging.info(f"\n📊 构建完成: {success_count}/{len(projects)}") + logging.info(f"\n📊 Fuzzer builds completed: " + f"Successful {len(projects_to_build)-len(failed)}/{len(projects_to_build)}") if failed: - logging.error("❌ 失败项目: " + ", ".join(failed)) + logging.error("❌ Failed fuzzer builds: " + ", ".join(failed)) - # 生成整体状态报告 - logging.info("\n📊 整体构建状态:") - for project in projects: - status = "✅" if fuzzer_results[project] else "❌" + # Generate overall status report + overall_results = {} + for project in all_projects: + status = "❌" + if project in image_results and image_results[project]: + if project in fuzzer_results and fuzzer_results[project]: + status = "✅" + elif project in fuzzer_results: + status = "❌ (fuzzer failed)" + else: + status = "❌ (image ok but not built)" + else: + status = "❌ (image failed)" + overall_results[project] = status + + logging.info("\n📊 Overall build status:") + for project, status in overall_results.items(): logging.info(f" {project}: {status}") if __name__ == "__main__": try: main() except KeyboardInterrupt: - print("\n🛑 操作被中断") + print("\n🛑 Operation interrupted by user") sys.exit(1) except Exception as e: - print(f"💥 严重错误: {e}") - sys.exit(1) + print(f"💥 Critical error: {e}") + sys.exit(1) \ No newline at end of file diff --git a/image_build_results.json b/image_build_results.json new file mode 100644 index 0000000..a5c3591 --- /dev/null +++ b/image_build_results.json @@ -0,0 +1 @@ +{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true} \ No newline at end of file From 5b12877a6ad2639402fe4bcd8fc1715010ac7994 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 20:55:57 +0000 Subject: [PATCH 033/134] build scripts test successfully --- fuzz/{build_oss_fuzz.py => build_oss_fuzz_whole.py} | 6 +++--- image_build_results.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename fuzz/{build_oss_fuzz.py => build_oss_fuzz_whole.py} (96%) diff --git a/fuzz/build_oss_fuzz.py b/fuzz/build_oss_fuzz_whole.py similarity index 96% rename from fuzz/build_oss_fuzz.py rename to fuzz/build_oss_fuzz_whole.py index 14b79e7..59d3bea 100644 --- a/fuzz/build_oss_fuzz.py +++ b/fuzz/build_oss_fuzz_whole.py @@ -2,14 +2,14 @@ # -*- coding: utf-8 -*- """ -build_oss_fuzz.py +build_oss_fuzz_whole.py Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation). Uses multiprocessing.Pool to distribute projects across multiple CPU cores. -Usage: python3 build_oss_fuzz.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ +Usage: python3 build_oss_fuzz_whole.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ [--sanitizer type] [--workers N] -Example: python3 fuzz/build_oss_fuzz.py data/valid_projects.txt \ +Example: python3 fuzz/build_oss_fuzz_whole.py data/valid_projects.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 diff --git a/image_build_results.json b/image_build_results.json index a5c3591..93d383e 100644 --- a/image_build_results.json +++ b/image_build_results.json @@ -1 +1 @@ -{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true} \ No newline at end of file +{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": false, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, "jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": false, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": false, "pyparsing": false, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true} \ No newline at end of file From 40588d456c8e515463298a70982d3a95a74a1aad Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 22:20:19 +0000 Subject: [PATCH 034/134] build.py --- fuzz/build.py | 329 ++++++++++++++++++++++++++++++++++++++++++ fuzz/build_fuzzers.py | 128 +++++++++++----- 2 files changed, 418 insertions(+), 39 deletions(-) create mode 100644 fuzz/build.py diff --git a/fuzz/build.py b/fuzz/build.py new file mode 100644 index 0000000..e87f33b --- /dev/null +++ b/fuzz/build.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +OSS-Fuzz Build System + +Combines Docker image building and fuzzer building capabilities. +Supports three modes: 'image', 'fuzzer', or 'both'. + +Usage: + Build images: + python3 build.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz + + Build fuzzers: + python3 build.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json + + Build both: + python3 build.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address + +Example: + python3 ./fuzz/build.py --mode both data/valid_projects.txt --oss-fuzz-dir ./fuzz/oss-fuzz --sanitizer address --workers 8 + +""" + +import os +import sys +import subprocess +import argparse +import logging +import json +from pathlib import Path +from typing import List, Optional, Tuple, Dict +from multiprocessing import Pool, cpu_count + +# ======================================================================================== +# Custom Exceptions +# ======================================================================================== +class BuildError(Exception): + """Base exception for build failures""" + def __init__(self, message: str, project: str = "", exit_code: int = None): + super().__init__(message) + self.project = project + self.exit_code = exit_code + +class CommandError(BuildError): + """Exception for command execution failures""" + pass + +class PathError(BuildError): + """Exception for missing paths or files""" + pass + +class ConfigError(BuildError): + """Exception for configuration errors""" + pass + +# ======================================================================================== +# Helper Functions +# ======================================================================================== +def run_command( + cmd: str, + oss_fuzz_dir: Path, + project: str = "", + allowed_exit_codes: Optional[List[int]] = None, + skip_yes: bool = False +) -> int: + """Execute a command and return the exit code""" + allowed_exit_codes = allowed_exit_codes or [0] + cmd_str = f"yes | {cmd}" if not skip_yes else cmd + logging.debug(f"Executing command [{project}]: {cmd_str}") + + try: + process = subprocess.Popen( + cmd_str if skip_yes else f"yes | {cmd}", + shell=True, + cwd=str(oss_fuzz_dir), + stdout=subprocess.PIPE if skip_yes else subprocess.DEVNULL, + stderr=subprocess.PIPE if skip_yes else subprocess.DEVNULL, + text=True if skip_yes else False + ) + + if skip_yes: + stdout, stderr = process.communicate() + else: + process.wait() + exit_code = process.returncode + + if exit_code in allowed_exit_codes: + return exit_code + + error_msg = f"Command failed (exit code: {exit_code})" + if project: + error_msg += f" for project: {project}" + + if skip_yes and stderr.strip(): + error_msg += f"\nError output:\n{stderr.strip()}" + + if skip_yes and stdout.strip(): + error_msg += f"\nOutput:\n{stdout.strip()}" + + raise CommandError(error_msg, project=project, exit_code=exit_code) + + except FileNotFoundError as e: + raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e + except OSError as e: + raise CommandError(f"System error: {e}", project=project) from e + except subprocess.SubprocessError as e: + raise CommandError(f"Subprocess error: {e}", project=project) from e + +# ======================================================================================== +# Build Functions +# ======================================================================================== +def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """Docker image build workflow""" + try: + logging.info(f"Building Docker image: {project_name}") + + # Validate paths + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise PathError(f"Missing helper script: {helper_script}", project=project_name) + + # Execute image build command + run_command( + f"python3 infra/helper.py build_image {project_name}", + oss_fuzz_dir, + project=project_name + ) + + logging.info(f"✅ Docker image built: {project_name}") + return (True, project_name) + + except CommandError as e: + logging.error(f"❌ Docker build failed: {project_name} - {str(e)}") + return (False, project_name) + except Exception as e: + logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") + return (False, project_name) + +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """Fuzzer build workflow""" + try: + logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)") + + # Validate paths + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise PathError(f"Missing helper script: {helper_script}", project=project_name) + + # Execute fuzzer build command + run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + oss_fuzz_dir, + project=project_name, + skip_yes=True + ) + + logging.info(f"✅ Fuzzers built: {project_name}") + return (True, project_name) + + except BuildError as e: + logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}") + return (False, project_name) + except Exception as e: + logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") + return (False, project_name) + +# ======================================================================================== +# Main Execution +# ======================================================================================== +def load_projects(file_path: Path) -> List[str]: + """Load project list from file""" + if not file_path.exists(): + raise FileNotFoundError(f"Project list not found: {file_path}") + + with open(file_path, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + + if not projects: + raise ConfigError("Project list is empty") + + logging.info(f"Loaded {len(projects)} projects from {file_path}") + return projects + +def execute_builds( + func, + args_list: List[tuple], + worker_count: int, + success_msg: str, + failure_msg: str +) -> Tuple[Dict[str, bool], List[str]]: + """Execute build tasks in parallel and return results""" + results = {} + with Pool(worker_count) as pool: + for success, project in pool.starmap(func, args_list): + results[project] = success + + failed = [p for p, success in results.items() if not success] + success_count = len(results) - len(failed) + + if failed: + logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects") + logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects") + + return results, failed + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz Build System") + parser.add_argument("project_list", help="Project list file path") + parser.add_argument("--oss-fuzz-dir", required=True, type=str, + help="OSS-Fuzz directory path") + parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both', + help="Build mode: 'image', 'fuzzer', or 'both'") + parser.add_argument("--workers", type=int, default=cpu_count(), + help="Number of parallel worker processes") + parser.add_argument("--sanitizer", default="address", + choices=["address", "memory", "undefined"], + help="Fuzzer sanitizer type") + parser.add_argument("--image-results", default="image_build_results.json", + help="Image build results file (JSON)") + parser.add_argument("--log-level", default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + help="Logging detail level") + args = parser.parse_args() + + # Configure logging + logging.basicConfig( + level=getattr(logging, args.log_level), + format='[%(levelname)s] [PID:%(process)d] %(message)s' + ) + + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() + project_file = Path(args.project_list).resolve() + output_file = Path(args.image_results) + + # Sanity checks + if not oss_fuzz_dir.exists(): + logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}") + sys.exit(1) + + # Load projects + try: + projects = load_projects(project_file) + except Exception as e: + logging.critical(f"❌ Failed to load projects: {e}") + sys.exit(1) + + # Image building workflow + image_results = {} + if args.mode in ['image', 'both']: + logging.info("\n" + "="*60) + logging.info(f"Starting Docker image builds for {len(projects)} projects") + logging.info("="*60 + "\n") + + image_args = [(p, oss_fuzz_dir) for p in projects] + image_results, image_failures = execute_builds( + build_image, + image_args, + args.workers, + "✅ Docker image builds succeeded", + "🚫 Docker image builds failed" + ) + + # Save image build results + try: + with output_file.open("w") as f: + json.dump(image_results, f) + logging.info(f"💾 Image build results saved to: {output_file}") + except Exception as e: + logging.error(f"❌ Failed to save image results: {e}") + + # Fuzzer building workflow + fuzzer_results = {} + if args.mode in ['fuzzer', 'both']: + logging.info("\n" + "="*60) + logging.info(f"Starting fuzzer builds for {len(projects)} projects ({args.sanitizer} sanitizer)") + logging.info("="*60 + "\n") + + # Load image results for fuzzer mode + if args.mode == 'fuzzer': + try: + with output_file.open("r") as f: + image_results = json.load(f) + logging.info(f"📋 Loaded image build results from: {output_file}") + except Exception as e: + logging.critical(f"❌ Failed to load image results: {e}") + sys.exit(1) + + # Filter projects with successful image builds + fuzz_projects = [p for p in projects if image_results.get(p, False)] + if not fuzz_projects: + logging.critical("❌ No projects with successful image builds") + sys.exit(1) + + fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects] + fuzzer_results, fuzzer_failures = execute_builds( + build_fuzzers, + fuzzer_args, + args.workers, + "✅ Fuzzer builds succeeded", + "🚫 Fuzzer builds failed" + ) + + # Final summary + logging.info("\n" + "="*60) + logging.info("Build Summary") + logging.info("="*60) + + if args.mode in ['image', 'both']: + image_success = sum(1 for r in image_results.values() if r) + logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded") + + if args.mode in ['fuzzer', 'both']: + if args.mode == 'both': + fuzz_projects = list(fuzzer_results.keys()) + fuzzer_success = sum(1 for r in fuzzer_results.values() if r) + logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded") + + logging.info("="*60) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted by user") + sys.exit(1) + except Exception as e: + logging.critical(f"💥 Critical error: {str(e)}") + sys.exit(1) \ No newline at end of file diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index c02b436..1dfd825 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -10,7 +10,7 @@ Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ --image-results image_build_results.json \ [--sanitizer type] [--workers N] -Example: python3 build_fuzzers.py data/valid_projects.txt \ +Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --image-results image_build_results.json \ --sanitizer address \ @@ -27,18 +27,32 @@ from typing import List, Optional, Tuple from multiprocessing import Pool, cpu_count -class CommandExecutionError(Exception): - """Custom command execution exception""" - def __init__(self, message: str, exit_code: Optional[int] = None): +class BuildError(Exception): + """Base exception for build failures""" + def __init__(self, message: str, project: str = "", exit_code: int = None): super().__init__(message) + self.project = project self.exit_code = exit_code +class CommandError(BuildError): + """Exception for command execution failures""" + pass + +class PathError(BuildError): + """Exception for missing paths or files""" + pass + +class ConfigError(BuildError): + """Exception for configuration errors""" + pass + def run_command( cmd: str, oss_fuzz_dir: Path, + project: str = "", allowed_exit_codes: Optional[List[int]] = None ) -> int: - """Execute a command and return the exit code, throws CommandExecutionError on failure""" + """Execute a command and return the exit code""" allowed_exit_codes = allowed_exit_codes or [0] logging.info(f"▶️ Executing command: {cmd}") @@ -47,54 +61,67 @@ def run_command( cmd, shell=True, cwd=str(oss_fuzz_dir), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True ) - process.wait() + stdout, stderr = process.communicate() exit_code = process.returncode if exit_code in allowed_exit_codes: return exit_code - raise CommandExecutionError( - f"Command failed (exit code: {exit_code})", - exit_code=exit_code - ) + + # 构建详细的错误信息 + error_msg = f"Command failed (exit code: {exit_code})" + if project: + error_msg += f" for project: {project}" + + if stderr.strip(): + error_msg += f"\nError output:\n{stderr.strip()}" + + if stdout.strip(): + error_msg += f"\nOutput:\n{stdout.strip()}" + + raise CommandError(error_msg, project=project, exit_code=exit_code) except FileNotFoundError as e: - raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e + raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e except OSError as e: - raise CommandExecutionError(f"System error: {e}") from e + raise CommandError(f"System error: {e}", project=project) from e except subprocess.SubprocessError as e: - raise CommandExecutionError(f"Subprocess error: {e}") from e + raise CommandError(f"Subprocess error: {e}", project=project) from e def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: """Fuzzer build workflow""" try: logging.info("=" * 60) - logging.info(f"🔧 Starting fuzzer build for project: {project_name}") + logging.info(f"🔧 Building fuzzers for: {project_name}") logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") logging.info("=" * 60) # Validate paths helper_script = oss_fuzz_dir / "infra" / "helper.py" if not helper_script.exists(): - raise FileNotFoundError(f"Critical script missing: {helper_script}") + raise PathError(f"Missing helper script: {helper_script}", project=project_name) # Execute fuzzer build command run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - oss_fuzz_dir + oss_fuzz_dir, + project=project_name ) - logging.info(f"✅ Fuzzers for {project_name} built successfully") + logging.info(f"✅ Fuzzers built: {project_name}") return (True, project_name) - except CommandExecutionError as e: - logging.error(f"❌ Fuzzer build for {project_name} failed: {str(e)}") + except BuildError as e: + logging.error(f"❌ Build failed: {project_name}") + logging.error(f" Reason: {str(e)}") return (False, project_name) except Exception as e: - logging.exception(f"🔥 Unhandled exception: {e}") + logging.error(f"🔥 Unhandled exception: {project_name}") + logging.exception(f" Exception details: {e}") return (False, project_name) def main(): @@ -113,7 +140,7 @@ def main(): logging.basicConfig( level=logging.INFO, - format='[%(levelname)s] [PID:%(process)d] %(message)s' + format='[%(levelname)s] %(message)s' ) # Process paths @@ -122,32 +149,53 @@ def main(): # Read project list try: - with open(args.project_list, "r", encoding="utf-8") as f: + project_file = Path(args.project_list) + if not project_file.exists(): + raise FileNotFoundError(f"Project list file not found: {project_file}") + + with open(project_file, "r", encoding="utf-8") as f: all_projects = [line.strip() for line in f if line.strip()] - logging.info(f"📋 Loaded {len(all_projects)} projects from list") + + if not all_projects: + raise ConfigError("Project list is empty") + + logging.info(f"📋 Loaded {len(all_projects)} projects") except Exception as e: logging.error(f"❌ Failed to read project list: {e}") sys.exit(1) # Load image build results try: - with open(args.image_results, "r") as f: + image_results_file = Path(args.image_results) + if not image_results_file.exists(): + raise FileNotFoundError(f"Image results file not found: {image_results_file}") + + with open(image_results_file, "r") as f: image_results = json.load(f) - logging.info(f"📋 Loaded image build results from: {args.image_results}") + + if not isinstance(image_results, dict): + raise ConfigError("Image results should be a JSON object") + + logging.info(f"📋 Loaded image build results: {args.image_results}") + except json.JSONDecodeError as e: + logging.error(f"❌ Failed to parse image build results: {e}") + sys.exit(1) except Exception as e: logging.error(f"❌ Failed to load image build results: {e}") sys.exit(1) # Filter projects with successful image builds projects_to_build = [p for p in all_projects if p in image_results and image_results[p]] + image_failures = [p for p in all_projects if p not in image_results or not image_results[p]] if not projects_to_build: - logging.error("❌ No projects with successful image builds found") + logging.error("❌ No projects with successful image builds") + if image_failures: + logging.error(f" Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}") sys.exit(1) skipped = len(all_projects) - len(projects_to_build) - logging.info(f"🔍 Found {len(projects_to_build)} projects with successful image builds " - f"({skipped} skipped due to image build failures)") + logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)") # Parallel fuzzer builds with Pool(args.workers) as pool: @@ -160,12 +208,14 @@ def main(): fuzzer_results = {project: success for success, project in results} failed = [p for p in projects_to_build if not fuzzer_results[p]] - logging.info(f"\n📊 Fuzzer builds completed: " - f"Successful {len(projects_to_build)-len(failed)}/{len(projects_to_build)}") + success_count = len(projects_to_build) - len(failed) + logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}") if failed: - logging.error("❌ Failed fuzzer builds: " + ", ".join(failed)) - + logging.error(f"❌ Failed builds ({len(failed)} projects):") + for project in failed: + logging.error(f" - {project}") + # Generate overall status report overall_results = {} for project in all_projects: @@ -174,14 +224,14 @@ def main(): if project in fuzzer_results and fuzzer_results[project]: status = "✅" elif project in fuzzer_results: - status = "❌ (fuzzer failed)" + status = "❌ (fuzzer)" else: - status = "❌ (image ok but not built)" + status = "❌ (not built)" else: - status = "❌ (image failed)" + status = "❌ (image)" overall_results[project] = status - logging.info("\n📊 Overall build status:") + logging.info("\n📊 Overall status:") for project, status in overall_results.items(): logging.info(f" {project}: {status}") @@ -189,7 +239,7 @@ def main(): try: main() except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user") + print("\n🛑 Operation interrupted") sys.exit(1) except Exception as e: print(f"💥 Critical error: {e}") From 5b52393098fd2c3e149724c9f148f56622d58fc1 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 22:20:51 +0000 Subject: [PATCH 035/134] collect targets first and then run --- fuzz/run_fuzz_all_targets.py | 280 +++++++++++++++++++++++++++++++++++ fuzz/run_fuzz_target.py | 125 ++++++---------- 2 files changed, 328 insertions(+), 77 deletions(-) create mode 100644 fuzz/run_fuzz_all_targets.py diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py new file mode 100644 index 0000000..d3ea3d8 --- /dev/null +++ b/fuzz/run_fuzz_all_targets.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +run_fuzz_all_targets.py + +该脚本采用两阶段方法进行模糊测试: +1. 发现阶段:首先遍历所有指定的项目,收集每一个项目中所有可执行的模糊测试目标 (fuzz target)。 +2. 执行阶段:然后创建一个包含所有 (项目, target) 对的任务池,并使用多进程并行执行所有任务。 + +这种方法可以最大化 CPU 利用率,并提供更清晰的整体进度。 + +用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] +示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 +""" + +import os +import sys +import subprocess +import argparse +import logging +import time +from datetime import datetime +from pathlib import Path +from typing import Optional, List, Tuple +from multiprocessing import Pool, cpu_count + +# --- 全局配置 --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "run_logs2" + + +def run_command( + cmd: str, + log_msg: str, + logger: logging.Logger, + allowed_exit_codes: Optional[List[int]] = None, + timeout: int = 3600 # 默认1小时超时 +) -> bool: + """使用实时日志记录和精确的错误处理来执行命令""" + allowed_exit_codes = allowed_exit_codes or [] + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") + + process = None + try: + process = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace" + ) + + start_time = time.time() + while process.poll() is None: + # 检查命令是否超时 + if time.time() - start_time > timeout: + logger.error(f"⌛ 命令在 {timeout} 秒后超时") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + return False + + if process.stdout: + line = process.stdout.readline() + if line: + logger.debug(line.strip()) + else: + # 在某些情况下 stdout 可能暂时为 None + time.sleep(0.1) + + exit_code = process.returncode + if exit_code not in [0, *allowed_exit_codes]: + logger.error(f"❌ 命令执行失败,退出码: {exit_code}") + return False + return True + + except FileNotFoundError: + logger.error(f"🔍 命令未找到: {cmd.split()[0]}") + return False + except PermissionError: + logger.error(f"🔒 执行命令权限不足: {cmd}") + return False + except subprocess.SubprocessError as e: + logger.exception(f"💥 子进程错误: {e}") + return False + except OSError as e: + logger.exception(f"💥 执行命令时发生操作系统错误: {e}") + return False + finally: + if process and process.poll() is None: + try: + process.terminate() + process.wait(timeout=5) + except Exception: + pass + + +def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: + """发现项目的 Fuzz Targets (以 'fuzz_' 开头,无扩展名,且可执行)""" + out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + targets: List[str] = [] + + if not out_dir.is_dir(): + logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}") + return targets + + try: + for f in out_dir.iterdir(): + try: + if (f.is_file() and + f.name.startswith("fuzz_") and + '.' not in f.name and + os.access(f, os.X_OK)): + targets.append(f.name) + except OSError as e: + logger.warning(f"⚠️ 检查文件 {f.name} 时出错,已跳过: {e}") + + except PermissionError: + logger.error(f"🔒 访问目录权限不足: {out_dir}") + except OSError as e: + logger.exception(f"💥 发现 Target 时发生操作系统错误: {e}") + + return targets + + +def run_single_target(project_name: str, target_name: str, timeout: int) -> Tuple[bool, str, str]: + """为单个 (项目, target) 对执行模糊测试工作流""" + task_id = f"{project_name}_{target_name}" + logger = logging.getLogger(task_id) + + try: + # 为每个任务配置独立的日志记录器 + logger.setLevel(logging.DEBUG) + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" + file_handler = logging.FileHandler(log_file, encoding="utf-8") + formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + os.chdir(OSS_FUZZ_DIR) + + except (OSError, PermissionError) as e: + # 如果日志设置失败,直接打印到控制台 + print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}") + return False, project_name, target_name + + logger.info(f"🚀 开始测试 -> 项目: {project_name}, Target: {target_name}") + try: + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + success = run_command( + cmd, + f"运行 Target '{target_name}' (超时={timeout}s)", + logger, + allowed_exit_codes=[1, 124], # 1=发现崩溃, 124=超时,都视为模糊测试的正常退出 + timeout=timeout + 300 # 增加300秒的额外缓冲时间给 helper.py + ) + + if success: + logger.info(f"✅ Target '{target_name}' 运行完成。") + else: + logger.error(f"❌ Target '{target_name}' 运行失败。") + + return success, project_name, target_name + + except Exception as e: + logger.exception(f"💥 运行 target '{target_name}' 时发生意外错误: {e}") + return False, project_name, target_name + finally: + # 清理日志处理器以释放文件句柄 + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + +def main(): + # 主进程日志配置 + logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(message)s", + stream=sys.stdout + ) + logger = logging.getLogger("Main") + + parser = argparse.ArgumentParser(description="OSS-Fuzz 并行模糊测试工具") + parser.add_argument("project_list", help="包含项目名称列表的文件路径") + parser.add_argument("--timeout", type=int, default=60, help="每个 Fuzz Target 的运行超时时间 (秒)") + parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数") + args = parser.parse_args() + + # --- 1. 读取项目列表 --- + try: + project_path = Path(args.project_list) + with open(project_path, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + logger.info(f"📋 从 {project_path.name} 加载了 {len(projects)} 个项目。") + except FileNotFoundError: + logger.error(f"❌ 项目列表文件未找到: {args.project_list}") + sys.exit(1) + except (OSError, PermissionError) as e: + logger.exception(f"💥 读取项目列表时出错: {e}") + sys.exit(1) + + # --- 2. 发现阶段: 收集所有项目的 Fuzz Targets --- + logger.info("\n" + "=" * 20 + " 阶段 1: 发现所有 Fuzz Targets " + "=" * 20) + all_fuzz_tasks = [] + try: + original_cwd = Path.cwd() + os.chdir(OSS_FUZZ_DIR) + for project_name in projects: + targets = discover_targets(project_name, logger) + if targets: + logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个 targets: {', '.join(targets)}") + for target in targets: + all_fuzz_tasks.append((project_name, target)) + else: + logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何 Fuzz Targets。") + os.chdir(original_cwd) + except FileNotFoundError: + logger.error(f"❌ OSS-Fuzz 目录不存在: {OSS_FUZZ_DIR}") + sys.exit(1) + except Exception as e: + logger.exception(f"💥 在发现阶段发生未知错误: {e}") + sys.exit(1) + + + if not all_fuzz_tasks: + logger.info("🤷 未发现任何可执行的 Fuzz Targets。程序退出。") + sys.exit(0) + + # --- 3. 执行阶段: 并行运行所有 Fuzzing 任务 --- + logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。") + logger.info("=" * 20 + " 阶段 2: 并行执行 Fuzzing " + "=" * 23) + logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试 (每个 Target 超时: {args.timeout}s)...") + + tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] + results = [] + + with Pool(args.workers) as pool: + try: + results = pool.starmap(run_single_target, tasks_with_args) + except Exception as e: + logger.error(f"💥 并行执行过程中发生严重错误: {e}") + pool.terminate() + pool.join() + + # --- 4. 汇总阶段 --- + logger.info("\n" + "=" * 20 + " 阶段 3: 结果汇总 " + "=" * 28) + failed_tasks = [(p, t) for success, p, t in results if not success] + total_tasks = len(all_fuzz_tasks) + failed_count = len(failed_tasks) + success_count = total_tasks - failed_count + + logger.info(f"📊 Fuzzing 完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}") + if failed_tasks: + logger.error("❌ 以下 Fuzz Targets 运行失败:") + for project, target in failed_tasks: + logger.error(f" - 项目: {project}, Target: {target}") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 操作被用户中断。") + sys.exit(1) + except Exception as e: + print(f"\n💥 主程序发生致命错误: {e}") + sys.exit(1) \ No newline at end of file diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py index 20f3e67..ee47b63 100644 --- a/fuzz/run_fuzz_target.py +++ b/fuzz/run_fuzz_target.py @@ -17,63 +17,21 @@ import argparse import logging import time -import shutil from datetime import datetime from pathlib import Path -from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count, current_process +from typing import Optional +from multiprocessing import Pool, cpu_count # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs" -def setup_logging(project_name: str) -> logging.Logger: - """Configure hierarchical logger with file and console handlers""" - try: - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" - - # Create process-specific logger - logger = logging.getLogger(f"{project_name}.{current_process().name}") - logger.setLevel(logging.DEBUG) - - # File handler (all levels) - file_handler = logging.FileHandler(log_file, encoding="utf-8") - file_handler.setLevel(logging.DEBUG) - file_formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(file_formatter) - - # Console handler (INFO+ only) - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_formatter = logging.Formatter( - "[%(levelname)s] %(message)s" - ) - console_handler.setFormatter(console_formatter) - - logger.addHandler(file_handler) - logger.addHandler(console_handler) - - # Capture uncaught exceptions - sys.excepthook = lambda exc_type, exc_value, exc_traceback: ( - logger.critical("Unhandled exception", exc_info=(exc_type, exc_value, exc_traceback)) - ) - - return logger - except (PermissionError, OSError) as e: - print(f"❌ Critical logging setup error: {e}") - sys.exit(1) - def run_command( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Optional[List[int]] = None, + allowed_exit_codes: Optional[list[int]] = None, timeout: int = 3600 # 1 hour default timeout ) -> bool: """Execute command with real-time logging and precise error handling""" @@ -138,10 +96,11 @@ def run_command( except: pass -def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: + +def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: """Discover fuzz targets (fuzz_ prefix, no extension, executable)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets: List[str] = [] + targets: list[str] = [] try: if not out_dir.exists(): @@ -150,11 +109,10 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: for f in out_dir.iterdir(): try: - # 核心修改:检查无后缀的可执行文件 if (f.is_file() and f.name.startswith("fuzz_") and - '.' not in f.name and # 确保无文件后缀 - os.access(f, os.X_OK)): # 确保可执行权限 + '.' not in f.name and + os.access(f, os.X_OK)): targets.append(f.name) except OSError as e: logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}") @@ -168,10 +126,33 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: return targets -def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: + +def run_project(project_name: str, timeout: int) -> tuple[bool, str]: """Testing workflow for a single project with precise error handling""" try: - logger = setup_logging(project_name) + + logger = logging.getLogger(project_name) + logger.setLevel(logging.DEBUG) + + # 创建日志文件 + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" + file_handler = logging.FileHandler(log_file, encoding="utf-8") + + # 配置日志格式 + formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # 添加控制台输出 + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s")) + logger.addHandler(console_handler) + os.chdir(OSS_FUZZ_DIR) except (OSError, PermissionError) as e: print(f"❌ Critical error initializing project {project_name}: {e}") @@ -179,7 +160,6 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: logger.info("=" * 60) logger.info(f"🚀 Starting testing for project: {project_name}") - logger.debug(f"📝 Log path: {[h.baseFilename for h in logger.handlers if isinstance(h, logging.FileHandler)]}") logger.info("=" * 60) try: @@ -215,16 +195,16 @@ def run_project(project_name: str, timeout: int) -> Tuple[bool, str]: else: logger.error(f"❌ One or more targets failed for {project_name}") + # 清理日志处理器 + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + return (all_success, project_name) -def _create_fake_async_result(result: bool, project_name: str): - """模拟 Pool.apply_async 返回值,便于错误恢复""" - class FakeApplyResult: - def get(self, timeout=None): - return (result, project_name) - return FakeApplyResult() def main(): + # 主进程日志配置 logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s" @@ -262,30 +242,21 @@ def main(): sys.exit(1) with Pool(args.workers) as pool: - async_results = [] - for p in projects: - try: - async_results.append(pool.apply_async(run_project, (p, args.timeout))) - except Exception as e: - logger.error(f"💥 Failed to schedule project {p}: {e}") - async_results.append(_create_fake_async_result(False, p)) - - final_results: List[Tuple[bool, str]] = [] - for res in async_results: - try: - final_results.append(res.get(timeout=args.timeout * 2)) - except TimeoutError: - logger.error("⌛ Project execution timed out") - final_results.append((False, "unknown")) - except Exception as e: - logger.error(f"💥 Error collecting result: {e}") - final_results.append((False, "unknown")) + try: + # 使用starmap同步执行所有任务 + final_results = pool.starmap(run_project, [(p, args.timeout) for p in projects]) + except Exception as e: + logger.error(f"💥 Parallel execution failed: {e}") + # 出错时返回所有项目失败状态 + final_results = [(False, p) for p in projects] + # 汇总结果 failed = [p for success, p in final_results if not success] logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") if failed: logger.error("❌ Failed projects: " + ", ".join(failed)) + if __name__ == "__main__": try: main() From 8285e3f93360cb74a95d02c249be1f1112ce3631 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 22:29:25 +0000 Subject: [PATCH 036/134] list, tuple, ptional --- fuzz/build.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fuzz/build.py b/fuzz/build.py index e87f33b..ebaf087 100644 --- a/fuzz/build.py +++ b/fuzz/build.py @@ -18,8 +18,10 @@ python3 build.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: - python3 ./fuzz/build.py --mode both data/valid_projects.txt --oss-fuzz-dir ./fuzz/oss-fuzz --sanitizer address --workers 8 - + python3 ./fuzz/build.py --mode both data/valid_projects.txt \ + --oss-fuzz-dir ./fuzz/oss-fuzz \ + --sanitizer address \ + --workers 8 """ import os @@ -29,8 +31,7 @@ import logging import json from pathlib import Path -from typing import List, Optional, Tuple, Dict -from multiprocessing import Pool, cpu_count +from returns.maybe import Maybe # ======================================================================================== # Custom Exceptions @@ -61,11 +62,11 @@ def run_command( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Optional[List[int]] = None, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: """Execute a command and return the exit code""" - allowed_exit_codes = allowed_exit_codes or [0] + allowed_exit_codes = allowed_exit_codes.or_else([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd logging.debug(f"Executing command [{project}]: {cmd_str}") @@ -110,7 +111,7 @@ def run_command( # ======================================================================================== # Build Functions # ======================================================================================== -def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: +def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Docker image build workflow""" try: logging.info(f"Building Docker image: {project_name}") @@ -137,7 +138,7 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") return (False, project_name) -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Fuzzer build workflow""" try: logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)") @@ -168,7 +169,7 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl # ======================================================================================== # Main Execution # ======================================================================================== -def load_projects(file_path: Path) -> List[str]: +def load_projects(file_path: Path) -> list[str]: """Load project list from file""" if not file_path.exists(): raise FileNotFoundError(f"Project list not found: {file_path}") @@ -184,11 +185,11 @@ def load_projects(file_path: Path) -> List[str]: def execute_builds( func, - args_list: List[tuple], + args_list: list[tuple], worker_count: int, success_msg: str, failure_msg: str -) -> Tuple[Dict[str, bool], List[str]]: +) -> tuple[dict[str, bool], list[str]]: """Execute build tasks in parallel and return results""" results = {} with Pool(worker_count) as pool: @@ -211,7 +212,7 @@ def main(): help="OSS-Fuzz directory path") parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both', help="Build mode: 'image', 'fuzzer', or 'both'") - parser.add_argument("--workers", type=int, default=cpu_count(), + parser.add_argument("--workers", type=int, default=os.cpu_count(), help="Number of parallel worker processes") parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"], From 7e9add9fee1692b1f478df8882f4d4a7bff18957 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 23 Jul 2025 22:34:11 +0000 Subject: [PATCH 037/134] list,tuple,optional --- fuzz/run_fuzz_all_targets.py | 74 +++++++++++++++--------------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py index d3ea3d8..27385f0 100644 --- a/fuzz/run_fuzz_all_targets.py +++ b/fuzz/run_fuzz_all_targets.py @@ -5,10 +5,10 @@ run_fuzz_all_targets.py 该脚本采用两阶段方法进行模糊测试: -1. 发现阶段:首先遍历所有指定的项目,收集每一个项目中所有可执行的模糊测试目标 (fuzz target)。 -2. 执行阶段:然后创建一个包含所有 (项目, target) 对的任务池,并使用多进程并行执行所有任务。 +1. 发现阶段:遍历所有指定项目,收集每个项目中所有可执行的模糊测试目标(fuzz target) +2. 执行阶段:创建包含所有(项目, target)对的任务池,使用多进程并行执行所有任务 -这种方法可以最大化 CPU 利用率,并提供更清晰的整体进度。 +这种方法最大化CPU利用率并提供清晰的整体进度[2](@ref)。 用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] 示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 @@ -22,8 +22,8 @@ import time from datetime import datetime from pathlib import Path -from typing import Optional, List, Tuple from multiprocessing import Pool, cpu_count +from returns.maybe import Maybe, Nothing, Some # --- 全局配置 --- HOME_DIR = Path.home() @@ -35,11 +35,11 @@ def run_command( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Optional[List[int]] = None, + allowed_exit_codes: Maybe[list[int]] = Nothing, timeout: int = 3600 # 默认1小时超时 ) -> bool: """使用实时日志记录和精确的错误处理来执行命令""" - allowed_exit_codes = allowed_exit_codes or [] + allowed_codes = allowed_exit_codes.value_or([]) logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") @@ -57,7 +57,6 @@ def run_command( start_time = time.time() while process.poll() is None: - # 检查命令是否超时 if time.time() - start_time > timeout: logger.error(f"⌛ 命令在 {timeout} 秒后超时") process.terminate() @@ -72,11 +71,10 @@ def run_command( if line: logger.debug(line.strip()) else: - # 在某些情况下 stdout 可能暂时为 None time.sleep(0.1) exit_code = process.returncode - if exit_code not in [0, *allowed_exit_codes]: + if exit_code not in [0, *allowed_codes]: logger.error(f"❌ 命令执行失败,退出码: {exit_code}") return False return True @@ -102,10 +100,10 @@ def run_command( pass -def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: - """发现项目的 Fuzz Targets (以 'fuzz_' 开头,无扩展名,且可执行)""" +def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: + """发现项目的Fuzz Targets(以'fuzz_'开头,无扩展名,且可执行)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets: List[str] = [] + targets: list[str] = [] if not out_dir.is_dir(): logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}") @@ -125,18 +123,17 @@ def discover_targets(project_name: str, logger: logging.Logger) -> List[str]: except PermissionError: logger.error(f"🔒 访问目录权限不足: {out_dir}") except OSError as e: - logger.exception(f"💥 发现 Target 时发生操作系统错误: {e}") + logger.exception(f"💥 发现Target时发生操作系统错误: {e}") return targets -def run_single_target(project_name: str, target_name: str, timeout: int) -> Tuple[bool, str, str]: - """为单个 (项目, target) 对执行模糊测试工作流""" +def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]: + """为单个(项目, target)对执行模糊测试工作流""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) try: - # 为每个任务配置独立的日志记录器 logger.setLevel(logging.DEBUG) LOG_DIR.mkdir(parents=True, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -148,11 +145,9 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl ) file_handler.setFormatter(formatter) logger.addHandler(file_handler) - os.chdir(OSS_FUZZ_DIR) except (OSError, PermissionError) as e: - # 如果日志设置失败,直接打印到控制台 print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}") return False, project_name, target_name @@ -161,10 +156,10 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" success = run_command( cmd, - f"运行 Target '{target_name}' (超时={timeout}s)", + f"运行Target '{target_name}' (超时={timeout}s)", logger, - allowed_exit_codes=[1, 124], # 1=发现崩溃, 124=超时,都视为模糊测试的正常退出 - timeout=timeout + 300 # 增加300秒的额外缓冲时间给 helper.py + allowed_exit_codes=Some([1, 124]), # 1=发现崩溃, 124=超时 + timeout=timeout + 300 ) if success: @@ -175,17 +170,15 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> Tupl return success, project_name, target_name except Exception as e: - logger.exception(f"💥 运行 target '{target_name}' 时发生意外错误: {e}") + logger.exception(f"💥 运行target '{target_name}' 时发生意外错误: {e}") return False, project_name, target_name finally: - # 清理日志处理器以释放文件句柄 for handler in logger.handlers[:]: handler.close() logger.removeHandler(handler) def main(): - # 主进程日志配置 logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s", @@ -193,13 +186,12 @@ def main(): ) logger = logging.getLogger("Main") - parser = argparse.ArgumentParser(description="OSS-Fuzz 并行模糊测试工具") + parser = argparse.ArgumentParser(description="OSS-Fuzz并行模糊测试工具") parser.add_argument("project_list", help="包含项目名称列表的文件路径") - parser.add_argument("--timeout", type=int, default=60, help="每个 Fuzz Target 的运行超时时间 (秒)") + parser.add_argument("--timeout", type=int, default=60, help="每个Fuzz Target的运行超时时间(秒)") parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数") args = parser.parse_args() - # --- 1. 读取项目列表 --- try: project_path = Path(args.project_list) with open(project_path, "r", encoding="utf-8") as f: @@ -212,40 +204,37 @@ def main(): logger.exception(f"💥 读取项目列表时出错: {e}") sys.exit(1) - # --- 2. 发现阶段: 收集所有项目的 Fuzz Targets --- - logger.info("\n" + "=" * 20 + " 阶段 1: 发现所有 Fuzz Targets " + "=" * 20) - all_fuzz_tasks = [] + logger.info("\n" + "=" * 20 + " 阶段1: 发现所有Fuzz Targets " + "=" * 20) + all_fuzz_tasks: list[tuple[str, str]] = [] try: original_cwd = Path.cwd() os.chdir(OSS_FUZZ_DIR) for project_name in projects: targets = discover_targets(project_name, logger) if targets: - logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个 targets: {', '.join(targets)}") + logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个targets: {', '.join(targets)}") for target in targets: all_fuzz_tasks.append((project_name, target)) else: - logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何 Fuzz Targets。") + logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何Fuzz Targets。") os.chdir(original_cwd) except FileNotFoundError: - logger.error(f"❌ OSS-Fuzz 目录不存在: {OSS_FUZZ_DIR}") + logger.error(f"❌ OSS-Fuzz目录不存在: {OSS_FUZZ_DIR}") sys.exit(1) except Exception as e: logger.exception(f"💥 在发现阶段发生未知错误: {e}") sys.exit(1) - if not all_fuzz_tasks: - logger.info("🤷 未发现任何可执行的 Fuzz Targets。程序退出。") + logger.info("🤷 未发现任何可执行的Fuzz Targets。程序退出。") sys.exit(0) - # --- 3. 执行阶段: 并行运行所有 Fuzzing 任务 --- logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。") - logger.info("=" * 20 + " 阶段 2: 并行执行 Fuzzing " + "=" * 23) - logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试 (每个 Target 超时: {args.timeout}s)...") + logger.info("=" * 20 + " 阶段2: 并行执行Fuzzing " + "=" * 23) + logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试(每个Target超时: {args.timeout}s)...") tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] - results = [] + results: list[tuple[bool, str, str]] = [] with Pool(args.workers) as pool: try: @@ -255,16 +244,15 @@ def main(): pool.terminate() pool.join() - # --- 4. 汇总阶段 --- - logger.info("\n" + "=" * 20 + " 阶段 3: 结果汇总 " + "=" * 28) + logger.info("\n" + "=" * 20 + " 阶段3: 结果汇总 " + "=" * 28) failed_tasks = [(p, t) for success, p, t in results if not success] total_tasks = len(all_fuzz_tasks) failed_count = len(failed_tasks) success_count = total_tasks - failed_count - logger.info(f"📊 Fuzzing 完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}") + logger.info(f"📊 Fuzzing完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}") if failed_tasks: - logger.error("❌ 以下 Fuzz Targets 运行失败:") + logger.error("❌ 以下Fuzz Targets运行失败:") for project, target in failed_tasks: logger.error(f" - 项目: {project}, Target: {target}") From 6afc91982d311a2b99c16403f9bc099906ffae92 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 00:26:19 +0000 Subject: [PATCH 038/134] translate --- fuzz/run_fuzz_all_targets.py | 124 +++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 57 deletions(-) diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py index 27385f0..74bed97 100644 --- a/fuzz/run_fuzz_all_targets.py +++ b/fuzz/run_fuzz_all_targets.py @@ -4,14 +4,14 @@ """ run_fuzz_all_targets.py -该脚本采用两阶段方法进行模糊测试: -1. 发现阶段:遍历所有指定项目,收集每个项目中所有可执行的模糊测试目标(fuzz target) -2. 执行阶段:创建包含所有(项目, target)对的任务池,使用多进程并行执行所有任务 +This script employs a two-phase approach for fuzz testing: +1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project +2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing -这种方法最大化CPU利用率并提供清晰的整体进度[2](@ref)。 +This approach maximizes CPU utilization and provides clear overall progress[2](@ref). -用法: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] -示例: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 +Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 """ import os @@ -25,7 +25,7 @@ from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some -# --- 全局配置 --- +# --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs2" @@ -36,9 +36,9 @@ def run_command( log_msg: str, logger: logging.Logger, allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600 # 默认1小时超时 + timeout: int = 3600 # Default 1-hour timeout ) -> bool: - """使用实时日志记录和精确的错误处理来执行命令""" + """Execute commands with real-time logging and precise error handling""" allowed_codes = allowed_exit_codes.value_or([]) logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") @@ -58,7 +58,7 @@ def run_command( start_time = time.time() while process.poll() is None: if time.time() - start_time > timeout: - logger.error(f"⌛ 命令在 {timeout} 秒后超时") + logger.error(f"⌛ Command timed out after {timeout} seconds") process.terminate() try: process.wait(timeout=5) @@ -75,21 +75,21 @@ def run_command( exit_code = process.returncode if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ 命令执行失败,退出码: {exit_code}") + logger.error(f"❌ Command execution failed, exit code: {exit_code}") return False return True except FileNotFoundError: - logger.error(f"🔍 命令未找到: {cmd.split()[0]}") + logger.error(f"🔍 Command not found: {cmd.split()[0]}") return False except PermissionError: - logger.error(f"🔒 执行命令权限不足: {cmd}") + logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") return False except subprocess.SubprocessError as e: - logger.exception(f"💥 子进程错误: {e}") + logger.exception(f"💥 Subprocess error: {e}") return False except OSError as e: - logger.exception(f"💥 执行命令时发生操作系统错误: {e}") + logger.exception(f"💥 Operating system error during command execution: {e}") return False finally: if process and process.poll() is None: @@ -101,12 +101,12 @@ def run_command( def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: - """发现项目的Fuzz Targets(以'fuzz_'开头,无扩展名,且可执行)""" + """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name targets: list[str] = [] if not out_dir.is_dir(): - logger.warning(f"项目 {project_name} 的构建输出目录不存在: {out_dir}") + logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") return targets try: @@ -118,18 +118,18 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: os.access(f, os.X_OK)): targets.append(f.name) except OSError as e: - logger.warning(f"⚠️ 检查文件 {f.name} 时出错,已跳过: {e}") + logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") except PermissionError: - logger.error(f"🔒 访问目录权限不足: {out_dir}") + logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") except OSError as e: - logger.exception(f"💥 发现Target时发生操作系统错误: {e}") + logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") return targets def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]: - """为单个(项目, target)对执行模糊测试工作流""" + """Execute fuzz testing workflow for a single (project, target) pair""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) @@ -148,29 +148,29 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl os.chdir(OSS_FUZZ_DIR) except (OSError, PermissionError) as e: - print(f"❌ 任务 {task_id} 初始化时发生严重错误: {e}") + print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") return False, project_name, target_name - logger.info(f"🚀 开始测试 -> 项目: {project_name}, Target: {target_name}") + logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") try: cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" success = run_command( cmd, - f"运行Target '{target_name}' (超时={timeout}s)", + f"Running Target '{target_name}' (timeout={timeout}s)", logger, - allowed_exit_codes=Some([1, 124]), # 1=发现崩溃, 124=超时 + allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout timeout=timeout + 300 ) if success: - logger.info(f"✅ Target '{target_name}' 运行完成。") + logger.info(f"✅ Target '{target_name}' completed successfully.") else: - logger.error(f"❌ Target '{target_name}' 运行失败。") + logger.error(f"❌ Target '{target_name}' failed.") return success, project_name, target_name except Exception as e: - logger.exception(f"💥 运行target '{target_name}' 时发生意外错误: {e}") + logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") return False, project_name, target_name finally: for handler in logger.handlers[:]: @@ -179,6 +179,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl def main(): + # Configure main process logging logging.basicConfig( level=logging.INFO, format="[%(levelname)s] %(message)s", @@ -186,83 +187,92 @@ def main(): ) logger = logging.getLogger("Main") - parser = argparse.ArgumentParser(description="OSS-Fuzz并行模糊测试工具") - parser.add_argument("project_list", help="包含项目名称列表的文件路径") - parser.add_argument("--timeout", type=int, default=60, help="每个Fuzz Target的运行超时时间(秒)") - parser.add_argument("--workers", type=int, default=cpu_count(), help="并行执行的工作进程数") + # Set up command line argument parsing + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") + parser.add_argument("project_list", help="File path containing list of project names") + parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") + parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") args = parser.parse_args() + # 1. Read project list file try: project_path = Path(args.project_list) with open(project_path, "r", encoding="utf-8") as f: projects = [line.strip() for line in f if line.strip()] - logger.info(f"📋 从 {project_path.name} 加载了 {len(projects)} 个项目。") + logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") except FileNotFoundError: - logger.error(f"❌ 项目列表文件未找到: {args.project_list}") + logger.error(f"❌ Project list file not found: {args.project_list}") sys.exit(1) except (OSError, PermissionError) as e: - logger.exception(f"💥 读取项目列表时出错: {e}") + logger.exception(f"💥 Error occurred while reading project list: {e}") sys.exit(1) - logger.info("\n" + "=" * 20 + " 阶段1: 发现所有Fuzz Targets " + "=" * 20) - all_fuzz_tasks: list[tuple[str, str]] = [] + # 2. Discovery phase: Collect all fuzz targets + logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) + all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples try: - original_cwd = Path.cwd() - os.chdir(OSS_FUZZ_DIR) + original_cwd = Path.cwd() # Save current working directory + os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory for project_name in projects: targets = discover_targets(project_name, logger) if targets: - logger.info(f"🔍 在项目 '{project_name}' 中发现 {len(targets)} 个targets: {', '.join(targets)}") + logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") for target in targets: all_fuzz_tasks.append((project_name, target)) else: - logger.warning(f"⚠️ 在项目 '{project_name}' 中未找到任何Fuzz Targets。") - os.chdir(original_cwd) + logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") + os.chdir(original_cwd) # Restore original working directory except FileNotFoundError: - logger.error(f"❌ OSS-Fuzz目录不存在: {OSS_FUZZ_DIR}") + logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") sys.exit(1) except Exception as e: - logger.exception(f"💥 在发现阶段发生未知错误: {e}") + logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") sys.exit(1) + # Check if any valid targets were found if not all_fuzz_tasks: - logger.info("🤷 未发现任何可执行的Fuzz Targets。程序退出。") + logger.info("🤷 No executable Fuzz Targets found. Program exits.") sys.exit(0) - logger.info(f"\n✅ 发现阶段完成。共找到 {len(all_fuzz_tasks)} 个模糊测试任务。") - logger.info("=" * 20 + " 阶段2: 并行执行Fuzzing " + "=" * 23) - logger.info(f"🚀 使用 {args.workers} 个工作进程开始并行测试(每个Target超时: {args.timeout}s)...") + # 3. Execution phase: Parallel fuzz testing + logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") + logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) + logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") + # Prepare task parameters (project, target, timeout) tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] - results: list[tuple[bool, str, str]] = [] + results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) + # Execute in parallel using process pool with Pool(args.workers) as pool: try: results = pool.starmap(run_single_target, tasks_with_args) except Exception as e: - logger.error(f"💥 并行执行过程中发生严重错误: {e}") + logger.error(f"💥 Critical error occurred during parallel execution: {e}") pool.terminate() pool.join() - logger.info("\n" + "=" * 20 + " 阶段3: 结果汇总 " + "=" * 28) - failed_tasks = [(p, t) for success, p, t in results if not success] + # 4. Result summary and reporting + logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) + failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks total_tasks = len(all_fuzz_tasks) failed_count = len(failed_tasks) success_count = total_tasks - failed_count - logger.info(f"📊 Fuzzing完成: 成功 {success_count}/{total_tasks}, 失败 {failed_count}/{total_tasks}") + # Output statistical summary + logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") if failed_tasks: - logger.error("❌ 以下Fuzz Targets运行失败:") + logger.error("❌ The following Fuzz Targets failed:") for project, target in failed_tasks: - logger.error(f" - 项目: {project}, Target: {target}") + logger.error(f" - Project: {project}, Target: {target}") # List detailed failures if __name__ == "__main__": try: main() except KeyboardInterrupt: - print("\n🛑 操作被用户中断。") + print("\n🛑 Operation interrupted by user.") sys.exit(1) except Exception as e: - print(f"\n💥 主程序发生致命错误: {e}") + print(f"\n💥 Fatal error in main program: {e}") sys.exit(1) \ No newline at end of file From ea76d9dcc85c0c4e763bb4d168cef37301ceb065 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 00:30:32 +0000 Subject: [PATCH 039/134] build_fuzz.py, run_fuzz_all_target.py --- fuzz/{build.py => build_fuzz.py} | 8 +- fuzz/build_fuzzers.py | 246 ---------------------------- fuzz/build_images.py | 158 ------------------ fuzz/build_oss_fuzz_whole.py | 154 ------------------ fuzz/run_fuzz_target.py | 268 ------------------------------- 5 files changed, 4 insertions(+), 830 deletions(-) rename fuzz/{build.py => build_fuzz.py} (96%) delete mode 100644 fuzz/build_fuzzers.py delete mode 100644 fuzz/build_images.py delete mode 100644 fuzz/build_oss_fuzz_whole.py delete mode 100644 fuzz/run_fuzz_target.py diff --git a/fuzz/build.py b/fuzz/build_fuzz.py similarity index 96% rename from fuzz/build.py rename to fuzz/build_fuzz.py index ebaf087..87c5a44 100644 --- a/fuzz/build.py +++ b/fuzz/build_fuzz.py @@ -9,16 +9,16 @@ Usage: Build images: - python3 build.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz + python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz Build fuzzers: - python3 build.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json + python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json Build both: - python3 build.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address + python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: - python3 ./fuzz/build.py --mode both data/valid_projects.txt \ + python3 ./fuzz/build_fuzz.py --mode both data/valid_projects.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py deleted file mode 100644 index 1dfd825..0000000 --- a/fuzz/build_fuzzers.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -build_fuzzers.py - -Parallel build of OSS-Fuzz fuzzers. -Requires Docker images to be built first (using build_images.py). - -Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ - --image-results image_build_results.json \ - [--sanitizer type] [--workers N] -Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ - --oss-fuzz-dir ./fuzz/oss-fuzz \ - --image-results image_build_results.json \ - --sanitizer address \ - --workers 8 -""" - -import os -import sys -import subprocess -import argparse -import logging -import json -from pathlib import Path -from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count - -class BuildError(Exception): - """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: int = None): - super().__init__(message) - self.project = project - self.exit_code = exit_code - -class CommandError(BuildError): - """Exception for command execution failures""" - pass - -class PathError(BuildError): - """Exception for missing paths or files""" - pass - -class ConfigError(BuildError): - """Exception for configuration errors""" - pass - -def run_command( - cmd: str, - oss_fuzz_dir: Path, - project: str = "", - allowed_exit_codes: Optional[List[int]] = None -) -> int: - """Execute a command and return the exit code""" - allowed_exit_codes = allowed_exit_codes or [0] - logging.info(f"▶️ Executing command: {cmd}") - - try: - process = subprocess.Popen( - cmd, - shell=True, - cwd=str(oss_fuzz_dir), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True - ) - - stdout, stderr = process.communicate() - exit_code = process.returncode - - if exit_code in allowed_exit_codes: - return exit_code - - # 构建详细的错误信息 - error_msg = f"Command failed (exit code: {exit_code})" - if project: - error_msg += f" for project: {project}" - - if stderr.strip(): - error_msg += f"\nError output:\n{stderr.strip()}" - - if stdout.strip(): - error_msg += f"\nOutput:\n{stdout.strip()}" - - raise CommandError(error_msg, project=project, exit_code=exit_code) - - except FileNotFoundError as e: - raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e - except OSError as e: - raise CommandError(f"System error: {e}", project=project) from e - except subprocess.SubprocessError as e: - raise CommandError(f"Subprocess error: {e}", project=project) from e - -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: - """Fuzzer build workflow""" - try: - logging.info("=" * 60) - logging.info(f"🔧 Building fuzzers for: {project_name}") - logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") - logging.info("=" * 60) - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise PathError(f"Missing helper script: {helper_script}", project=project_name) - - # Execute fuzzer build command - run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - oss_fuzz_dir, - project=project_name - ) - - logging.info(f"✅ Fuzzers built: {project_name}") - return (True, project_name) - - except BuildError as e: - logging.error(f"❌ Build failed: {project_name}") - logging.error(f" Reason: {str(e)}") - return (False, project_name) - except Exception as e: - logging.error(f"🔥 Unhandled exception: {project_name}") - logging.exception(f" Exception details: {e}") - return (False, project_name) - -def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz directory path") - parser.add_argument("--sanitizer", default="address", - choices=["address", "memory", "undefined"], - help="Fuzzer sanitizer type") - parser.add_argument("--workers", type=int, default=cpu_count(), - help="Number of parallel worker processes") - parser.add_argument("--image-results", required=True, - help="JSON file with image build results from build_images.py") - args = parser.parse_args() - - logging.basicConfig( - level=logging.INFO, - format='[%(levelname)s] %(message)s' - ) - - # Process paths - oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") - - # Read project list - try: - project_file = Path(args.project_list) - if not project_file.exists(): - raise FileNotFoundError(f"Project list file not found: {project_file}") - - with open(project_file, "r", encoding="utf-8") as f: - all_projects = [line.strip() for line in f if line.strip()] - - if not all_projects: - raise ConfigError("Project list is empty") - - logging.info(f"📋 Loaded {len(all_projects)} projects") - except Exception as e: - logging.error(f"❌ Failed to read project list: {e}") - sys.exit(1) - - # Load image build results - try: - image_results_file = Path(args.image_results) - if not image_results_file.exists(): - raise FileNotFoundError(f"Image results file not found: {image_results_file}") - - with open(image_results_file, "r") as f: - image_results = json.load(f) - - if not isinstance(image_results, dict): - raise ConfigError("Image results should be a JSON object") - - logging.info(f"📋 Loaded image build results: {args.image_results}") - except json.JSONDecodeError as e: - logging.error(f"❌ Failed to parse image build results: {e}") - sys.exit(1) - except Exception as e: - logging.error(f"❌ Failed to load image build results: {e}") - sys.exit(1) - - # Filter projects with successful image builds - projects_to_build = [p for p in all_projects if p in image_results and image_results[p]] - image_failures = [p for p in all_projects if p not in image_results or not image_results[p]] - - if not projects_to_build: - logging.error("❌ No projects with successful image builds") - if image_failures: - logging.error(f" Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}") - sys.exit(1) - - skipped = len(all_projects) - len(projects_to_build) - logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)") - - # Parallel fuzzer builds - with Pool(args.workers) as pool: - results = pool.starmap( - build_fuzzers, - [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build] - ) - - # Output results - fuzzer_results = {project: success for success, project in results} - failed = [p for p in projects_to_build if not fuzzer_results[p]] - - success_count = len(projects_to_build) - len(failed) - logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}") - - if failed: - logging.error(f"❌ Failed builds ({len(failed)} projects):") - for project in failed: - logging.error(f" - {project}") - - # Generate overall status report - overall_results = {} - for project in all_projects: - status = "❌" - if project in image_results and image_results[project]: - if project in fuzzer_results and fuzzer_results[project]: - status = "✅" - elif project in fuzzer_results: - status = "❌ (fuzzer)" - else: - status = "❌ (not built)" - else: - status = "❌ (image)" - overall_results[project] = status - - logging.info("\n📊 Overall status:") - for project, status in overall_results.items(): - logging.info(f" {project}: {status}") - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted") - sys.exit(1) - except Exception as e: - print(f"💥 Critical error: {e}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/build_images.py b/fuzz/build_images.py deleted file mode 100644 index 17c7bfc..0000000 --- a/fuzz/build_images.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -build_images.py - -Parallel build of OSS-Fuzz Docker images. -Uses multiprocessing.Pool to distribute projects across multiple CPU cores. - -Usage: python3 build_images.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz [--workers N] -Example: python3 fuzz/build_images.py data/valid_projects.txt \ - --oss-fuzz-dir ./fuzz/oss-fuzz \ - --workers 4 -""" - -import os -import sys -import subprocess -import argparse -import logging -import json -from pathlib import Path -from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count - -class CommandExecutionError(Exception): - """Custom command execution exception""" - def __init__(self, message: str, exit_code: Optional[int] = None): - super().__init__(message) - self.exit_code = exit_code - -def run_command( - cmd: str, - oss_fuzz_dir: Path, - allowed_exit_codes: Optional[List[int]] = None -) -> int: - """Execute a command and return the exit code, throws CommandExecutionError on failure""" - allowed_exit_codes = allowed_exit_codes or [0] - logging.info(f"▶️ Executing command: {cmd}") - - try: - process = subprocess.Popen( - f"yes | {cmd}", - shell=True, - cwd=str(oss_fuzz_dir), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL - ) - - process.wait() - exit_code = process.returncode - - if exit_code in allowed_exit_codes: - return exit_code - raise CommandExecutionError( - f"Command failed (exit code: {exit_code})", - exit_code=exit_code - ) - - except FileNotFoundError as e: - raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e - except OSError as e: - raise CommandExecutionError(f"System error: {e}") from e - except subprocess.SubprocessError as e: - raise CommandExecutionError(f"Subprocess error: {e}") from e - -def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: - """Docker image build workflow""" - try: - logging.info("=" * 60) - logging.info(f"🔨 Starting Docker build for project: {project_name}") - logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") - logging.info("=" * 60) - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise FileNotFoundError(f"Critical script missing: {helper_script}") - - # Execute image build command - run_command( - f"python3 infra/helper.py build_image {project_name}", - oss_fuzz_dir - ) - - logging.info(f"✅ Docker image for {project_name} built successfully") - return (True, project_name) - - except CommandExecutionError as e: - logging.error(f"❌ Docker build for {project_name} failed: {str(e)}") - return (False, project_name) - except Exception as e: - logging.exception(f"🔥 Unhandled exception: {e}") - return (False, project_name) - -def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz Docker Image Builder") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz directory path") - parser.add_argument("--workers", type=int, default=cpu_count(), - help="Number of parallel worker processes") - parser.add_argument("--output", default="image_build_results.json", - help="Output file for build results") - args = parser.parse_args() - - logging.basicConfig( - level=logging.INFO, - format='[%(levelname)s] [PID:%(process)d] %(message)s' - ) - - # Process paths - oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") - - # Read project list - try: - with open(args.project_list, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logging.info(f"📋 Loaded {len(projects)} projects") - except Exception as e: - logging.error(f"❌ Failed to read project list: {e}") - sys.exit(1) - - # Parallel image builds - with Pool(args.workers) as pool: - results = pool.starmap( - build_image, - [(p, oss_fuzz_dir) for p in projects] - ) - - # Output results - build_results = {project: success for success, project in results} - failed = [p for p in projects if not build_results[p]] - - logging.info(f"\n📊 Docker image builds completed: " - f"Successful {len(projects)-len(failed)}/{len(projects)}") - - if failed: - logging.error("❌ Failed projects: " + ", ".join(failed)) - - # Save build results to JSON file - try: - with open(args.output, "w") as f: - json.dump(build_results, f) - logging.info(f"💾 Build results saved to: {args.output}") - except Exception as e: - logging.error(f"❌ Failed to save build results: {e}") - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user") - sys.exit(1) - except Exception as e: - print(f"💥 Critical error: {e}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/build_oss_fuzz_whole.py b/fuzz/build_oss_fuzz_whole.py deleted file mode 100644 index 59d3bea..0000000 --- a/fuzz/build_oss_fuzz_whole.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -build_oss_fuzz_whole.py - -Parallel build of OSS-Fuzz projects (Docker images and Fuzzer compilation). -Uses multiprocessing.Pool to distribute projects across multiple CPU cores. - -Usage: python3 build_oss_fuzz_whole.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ - [--sanitizer type] [--workers N] -Example: python3 fuzz/build_oss_fuzz_whole.py data/valid_projects.txt \ - --oss-fuzz-dir ./fuzz/oss-fuzz \ - --sanitizer address \ - --workers 8 -""" - -import os -import sys -import subprocess -import argparse -import logging -from pathlib import Path -from typing import List, Optional, Tuple -from multiprocessing import Pool, cpu_count - -class CommandExecutionError(Exception): - """Custom command execution exception""" - def __init__(self, message: str, exit_code: Optional[int] = None): - super().__init__(message) - self.exit_code = exit_code - -def run_command( - cmd: str, - oss_fuzz_dir: Path, - allowed_exit_codes: Optional[List[int]] = None -) -> int: - """Execute a command and return the exit code, throws CommandExecutionError on failure""" - allowed_exit_codes = allowed_exit_codes or [0] - logging.info(f"▶️ Executing command: {cmd}") - - try: - # Remove all stdout/stderr capture logic and execute the command directly - process = subprocess.Popen( - f"yes | {cmd}", - shell=True, - cwd=str(oss_fuzz_dir), - # Redirect the output to an empty device without retaining any output - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL - ) - - process.wait() - exit_code = process.returncode - - if exit_code in allowed_exit_codes: - return exit_code - raise CommandExecutionError( - f"Command failed (exit code: {exit_code})", - exit_code=exit_code - ) - - except FileNotFoundError as e: - raise CommandExecutionError(f"Command not found: {cmd.split()[0]}") from e - except OSError as e: - raise CommandExecutionError(f"System error: {e}") from e - except subprocess.SubprocessError as e: - raise CommandExecutionError(f"Subprocess error: {e}") from e - -def build_project(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: - """Project build workflow""" - try: - logging.info("=" * 60) - logging.info(f"🔨 Starting build for project: {project_name}") - logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") - logging.info("=" * 60) - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise FileNotFoundError(f"Critical script missing: {helper_script}") - - # Execute build commands (The output has been disabled) - run_command( - f"python3 infra/helper.py build_image {project_name}", - oss_fuzz_dir - ) - run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - oss_fuzz_dir - ) - - logging.info(f"✅ Project {project_name} built successfully") - return (True, project_name) - - except CommandExecutionError as e: - logging.error(f"❌ Project {project_name} build failed: {str(e)}") - return (False, project_name) - except Exception as e: - logging.exception(f"🔥 Unhandled exception: {e}") - return (False, project_name) - -def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz parallel build tool") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz directory path") - parser.add_argument("--sanitizer", default="address", - choices=["address", "memory", "undefined"], - help="Fuzzer sanitizer type") - parser.add_argument("--workers", type=int, default=cpu_count(), - help="Number of parallel worker processes") - args = parser.parse_args() - - logging.basicConfig( - level=logging.INFO, - format='[%(levelname)s] [PID:%(process)d] %(message)s' - ) - - # Process paths - oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") - - # Read project list - try: - with open(args.project_list, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logging.info(f"📋 Loaded {len(projects)} projects") - except Exception as e: - logging.error(f"❌ Failed to read project list: {e}") - sys.exit(1) - - # Parallel build - with Pool(args.workers) as pool: - results = pool.starmap( - build_project, - [(p, args.sanitizer, oss_fuzz_dir) for p in projects] - ) - - # Output results - failed = [p for success, p in results if not success] - logging.info(f"\n📊 Build completed: Successful {len(projects)-len(failed)}/{len(projects)}") - if failed: - logging.error("❌ Failed projects: " + ", ".join(failed)) - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user") - sys.exit(1) - except Exception as e: - print(f"💥 Critical error: {e}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/run_fuzz_target.py b/fuzz/run_fuzz_target.py deleted file mode 100644 index ee47b63..0000000 --- a/fuzz/run_fuzz_target.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -run_fuzz_target.py - -Run OSS-Fuzz test targets in parallel with enhanced logging and precise exception handling. -Uses multiprocessing.Pool and logging module for robust task management. - -Usage: python3 run_fuzz_target.py [project_list_file] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_target.py data/valid_projects.txt --timeout 60 --workers 4 -""" - -import os -import sys -import subprocess -import argparse -import logging -import time -from datetime import datetime -from pathlib import Path -from typing import Optional -from multiprocessing import Pool, cpu_count - -# --- Global configuration --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "run_logs" - -def run_command( - cmd: str, - log_msg: str, - logger: logging.Logger, - allowed_exit_codes: Optional[list[int]] = None, - timeout: int = 3600 # 1 hour default timeout -) -> bool: - """Execute command with real-time logging and precise error handling""" - allowed_exit_codes = allowed_exit_codes or [] - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") - - process = None - try: - process = subprocess.Popen( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace" - ) - - start_time = time.time() - while process.poll() is None: - if time.time() - start_time > timeout: - logger.error(f"⌛ Command timed out after {timeout} seconds") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - return False - - if process.stdout: - line = process.stdout.readline() - if line: - logger.debug(line.strip()) - else: - logger.warning("Process stdout is None") - time.sleep(0.1) - - exit_code = process.returncode - if exit_code not in [0, *allowed_exit_codes]: - logger.error(f"❌ Command failed with exit code: {exit_code}") - return False - return True - - except FileNotFoundError: - logger.error(f"🔍 Command not found: {cmd.split()[0]}") - return False - except PermissionError: - logger.error(f"🔒 Permission denied for command: {cmd}") - return False - except subprocess.SubprocessError as e: - logger.exception(f"💥 Subprocess error: {e}") - return False - except OSError as e: - logger.exception(f"💥 OS error during command execution: {e}") - return False - finally: - if process and process.poll() is None: - try: - process.terminate() - process.wait(timeout=5) - except: - pass - - -def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: - """Discover fuzz targets (fuzz_ prefix, no extension, executable)""" - out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets: list[str] = [] - - try: - if not out_dir.exists(): - logger.warning(f"⚠️ Build directory not found: {out_dir}") - return targets - - for f in out_dir.iterdir(): - try: - if (f.is_file() and - f.name.startswith("fuzz_") and - '.' not in f.name and - os.access(f, os.X_OK)): - targets.append(f.name) - except OSError as e: - logger.warning(f"⚠️ Skipping file {f.name} due to access error: {e}") - - except FileNotFoundError: - logger.error(f"❌ Directory not found: {out_dir}") - except PermissionError: - logger.error(f"🔒 Permission denied accessing: {out_dir}") - except OSError as e: - logger.exception(f"💥 OS error during target discovery: {e}") - - return targets - - -def run_project(project_name: str, timeout: int) -> tuple[bool, str]: - """Testing workflow for a single project with precise error handling""" - try: - - logger = logging.getLogger(project_name) - logger.setLevel(logging.DEBUG) - - # 创建日志文件 - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - log_file = LOG_DIR / f"run_{project_name}_{timestamp}.log" - file_handler = logging.FileHandler(log_file, encoding="utf-8") - - # 配置日志格式 - formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - - # 添加控制台输出 - console_handler = logging.StreamHandler() - console_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s")) - logger.addHandler(console_handler) - - os.chdir(OSS_FUZZ_DIR) - except (OSError, PermissionError) as e: - print(f"❌ Critical error initializing project {project_name}: {e}") - return (False, project_name) - - logger.info("=" * 60) - logger.info(f"🚀 Starting testing for project: {project_name}") - logger.info("=" * 60) - - try: - targets = discover_targets(project_name, logger) - if not targets: - logger.error("⚠️ No test targets found") - return (False, project_name) - logger.info(f"🔍 Discovered {len(targets)} test targets: {', '.join(targets)}") - except Exception as e: - logger.exception(f"💥 Target discovery failed unexpectedly: {e}") - return (False, project_name) - - all_success = True - for i, target in enumerate(targets, 1): - try: - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running target [{i}/{len(targets)}] {target} (timeout={timeout}s)", - logger, - allowed_exit_codes=[1, 124], - timeout=timeout + 300 - ) - all_success &= success - if not success: - logger.error(f"❌ Target failed: {target}") - except Exception as e: - logger.exception(f"💥 Unexpected error running target {target}: {e}") - all_success = False - - if all_success: - logger.info(f"✅ All targets completed successfully for {project_name}") - else: - logger.error(f"❌ One or more targets failed for {project_name}") - - # 清理日志处理器 - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - return (all_success, project_name) - - -def main(): - # 主进程日志配置 - logging.basicConfig( - level=logging.INFO, - format="[%(levelname)s] %(message)s" - ) - logger = logging.getLogger("Main") - - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Testing Tool") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--timeout", type=int, default=60, help="Timeout per target test (seconds)") - parser.add_argument("--workers", type=int, default=cpu_count()) - args = parser.parse_args() - - logger.info(f"🚀 Starting parallel fuzzing (workers={args.workers}, timeout={args.timeout}s)") - - try: - project_path = Path(args.project_list) - if not project_path.exists(): - raise FileNotFoundError(f"Project list file not found: {project_path}") - - if not project_path.is_file(): - raise ValueError(f"Path is not a file: {project_path}") - - with open(project_path, "r") as f: - projects = [line.strip() for line in f if line.strip()] - - logger.info(f"📋 Loaded {len(projects)} projects from {project_path}") - except FileNotFoundError as e: - logger.error(f"❌ {e}") - sys.exit(1) - except PermissionError as e: - logger.error(f"🔒 Permission denied: {e}") - sys.exit(1) - except (OSError, ValueError) as e: - logger.exception(f"💥 Error reading project list: {e}") - sys.exit(1) - - with Pool(args.workers) as pool: - try: - # 使用starmap同步执行所有任务 - final_results = pool.starmap(run_project, [(p, args.timeout) for p in projects]) - except Exception as e: - logger.error(f"💥 Parallel execution failed: {e}") - # 出错时返回所有项目失败状态 - final_results = [(False, p) for p in projects] - - # 汇总结果 - failed = [p for success, p in final_results if not success] - logger.info(f"\n📊 Testing completed: Success {len(projects)-len(failed)}/{len(projects)}") - if failed: - logger.error("❌ Failed projects: " + ", ".join(failed)) - - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation cancelled by user") - sys.exit(1) - except Exception as e: - print(f"💥 Critical error in main: {e}") - sys.exit(1) \ No newline at end of file From b0f7b86fb789efbe9e2c047b07a3dae53e91f3f9 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 00:54:25 +0000 Subject: [PATCH 040/134] correct --- fuzz/build_fuzz.py | 136 ++++++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 87c5a44..51a1a79 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -8,13 +8,13 @@ Supports three modes: 'image', 'fuzzer', or 'both'. Usage: - Build images: + Build images: python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz - - Build fuzzers: + + Build fuzzers: python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json - - Build both: + + Build both: python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: @@ -32,13 +32,15 @@ import json from pathlib import Path from returns.maybe import Maybe +from multiprocessing import Pool +from typing import Dict, List, Tuple # ======================================================================================== # Custom Exceptions # ======================================================================================== class BuildError(Exception): """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: int = None): + def __init__(self, message: str, project: str = "", exit_code: int | None = None): super().__init__(message) self.project = project self.exit_code = exit_code @@ -62,45 +64,42 @@ def run_command( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + allowed_exit_codes: Maybe[List[int]] = Maybe.empty, skip_yes: bool = False ) -> int: """Execute a command and return the exit code""" - allowed_exit_codes = allowed_exit_codes.or_else([0]) + allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd logging.debug(f"Executing command [{project}]: {cmd_str}") - + try: process = subprocess.Popen( - cmd_str if skip_yes else f"yes | {cmd}", + cmd_str, shell=True, cwd=str(oss_fuzz_dir), stdout=subprocess.PIPE if skip_yes else subprocess.DEVNULL, stderr=subprocess.PIPE if skip_yes else subprocess.DEVNULL, - text=True if skip_yes else False + text=True ) - - if skip_yes: - stdout, stderr = process.communicate() - else: - process.wait() + + stdout, stderr = process.communicate() exit_code = process.returncode - - if exit_code in allowed_exit_codes: + + if exit_code in allowed_codes: return exit_code - + error_msg = f"Command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" - - if skip_yes and stderr.strip(): + + if stderr and stderr.strip(): error_msg += f"\nError output:\n{stderr.strip()}" - - if skip_yes and stdout.strip(): + + if stdout and stdout.strip(): error_msg += f"\nOutput:\n{stdout.strip()}" - + raise CommandError(error_msg, project=project, exit_code=exit_code) - + except FileNotFoundError as e: raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e except OSError as e: @@ -111,26 +110,26 @@ def run_command( # ======================================================================================== # Build Functions # ======================================================================================== -def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]: +def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: """Docker image build workflow""" try: logging.info(f"Building Docker image: {project_name}") - + # Validate paths helper_script = oss_fuzz_dir / "infra" / "helper.py" if not helper_script.exists(): raise PathError(f"Missing helper script: {helper_script}", project=project_name) - + # Execute image build command run_command( f"python3 infra/helper.py build_image {project_name}", oss_fuzz_dir, project=project_name ) - + logging.info(f"✅ Docker image built: {project_name}") return (True, project_name) - + except CommandError as e: logging.error(f"❌ Docker build failed: {project_name} - {str(e)}") return (False, project_name) @@ -138,16 +137,16 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]: logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") return (False, project_name) -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: """Fuzzer build workflow""" try: logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)") - + # Validate paths helper_script = oss_fuzz_dir / "infra" / "helper.py" if not helper_script.exists(): raise PathError(f"Missing helper script: {helper_script}", project=project_name) - + # Execute fuzzer build command run_command( f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", @@ -155,10 +154,10 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tupl project=project_name, skip_yes=True ) - + logging.info(f"✅ Fuzzers built: {project_name}") return (True, project_name) - + except BuildError as e: logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}") return (False, project_name) @@ -169,57 +168,57 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tupl # ======================================================================================== # Main Execution # ======================================================================================== -def load_projects(file_path: Path) -> list[str]: +def load_projects(file_path: Path) -> List[str]: """Load project list from file""" if not file_path.exists(): raise FileNotFoundError(f"Project list not found: {file_path}") - + with open(file_path, "r", encoding="utf-8") as f: projects = [line.strip() for line in f if line.strip()] - + if not projects: raise ConfigError("Project list is empty") - - logging.info(f"Loaded {len(projects)} projects from {file_path}") + + logging.info(f"Loaded {len(projects)} projects from {file_path.name}") return projects def execute_builds( func, - args_list: list[tuple], + args_list: List[Tuple], worker_count: int, success_msg: str, failure_msg: str -) -> tuple[dict[str, bool], list[str]]: +) -> Tuple[Dict[str, bool], List[str]]: """Execute build tasks in parallel and return results""" - results = {} + results: Dict[str, bool] = {} with Pool(worker_count) as pool: for success, project in pool.starmap(func, args_list): results[project] = success failed = [p for p, success in results.items() if not success] success_count = len(results) - len(failed) - + if failed: logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects") logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects") - + return results, failed def main(): parser = argparse.ArgumentParser(description="OSS-Fuzz Build System") parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, + parser.add_argument("--oss-fuzz-dir", required=True, type=str, help="OSS-Fuzz directory path") parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both', help="Build mode: 'image', 'fuzzer', or 'both'") parser.add_argument("--workers", type=int, default=os.cpu_count(), help="Number of parallel worker processes") - parser.add_argument("--sanitizer", default="address", + parser.add_argument("--sanitizer", default="address", choices=["address", "memory", "undefined"], help="Fuzzer sanitizer type") parser.add_argument("--image-results", default="image_build_results.json", help="Image build results file (JSON)") - parser.add_argument("--log-level", default="INFO", + parser.add_argument("--log-level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging detail level") args = parser.parse_args() @@ -229,16 +228,16 @@ def main(): level=getattr(logging, args.log_level), format='[%(levelname)s] [PID:%(process)d] %(message)s' ) - + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() project_file = Path(args.project_list).resolve() output_file = Path(args.image_results) - + # Sanity checks if not oss_fuzz_dir.exists(): logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}") sys.exit(1) - + # Load projects try: projects = load_projects(project_file) @@ -247,12 +246,12 @@ def main(): sys.exit(1) # Image building workflow - image_results = {} + image_results: Dict[str, bool] = {} if args.mode in ['image', 'both']: logging.info("\n" + "="*60) logging.info(f"Starting Docker image builds for {len(projects)} projects") logging.info("="*60 + "\n") - + image_args = [(p, oss_fuzz_dir) for p in projects] image_results, image_failures = execute_builds( build_image, @@ -261,22 +260,23 @@ def main(): "✅ Docker image builds succeeded", "🚫 Docker image builds failed" ) - + # Save image build results try: with output_file.open("w") as f: - json.dump(image_results, f) + json.dump(image_results, f, indent=4) logging.info(f"💾 Image build results saved to: {output_file}") except Exception as e: logging.error(f"❌ Failed to save image results: {e}") - + # Fuzzer building workflow - fuzzer_results = {} + fuzzer_results: Dict[str, bool] = {} + fuzz_projects: List[str] = [] if args.mode in ['fuzzer', 'both']: logging.info("\n" + "="*60) - logging.info(f"Starting fuzzer builds for {len(projects)} projects ({args.sanitizer} sanitizer)") + logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)") logging.info("="*60 + "\n") - + # Load image results for fuzzer mode if args.mode == 'fuzzer': try: @@ -286,13 +286,15 @@ def main(): except Exception as e: logging.critical(f"❌ Failed to load image results: {e}") sys.exit(1) - + # Filter projects with successful image builds fuzz_projects = [p for p in projects if image_results.get(p, False)] if not fuzz_projects: - logging.critical("❌ No projects with successful image builds") - sys.exit(1) + logging.critical("❌ No projects with successful image builds to fuzz.") + sys.exit(0) + logging.info(f"Attempting to build fuzzers for {len(fuzz_projects)} projects with successful image builds.") + fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects] fuzzer_results, fuzzer_failures = execute_builds( build_fuzzers, @@ -301,19 +303,17 @@ def main(): "✅ Fuzzer builds succeeded", "🚫 Fuzzer builds failed" ) - + # Final summary logging.info("\n" + "="*60) logging.info("Build Summary") logging.info("="*60) - + if args.mode in ['image', 'both']: image_success = sum(1 for r in image_results.values() if r) logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded") - - if args.mode in ['fuzzer', 'both']: - if args.mode == 'both': - fuzz_projects = list(fuzzer_results.keys()) + + if args.mode in ['fuzzer', 'both'] and fuzz_projects: fuzzer_success = sum(1 for r in fuzzer_results.values() if r) logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded") From 19fa83e2a68074b96ecc8f31646b356edd5db2aa Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 07:08:44 +0000 Subject: [PATCH 041/134] original --- fuzz/run_fuzz_all_targets_input.py | 278 +++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 fuzz/run_fuzz_all_targets_input.py diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py new file mode 100644 index 0000000..74bed97 --- /dev/null +++ b/fuzz/run_fuzz_all_targets_input.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +run_fuzz_all_targets.py + +This script employs a two-phase approach for fuzz testing: +1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project +2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing + +This approach maximizes CPU utilization and provides clear overall progress[2](@ref). + +Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 +""" + +import os +import sys +import subprocess +import argparse +import logging +import time +from datetime import datetime +from pathlib import Path +from multiprocessing import Pool, cpu_count +from returns.maybe import Maybe, Nothing, Some + +# --- Global configuration --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "run_logs2" + + +def run_command( + cmd: str, + log_msg: str, + logger: logging.Logger, + allowed_exit_codes: Maybe[list[int]] = Nothing, + timeout: int = 3600 # Default 1-hour timeout +) -> bool: + """Execute commands with real-time logging and precise error handling""" + allowed_codes = allowed_exit_codes.value_or([]) + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") + + process = None + try: + process = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace" + ) + + start_time = time.time() + while process.poll() is None: + if time.time() - start_time > timeout: + logger.error(f"⌛ Command timed out after {timeout} seconds") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + return False + + if process.stdout: + line = process.stdout.readline() + if line: + logger.debug(line.strip()) + else: + time.sleep(0.1) + + exit_code = process.returncode + if exit_code not in [0, *allowed_codes]: + logger.error(f"❌ Command execution failed, exit code: {exit_code}") + return False + return True + + except FileNotFoundError: + logger.error(f"🔍 Command not found: {cmd.split()[0]}") + return False + except PermissionError: + logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") + return False + except subprocess.SubprocessError as e: + logger.exception(f"💥 Subprocess error: {e}") + return False + except OSError as e: + logger.exception(f"💥 Operating system error during command execution: {e}") + return False + finally: + if process and process.poll() is None: + try: + process.terminate() + process.wait(timeout=5) + except Exception: + pass + + +def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: + """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" + out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + targets: list[str] = [] + + if not out_dir.is_dir(): + logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") + return targets + + try: + for f in out_dir.iterdir(): + try: + if (f.is_file() and + f.name.startswith("fuzz_") and + '.' not in f.name and + os.access(f, os.X_OK)): + targets.append(f.name) + except OSError as e: + logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") + + except PermissionError: + logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") + except OSError as e: + logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") + + return targets + + +def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]: + """Execute fuzz testing workflow for a single (project, target) pair""" + task_id = f"{project_name}_{target_name}" + logger = logging.getLogger(task_id) + + try: + logger.setLevel(logging.DEBUG) + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" + file_handler = logging.FileHandler(log_file, encoding="utf-8") + formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + os.chdir(OSS_FUZZ_DIR) + + except (OSError, PermissionError) as e: + print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") + return False, project_name, target_name + + logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") + try: + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + success = run_command( + cmd, + f"Running Target '{target_name}' (timeout={timeout}s)", + logger, + allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout + timeout=timeout + 300 + ) + + if success: + logger.info(f"✅ Target '{target_name}' completed successfully.") + else: + logger.error(f"❌ Target '{target_name}' failed.") + + return success, project_name, target_name + + except Exception as e: + logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") + return False, project_name, target_name + finally: + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + +def main(): + # Configure main process logging + logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(message)s", + stream=sys.stdout + ) + logger = logging.getLogger("Main") + + # Set up command line argument parsing + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") + parser.add_argument("project_list", help="File path containing list of project names") + parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") + parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") + args = parser.parse_args() + + # 1. Read project list file + try: + project_path = Path(args.project_list) + with open(project_path, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") + except FileNotFoundError: + logger.error(f"❌ Project list file not found: {args.project_list}") + sys.exit(1) + except (OSError, PermissionError) as e: + logger.exception(f"💥 Error occurred while reading project list: {e}") + sys.exit(1) + + # 2. Discovery phase: Collect all fuzz targets + logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) + all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples + try: + original_cwd = Path.cwd() # Save current working directory + os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory + for project_name in projects: + targets = discover_targets(project_name, logger) + if targets: + logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") + for target in targets: + all_fuzz_tasks.append((project_name, target)) + else: + logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") + os.chdir(original_cwd) # Restore original working directory + except FileNotFoundError: + logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") + sys.exit(1) + except Exception as e: + logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") + sys.exit(1) + + # Check if any valid targets were found + if not all_fuzz_tasks: + logger.info("🤷 No executable Fuzz Targets found. Program exits.") + sys.exit(0) + + # 3. Execution phase: Parallel fuzz testing + logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") + logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) + logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") + + # Prepare task parameters (project, target, timeout) + tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] + results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) + + # Execute in parallel using process pool + with Pool(args.workers) as pool: + try: + results = pool.starmap(run_single_target, tasks_with_args) + except Exception as e: + logger.error(f"💥 Critical error occurred during parallel execution: {e}") + pool.terminate() + pool.join() + + # 4. Result summary and reporting + logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) + failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks + total_tasks = len(all_fuzz_tasks) + failed_count = len(failed_tasks) + success_count = total_tasks - failed_count + + # Output statistical summary + logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") + if failed_tasks: + logger.error("❌ The following Fuzz Targets failed:") + for project, target in failed_tasks: + logger.error(f" - Project: {project}, Target: {target}") # List detailed failures + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n💥 Fatal error in main program: {e}") + sys.exit(1) \ No newline at end of file From 47cf6e9f282d622e6ddfe821b3a4b71419685308 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 07:11:26 +0000 Subject: [PATCH 042/134] record input --- fuzz/run_fuzz_all_targets_input.py | 198 ++++++++++++++++++++++++++--- image_build_results.json | 2 +- 2 files changed, 182 insertions(+), 18 deletions(-) diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py index 74bed97..631f3e6 100644 --- a/fuzz/run_fuzz_all_targets_input.py +++ b/fuzz/run_fuzz_all_targets_input.py @@ -2,16 +2,23 @@ # -*- coding: utf-8 -*- """ -run_fuzz_all_targets.py +run_fuzz_all_targets_input.py + +Enhanced with input instrumentation to capture fuzzing inputs. This script employs a two-phase approach for fuzz testing: 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing +3. Input capture: Instrument fuzz targets to record all inputs during fuzzing -This approach maximizes CPU utilization and provides clear overall progress[2](@ref). +Key Enhancements: +- Added input instrumentation to capture fuzzing inputs +- Created dedicated input storage directory structure +- Added AST-based function instrumentation +- Added input recording and analysis capabilities -Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 +Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs] +Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs """ import os @@ -20,25 +27,144 @@ import argparse import logging import time +import ast +import astor +import shutil from datetime import datetime from pathlib import Path from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some +from typing import Optional, List, Tuple # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" LOG_DIR = OSS_FUZZ_DIR / "run_logs2" +INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs" # Directory to store captured inputs + +class FunctionInstrumenter(ast.NodeTransformer): + """AST transformer to instrument function entries for input recording""" + def visit_FunctionDef(self, node): + """Instrument function definition to add input recording""" + # Add print statement at the beginning of the function + input_record_stmt = ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")], + keywords=[] + ) + ) + + # Insert the print statement at the top of the function body + if node.body: + node.body.insert(0, input_record_stmt) + + return node + +def instrument_code(source_code: str, target_function: str) -> str: + """ + Instrument source code to record inputs for specific function + + Args: + source_code: Original source code + target_function: Name of the function to instrument + + Returns: + Instrumented source code + """ + try: + # Parse the source code into an AST + tree = ast.parse(source_code) + + # Create instrumenter and apply transformations + instrumenter = FunctionInstrumenter() + modified_tree = instrumenter.visit(tree) + + # Add missing location information for generated nodes + ast.fix_missing_locations(modified_tree) + + # Generate the modified source code + return astor.to_source(modified_tree) + except Exception as e: + logging.error(f"🔧 Code instrumentation failed: {str(e)}") + return source_code # Return original if instrumentation fails +def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path: + """ + Prepare a fuzz target for input capture by instrumenting its code + + Args: + project_name: Name of the project + target_name: Name of the target to instrument + + Returns: + Path to the instrumented target executable + """ + try: + # Create project-specific input directory + project_input_dir = INPUT_DIR / project_name + project_input_dir.mkdir(parents=True, exist_ok=True) + + # Create target-specific input directory + target_input_dir = project_input_dir / target_name + target_input_dir.mkdir(exist_ok=True) + + logging.info(f"📁 Created input directory: {target_input_dir}") + + # Original target path + original_target = OSS_FUZZ_DIR / "build" / "out" / project_name / target_name + + # Backup original target + backup_target = original_target.with_name(f"{target_name}_original") + if not backup_target.exists(): + shutil.copy2(original_target, backup_target) + + # Read target source code (simplified for demonstration) + # In a real implementation, we'd need to locate the actual source files + # This is a placeholder to demonstrate the instrumentation concept + source_file = OSS_FUZZ_DIR / "projects" / project_name / "fuzzers" / f"{target_name}.c" + + if source_file.exists(): + with open(source_file, "r") as f: + source_code = f.read() + + # Instrument the code + instrumented_code = instrument_code(source_code, "LLVMFuzzerTestOneInput") + + # Write instrumented code to a new file + instrumented_file = source_file.with_name(f"{target_name}_instrumented.c") + with open(instrumented_file, "w") as f: + f.write(instrumented_code) + + logging.info(f"🔧 Instrumented {target_name} for input capture") + + # Rebuild the target with instrumented code + # This step is simplified - in reality would use OSS-Fuzz build system + rebuild_cmd = f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}" + run_command( + rebuild_cmd, + f"Rebuilding {target_name} with instrumentation", + logging.getLogger("Main"), + timeout=1200 + ) + + return original_target + else: + logging.warning(f"⚠️ Source file not found for instrumentation: {source_file}") + return original_target + except Exception as e: + logging.error(f"❌ Failed to instrument {target_name}: {str(e)}") + return OSS_FUZZ_DIR / "build" / "out" / project_name / target_name def run_command( cmd: str, log_msg: str, logger: logging.Logger, allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600 # Default 1-hour timeout + timeout: int = 3600, # Default 1-hour timeout + env: Optional[dict] = None # Added env parameter for input capture ) -> bool: - """Execute commands with real-time logging and precise error handling""" + """Execute commands with real-time logging, precise error handling, and input capture""" allowed_codes = allowed_exit_codes.value_or([]) logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") @@ -52,7 +178,8 @@ def run_command( stderr=subprocess.STDOUT, text=True, encoding="utf-8", - errors="replace" + errors="replace", + env=env # Pass environment variables ) start_time = time.time() @@ -69,7 +196,11 @@ def run_command( if process.stdout: line = process.stdout.readline() if line: - logger.debug(line.strip()) + # Capture input data when detected + if "INPUT_CAPTURE:" in line: + logger.debug(f"📥 {line.strip()}") + else: + logger.debug(line.strip()) else: time.sleep(0.1) @@ -99,7 +230,6 @@ def run_command( except Exception: pass - def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name @@ -127,9 +257,8 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: return targets - -def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]: - """Execute fuzz testing workflow for a single (project, target) pair""" +def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]: + """Execute fuzz testing workflow for a single (project, target) pair with input capture""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) @@ -152,6 +281,23 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl return False, project_name, target_name logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") + + # Prepare environment for input capture if requested + env = None + if record_inputs: + # Prepare target for input capture + target_path = prepare_target_for_input_capture(project_name, target_name) + logger.info(f"🔧 Instrumented {target_name} for input capture") + + # Create input directory for this run + input_dir = INPUT_DIR / project_name / target_name / timestamp + input_dir.mkdir(parents=True, exist_ok=True) + + # Set environment variable for input storage + env = os.environ.copy() + env["FUZZ_INPUT_DIR"] = str(input_dir) + logger.info(f"📁 Inputs will be stored in: {input_dir}") + try: cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" success = run_command( @@ -159,7 +305,8 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl f"Running Target '{target_name}' (timeout={timeout}s)", logger, allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout - timeout=timeout + 300 + timeout=timeout + 300, + env=env # Pass environment for input capture ) if success: @@ -177,6 +324,12 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl handler.close() logger.removeHandler(handler) + # Log input capture results + if record_inputs and env: + input_dir = Path(env["FUZZ_INPUT_DIR"]) + if input_dir.exists(): + input_count = len(list(input_dir.glob("*.bin"))) + logger.info(f"📥 Captured {input_count} inputs for {target_name}") def main(): # Configure main process logging @@ -188,10 +341,11 @@ def main(): logger = logging.getLogger("Main") # Set up command line argument parsing - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture") parser.add_argument("project_list", help="File path containing list of project names") parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") + parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing") args = parser.parse_args() # 1. Read project list file @@ -234,13 +388,17 @@ def main(): logger.info("🤷 No executable Fuzz Targets found. Program exits.") sys.exit(0) - # 3. Execution phase: Parallel fuzz testing + # 3. Execution phase: Parallel fuzz testing with input capture logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") + if args.record_inputs: + logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.") + logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}") + logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - # Prepare task parameters (project, target, timeout) - tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] + # Prepare task parameters (project, target, timeout, record_inputs) + tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks] results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) # Execute in parallel using process pool @@ -261,6 +419,12 @@ def main(): # Output statistical summary logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") + + if args.record_inputs: + total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks) + logger.info(f"📥 Total inputs captured: {total_inputs}") + logger.info(f"💾 Inputs stored at: {INPUT_DIR}") + if failed_tasks: logger.error("❌ The following Fuzz Targets failed:") for project, target in failed_tasks: diff --git a/image_build_results.json b/image_build_results.json index 93d383e..4af9787 100644 --- a/image_build_results.json +++ b/image_build_results.json @@ -1 +1 @@ -{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": false, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, "jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": false, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": false, "pyparsing": false, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true} \ No newline at end of file +{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": true, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, "jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": true, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": true, "pyparsing": true, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true} \ No newline at end of file From 76c63ac68bf424ddacc564f65110f769962afb99 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 10:15:39 +0000 Subject: [PATCH 043/134] Fatal error in main program: cannot unpack non-iterable NoneType object --- fuzz/run_fuzz_all_targets_input.py | 234 ++++++++++++++++------------- 1 file changed, 132 insertions(+), 102 deletions(-) diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py index 631f3e6..5c16d8d 100644 --- a/fuzz/run_fuzz_all_targets_input.py +++ b/fuzz/run_fuzz_all_targets_input.py @@ -34,33 +34,39 @@ from pathlib import Path from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some -from typing import Optional, List, Tuple # --- Global configuration --- HOME_DIR = Path.home() OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "run_logs2" +LOG_DIR = OSS_FUZZ_DIR / "run_logs3" INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs" # Directory to store captured inputs class FunctionInstrumenter(ast.NodeTransformer): """AST transformer to instrument function entries for input recording""" + def __init__(self, target_functions: list[str]): + self.target_functions = target_functions + super().__init__() + def visit_FunctionDef(self, node): """Instrument function definition to add input recording""" - # Add print statement at the beginning of the function - input_record_stmt = ast.Expr( - value=ast.Call( - func=ast.Name(id='print', ctx=ast.Load()), - args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")], - keywords=[] + # 只对目标函数进行插桩 + if node.name in self.target_functions: + # Add print statement at the beginning of the function + input_record_stmt = ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")], + keywords=[] + ) ) - ) - - # Insert the print statement at the top of the function body - if node.body: - node.body.insert(0, input_record_stmt) + + # Insert the print statement at the top of the function body + if node.body: + node.body.insert(0, input_record_stmt) return node + def instrument_code(source_code: str, target_function: str) -> str: """ Instrument source code to record inputs for specific function @@ -91,14 +97,14 @@ def instrument_code(source_code: str, target_function: str) -> str: def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path: """ - Prepare a fuzz target for input capture by instrumenting its code + Prepare a Python fuzz target for input capture by instrumenting its code Args: project_name: Name of the project target_name: Name of the target to instrument Returns: - Path to the instrumented target executable + Path to the instrumented Python script """ try: # Create project-specific input directory @@ -111,50 +117,49 @@ def prepare_target_for_input_capture(project_name: str, target_name: str) -> Pat logging.info(f"📁 Created input directory: {target_input_dir}") - # Original target path - original_target = OSS_FUZZ_DIR / "build" / "out" / project_name / target_name + # Locate Python source file (support multiple extensions) + possible_extensions = [".py", ".pyw"] + source_file = None - # Backup original target - backup_target = original_target.with_name(f"{target_name}_original") - if not backup_target.exists(): - shutil.copy2(original_target, backup_target) + # Try possible file extensions + for ext in possible_extensions: + candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}" + if candidate.exists(): + source_file = candidate + break - # Read target source code (simplified for demonstration) - # In a real implementation, we'd need to locate the actual source files - # This is a placeholder to demonstrate the instrumentation concept - source_file = OSS_FUZZ_DIR / "projects" / project_name / "fuzzers" / f"{target_name}.c" + if not source_file: + logging.warning(f"⚠️ Python source file not found for: {target_name}") + return None + + # Backup original source file + backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}") + if not backup_file.exists(): + shutil.copy2(source_file, backup_file) + logging.info(f"💾 Backed up original file: {backup_file}") + + # Read source code + with open(source_file, "r") as f: + source_code = f.read() + + # Instrument the code - use Python-specific entry function + possible_entry_functions = ["TestInput", "TestOneInput"] + instrumented_code = instrument_code(source_code, possible_entry_functions) + + + # Write instrumented code to a new file with same extension + instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}") + with open(instrumented_file, "w") as f: + f.write(instrumented_code) + + logging.info(f"🔧 Instrumented {target_name} for input capture") + + # Python doesn't need rebuilding - return instrumented script path + return instrumented_file - if source_file.exists(): - with open(source_file, "r") as f: - source_code = f.read() - - # Instrument the code - instrumented_code = instrument_code(source_code, "LLVMFuzzerTestOneInput") - - # Write instrumented code to a new file - instrumented_file = source_file.with_name(f"{target_name}_instrumented.c") - with open(instrumented_file, "w") as f: - f.write(instrumented_code) - - logging.info(f"🔧 Instrumented {target_name} for input capture") - - # Rebuild the target with instrumented code - # This step is simplified - in reality would use OSS-Fuzz build system - rebuild_cmd = f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}" - run_command( - rebuild_cmd, - f"Rebuilding {target_name} with instrumentation", - logging.getLogger("Main"), - timeout=1200 - ) - - return original_target - else: - logging.warning(f"⚠️ Source file not found for instrumentation: {source_file}") - return original_target except Exception as e: logging.error(f"❌ Failed to instrument {target_name}: {str(e)}") - return OSS_FUZZ_DIR / "build" / "out" / project_name / target_name + return None def run_command( cmd: str, @@ -162,7 +167,7 @@ def run_command( logger: logging.Logger, allowed_exit_codes: Maybe[list[int]] = Nothing, timeout: int = 3600, # Default 1-hour timeout - env: Optional[dict] = None # Added env parameter for input capture + env: Maybe[dict] = Nothing # Use Maybe instead of Optional ) -> bool: """Execute commands with real-time logging, precise error handling, and input capture""" allowed_codes = allowed_exit_codes.value_or([]) @@ -171,6 +176,9 @@ def run_command( process = None try: + # Convert Maybe[dict] to actual environment or None + env_dict = env.value_or(None) + process = subprocess.Popen( cmd, shell=True, @@ -179,7 +187,7 @@ def run_command( text=True, encoding="utf-8", errors="replace", - env=env # Pass environment variables + env=env_dict ) start_time = time.time() @@ -233,7 +241,7 @@ def run_command( def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets: list[str] = [] + targets = [] # Use built-in list type if not out_dir.is_dir(): logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") @@ -280,56 +288,77 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_ print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") return False, project_name, target_name - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") + logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - # Prepare environment for input capture if requested - env = None - if record_inputs: - # Prepare target for input capture - target_path = prepare_target_for_input_capture(project_name, target_name) - logger.info(f"🔧 Instrumented {target_name} for input capture") - - # Create input directory for this run - input_dir = INPUT_DIR / project_name / target_name / timestamp - input_dir.mkdir(parents=True, exist_ok=True) + # Prepare environment for input capture if requested + env = Nothing # Initialize as Maybe container + instrumented_file = None - # Set environment variable for input storage - env = os.environ.copy() - env["FUZZ_INPUT_DIR"] = str(input_dir) - logger.info(f"📁 Inputs will be stored in: {input_dir}") - - try: - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running Target '{target_name}' (timeout={timeout}s)", - logger, - allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout - timeout=timeout + 300, - env=env # Pass environment for input capture - ) - - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") + if record_inputs: + # 准备输入捕获 + instrumented_file = prepare_target_for_input_capture(project_name, target_name) + if not instrumented_file: + logger.error(f"❌ Failed to instrument {target_name}") + return False, project_name, target_name + + logger.info(f"🔧 Instrumented {target_name} for input capture") + + # 创建输入目录 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + input_dir = INPUT_DIR / project_name / target_name / timestamp + input_dir.mkdir(parents=True, exist_ok=True) + + # 设置环境变量 + env_dict = os.environ.copy() + env_dict["FUZZ_INPUT_DIR"] = str(input_dir) + env = Some(env_dict) + logger.info(f"📁 Inputs will be stored in: {input_dir}") + + # 关键修改:使用插桩后的脚本运行 + cmd = f"python3 {instrumented_file} -- -max_total_time={timeout}" else: - logger.error(f"❌ Target '{target_name}' failed.") + # 使用原始目标 + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + try: + # Use instrumented file if available, otherwise use original + target_to_run = instrumented_file.name if instrumented_file else target_name + + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" + success = run_command( + cmd, + f"Running Target '{target_name}' (timeout={timeout}s)", + logger, + allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout + timeout=timeout + 300, + env=env # Pass Maybe container + ) - return success, project_name, target_name + if success: + logger.info(f"✅ Target '{target_name}' completed successfully.") + else: + logger.error(f"❌ Target '{target_name}' failed.") - except Exception as e: - logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") - return False, project_name, target_name - finally: - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) + return success, project_name, target_name - # Log input capture results - if record_inputs and env: - input_dir = Path(env["FUZZ_INPUT_DIR"]) - if input_dir.exists(): - input_count = len(list(input_dir.glob("*.bin"))) - logger.info(f"📥 Captured {input_count} inputs for {target_name}") + except Exception as e: + logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") + return False, project_name, target_name + finally: + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + # Log input capture results + if record_inputs and env.is_just: + input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"]) + if input_dir.exists(): + # Count all input files, not just .bin + input_count = len(list(input_dir.glob("*"))) + logger.info(f"📥 Captured {input_count} inputs for {target_name}") + + except (OSError, PermissionError) as e: + print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") + return False, project_name, target_name def main(): # Configure main process logging @@ -363,7 +392,8 @@ def main(): # 2. Discovery phase: Collect all fuzz targets logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) - all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples + all_fuzz_tasks = [] # Use built-in list type + try: original_cwd = Path.cwd() # Save current working directory os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory From 1c608dc0719b0795ca696646b4e59b5e08464f91 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 22:14:04 +0000 Subject: [PATCH 044/134] name 'target_functions' is not defined fuzz_util_instrumented.py does not seem to exist --- fuzz/run_fuzz_all_targets_input.py | 99 ++++++++++++++---------------- 1 file changed, 45 insertions(+), 54 deletions(-) diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py index 5c16d8d..4d38ffb 100644 --- a/fuzz/run_fuzz_all_targets_input.py +++ b/fuzz/run_fuzz_all_targets_input.py @@ -67,7 +67,7 @@ def visit_FunctionDef(self, node): return node -def instrument_code(source_code: str, target_function: str) -> str: +def instrument_code(source_code: str, target_function: list[str]) -> str: """ Instrument source code to record inputs for specific function @@ -83,7 +83,7 @@ def instrument_code(source_code: str, target_function: str) -> str: tree = ast.parse(source_code) # Create instrumenter and apply transformations - instrumenter = FunctionInstrumenter() + instrumenter = FunctionInstrumenter(target_functions) modified_tree = instrumenter.visit(tree) # Add missing location information for generated nodes @@ -284,81 +284,72 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_ logger.addHandler(file_handler) os.chdir(OSS_FUZZ_DIR) - except (OSError, PermissionError) as e: - print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") - return False, project_name, target_name - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - - # Prepare environment for input capture if requested - env = Nothing # Initialize as Maybe container + + # Prepare environment for input capture if requested + env = Nothing instrumented_file = None - + if record_inputs: - # 准备输入捕获 instrumented_file = prepare_target_for_input_capture(project_name, target_name) if not instrumented_file: logger.error(f"❌ Failed to instrument {target_name}") return False, project_name, target_name - + logger.info(f"🔧 Instrumented {target_name} for input capture") - - # 创建输入目录 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") input_dir = INPUT_DIR / project_name / target_name / timestamp input_dir.mkdir(parents=True, exist_ok=True) - - # 设置环境变量 + env_dict = os.environ.copy() env_dict["FUZZ_INPUT_DIR"] = str(input_dir) env = Some(env_dict) + logger.info(f"📁 Inputs will be stored in: {input_dir}") - - # 关键修改:使用插桩后的脚本运行 - cmd = f"python3 {instrumented_file} -- -max_total_time={timeout}" + + # Prepare command + target_to_run = instrumented_file.name if instrumented_file else target_name + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" + + # Execute command + success = run_command( + cmd, + f"Running Target '{target_name}' (timeout={timeout}s)", + logger, + allowed_exit_codes=Some([1, 124]), + timeout=timeout + 300, + env=env + ) + + if success: + logger.info(f"✅ Target '{target_name}' completed successfully.") else: - # 使用原始目标 - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - try: - # Use instrumented file if available, otherwise use original - target_to_run = instrumented_file.name if instrumented_file else target_name - - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running Target '{target_name}' (timeout={timeout}s)", - logger, - allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout - timeout=timeout + 300, - env=env # Pass Maybe container - ) + logger.error(f"❌ Target '{target_name}' failed.") - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") - else: - logger.error(f"❌ Target '{target_name}' failed.") + return success, project_name, target_name - return success, project_name, target_name + except Exception as e: + logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") + return False, project_name, target_name - except Exception as e: - logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") - return False, project_name, target_name - finally: - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - # Log input capture results - if record_inputs and env.is_just: + finally: + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + if record_inputs and env.is_just: + try: input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"]) if input_dir.exists(): - # Count all input files, not just .bin input_count = len(list(input_dir.glob("*"))) logger.info(f"📥 Captured {input_count} inputs for {target_name}") - - except (OSError, PermissionError) as e: - print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") - return False, project_name, target_name + except Exception as e: + logger.warning(f"⚠️ Failed to summarize captured inputs: {e}") + + # Final fallback (defensive) + return False, project_name, target_name + def main(): # Configure main process logging From 34ef83304571b9b12326f539870d33dfae67c7e4 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 24 Jul 2025 22:20:09 +0000 Subject: [PATCH 045/134] =?UTF-8?q?=E5=87=86=E5=A4=87=E5=A4=A7=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/run_fuzz_ds.py | 425 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 425 insertions(+) create mode 100644 fuzz/run_fuzz_ds.py diff --git a/fuzz/run_fuzz_ds.py b/fuzz/run_fuzz_ds.py new file mode 100644 index 0000000..b0c4c59 --- /dev/null +++ b/fuzz/run_fuzz_ds.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +run_fuzz_all_targets_input.py + +Enhanced with input instrumentation to capture fuzzing inputs. + +This script employs a two-phase approach for fuzz testing: +1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project +2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing +3. Input capture: Instrument fuzz targets to record all inputs during fuzzing + +Key Enhancements: +- Added input instrumentation to capture fuzzing inputs +- Created dedicated input storage directory structure +- Added AST-based function instrumentation +- Added input recording and analysis capabilities + +Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs] +Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs +""" + +import os +import sys +import subprocess +import argparse +import logging +import time +import ast +import astor +import shutil +from datetime import datetime +from pathlib import Path +from multiprocessing import Pool, cpu_count +from returns.maybe import Maybe, Nothing, Some + +# --- Global configuration --- +HOME_DIR = Path.home() +OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" +LOG_DIR = OSS_FUZZ_DIR / "run_logs3" +INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs" # Directory to store captured inputs + +class FunctionInstrumenter(ast.NodeTransformer): + def __init__(self, target_functions: list[str]): + self.target_functions = target_functions + + def visit_FunctionDef(self, node): + if node.name in self.target_functions: + print_stmt = ast.parse(f'print("INPUT_CAPTURE: {node.name} called")').body[0] + node.body.insert(0, print_stmt) + return self.generic_visit(node) + +def instrument_code(source_code: str, target_functions: list[str]) -> str: + tree = ast.parse(source_code) + instrumenter = FunctionInstrumenter(target_functions) + instrumented_tree = instrumenter.visit(tree) + ast.fix_missing_locations(instrumented_tree) + return ast.unparse(instrumented_tree) +def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path: + """ + Prepare a Python fuzz target for input capture by instrumenting its code + + Args: + project_name: Name of the project + target_name: Name of the target to instrument + + Returns: + Path to the instrumented Python script + """ + try: + # Create project-specific input directory + project_input_dir = INPUT_DIR / project_name + project_input_dir.mkdir(parents=True, exist_ok=True) + + # Create target-specific input directory + target_input_dir = project_input_dir / target_name + target_input_dir.mkdir(exist_ok=True) + + logging.info(f"📁 Created input directory: {target_input_dir}") + + # Locate Python source file (support multiple extensions) + possible_extensions = [".py", ".pyw"] + source_file = None + + # Try possible file extensions + for ext in possible_extensions: + candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}" + if candidate.exists(): + source_file = candidate + break + + if not source_file: + logging.warning(f"⚠️ Python source file not found for: {target_name}") + return None + + # Backup original source file + backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}") + if not backup_file.exists(): + shutil.copy2(source_file, backup_file) + logging.info(f"💾 Backed up original file: {backup_file}") + + # Read source code + with open(source_file, "r") as f: + source_code = f.read() + + # Instrument the code - use Python-specific entry function + possible_entry_functions = ["TestInput", "TestOneInput"] + instrumented_code = instrument_code(source_code, possible_entry_functions) + + + # Write instrumented code to a new file with same extension + instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}") + with open(instrumented_file, "w") as f: + f.write(instrumented_code) + + logging.info(f"🔧 Instrumented {target_name} for input capture") + + # Python doesn't need rebuilding - return instrumented script path + return instrumented_file + + except Exception as e: + logging.error(f"❌ Failed to instrument {target_name}: {str(e)}") + return None + +def run_command( + cmd: str, + log_msg: str, + logger: logging.Logger, + allowed_exit_codes: Maybe[list[int]] = Nothing, + timeout: int = 3600, # Default 1-hour timeout + env: Maybe[dict] = Nothing # Use Maybe instead of Optional +) -> bool: + """Execute commands with real-time logging, precise error handling, and input capture""" + allowed_codes = allowed_exit_codes.value_or([]) + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") + + process = None + try: + # Convert Maybe[dict] to actual environment or None + env_dict = env.value_or(None) + + process = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace", + env=env_dict + ) + + start_time = time.time() + while process.poll() is None: + if time.time() - start_time > timeout: + logger.error(f"⌛ Command timed out after {timeout} seconds") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + return False + + if process.stdout: + line = process.stdout.readline() + if line: + # Capture input data when detected + if "INPUT_CAPTURE:" in line: + logger.debug(f"📥 {line.strip()}") + else: + logger.debug(line.strip()) + else: + time.sleep(0.1) + + exit_code = process.returncode + if exit_code not in [0, *allowed_codes]: + logger.error(f"❌ Command execution failed, exit code: {exit_code}") + return False + return True + + except FileNotFoundError: + logger.error(f"🔍 Command not found: {cmd.split()[0]}") + return False + except PermissionError: + logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") + return False + except subprocess.SubprocessError as e: + logger.exception(f"💥 Subprocess error: {e}") + return False + except OSError as e: + logger.exception(f"💥 Operating system error during command execution: {e}") + return False + finally: + if process and process.poll() is None: + try: + process.terminate() + process.wait(timeout=5) + except Exception: + pass + +def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: + """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" + out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + targets = [] # Use built-in list type + + if not out_dir.is_dir(): + logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") + return targets + + try: + for f in out_dir.iterdir(): + try: + if (f.is_file() and + f.name.startswith("fuzz_") and + '.' not in f.name and + os.access(f, os.X_OK)): + targets.append(f.name) + except OSError as e: + logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") + + except PermissionError: + logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") + except OSError as e: + logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") + + return targets + +def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]: + """Execute fuzz testing workflow for a single (project, target) pair with input capture""" + task_id = f"{project_name}_{target_name}" + logger = logging.getLogger(task_id) + + try: + logger.setLevel(logging.DEBUG) + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" + file_handler = logging.FileHandler(log_file, encoding="utf-8") + formatter = logging.Formatter( + "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + os.chdir(OSS_FUZZ_DIR) + + logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") + + # Prepare environment for input capture if requested + env = Nothing + instrumented_file = None + + if record_inputs: + instrumented_file = prepare_target_for_input_capture(project_name, target_name) + if not instrumented_file: + logger.error(f"❌ Failed to instrument {target_name}") + return False, project_name, target_name + + logger.info(f"🔧 Instrumented {target_name} for input capture") + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + input_dir = INPUT_DIR / project_name / target_name / timestamp + input_dir.mkdir(parents=True, exist_ok=True) + + env_dict = os.environ.copy() + env_dict["FUZZ_INPUT_DIR"] = str(input_dir) + env = Some(env_dict) + + logger.info(f"📁 Inputs will be stored in: {input_dir}") + + # Prepare command + target_to_run = instrumented_file.name if instrumented_file else target_name + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" + + # Execute command + success = run_command( + cmd, + f"Running Target '{target_name}' (timeout={timeout}s)", + logger, + allowed_exit_codes=Some([1, 124]), + timeout=timeout + 300, + env=env + ) + + if success: + logger.info(f"✅ Target '{target_name}' completed successfully.") + else: + logger.error(f"❌ Target '{target_name}' failed.") + + return success, project_name, target_name + + except Exception as e: + logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") + return False, project_name, target_name + + finally: + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + if record_inputs and env.is_just: + try: + input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"]) + if input_dir.exists(): + input_count = len(list(input_dir.glob("*"))) + logger.info(f"📥 Captured {input_count} inputs for {target_name}") + except Exception as e: + logger.warning(f"⚠️ Failed to summarize captured inputs: {e}") + + # Final fallback (defensive) + return False, project_name, target_name + + +def main(): + # Configure main process logging + logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(message)s", + stream=sys.stdout + ) + logger = logging.getLogger("Main") + + # Set up command line argument parsing + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture") + parser.add_argument("project_list", help="File path containing list of project names") + parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") + parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") + parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing") + args = parser.parse_args() + + # 1. Read project list file + try: + project_path = Path(args.project_list) + with open(project_path, "r", encoding="utf-8") as f: + projects = [line.strip() for line in f if line.strip()] + logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") + except FileNotFoundError: + logger.error(f"❌ Project list file not found: {args.project_list}") + sys.exit(1) + except (OSError, PermissionError) as e: + logger.exception(f"💥 Error occurred while reading project list: {e}") + sys.exit(1) + + # 2. Discovery phase: Collect all fuzz targets + logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) + all_fuzz_tasks = [] # Use built-in list type + + try: + original_cwd = Path.cwd() # Save current working directory + os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory + for project_name in projects: + targets = discover_targets(project_name, logger) + if targets: + logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") + for target in targets: + all_fuzz_tasks.append((project_name, target)) + else: + logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") + os.chdir(original_cwd) # Restore original working directory + except FileNotFoundError: + logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") + sys.exit(1) + except Exception as e: + logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") + sys.exit(1) + + # Check if any valid targets were found + if not all_fuzz_tasks: + logger.info("🤷 No executable Fuzz Targets found. Program exits.") + sys.exit(0) + + # 3. Execution phase: Parallel fuzz testing with input capture + logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") + if args.record_inputs: + logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.") + logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}") + + logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) + logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") + + # Prepare task parameters (project, target, timeout, record_inputs) + tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks] + results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) + + # Execute in parallel using process pool + with Pool(args.workers) as pool: + try: + results = pool.starmap(run_single_target, tasks_with_args) + except Exception as e: + logger.error(f"💥 Critical error occurred during parallel execution: {e}") + pool.terminate() + pool.join() + + # 4. Result summary and reporting + logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) + failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks + total_tasks = len(all_fuzz_tasks) + failed_count = len(failed_tasks) + success_count = total_tasks - failed_count + + # Output statistical summary + logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") + + if args.record_inputs: + total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks) + logger.info(f"📥 Total inputs captured: {total_inputs}") + logger.info(f"💾 Inputs stored at: {INPUT_DIR}") + + if failed_tasks: + logger.error("❌ The following Fuzz Targets failed:") + for project, target in failed_tasks: + logger.error(f" - Project: {project}, Target: {target}") # List detailed failures + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n💥 Fatal error in main program: {e}") + sys.exit(1) \ No newline at end of file From 510cbe7958cbc4d9a7afee9f325cb29032113483 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 00:16:37 +0000 Subject: [PATCH 046/134] create modify file script add"print(data)" to each fuzz_.py --- fuzz/modify_fuzz_files.py | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 fuzz/modify_fuzz_files.py diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py new file mode 100644 index 0000000..eb7b37e --- /dev/null +++ b/fuzz/modify_fuzz_files.py @@ -0,0 +1,66 @@ +import os +import re + +def add_print_to_testoneinput(file_path): + with open(file_path, 'r') as f: + content = f.read() + + # 正则表达式匹配TestOneInput或TestInput函数定义及其函数体 + pattern = r'(\bdef\s+(TestOneInput|TestInput)\(data\):\s*\n)((?:[ \t]+.*\n|\s*\n)*)' + matches = re.finditer(pattern, content, re.MULTILINE) + + new_content = content + for match in reversed(list(matches)): + function_def = match.group(1) + function_body = match.group(3) + + # 在函数体开头添加print(data)语句 + new_function_body = re.sub( + r'^([ \t]*)(.*\n)', + r'\g<1>\2\g<1>print(data)\n', + function_body, + count=1 + ) + + # 只有在函数体非空且未添加过print时才替换 + if new_function_body != function_body: + new_content = ( + new_content[:match.start(3)] + + new_function_body + + new_content[match.end(3):] + ) + + return new_content + +def main(): + projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" + valid_projects_file = "valid_projects.txt" + + with open(valid_projects_file, 'r') as f: + projects = [line.strip() for line in f if line.strip()] + + for project in projects: + project_dir = os.path.join(projects_path, project) + + if not os.path.isdir(project_dir): + continue + + for root, _, files in os.walk(project_dir): + for file in files: + if file.startswith('fuzz_') and file.endswith('.py'): + file_path = os.path.join(root, file) + + try: + new_content = add_print_to_testoneinput(file_path) + + # 保存修改后的文件(添加_print后缀) + new_file_path = file_path.rsplit('.', 1)[0] + '_print.py' + with open(new_file_path, 'w') as f: + f.write(new_content) + print(f"Processed: {file_path} -> {new_file_path}") + + except Exception as e: + print(f"Error processing {file_path}: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file From 61009ab0f91a57080a4e15c107619ab6bc173365 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 00:29:27 +0000 Subject: [PATCH 047/134] build_fuzzer script --- fuzz/build_fuzzers.py | 246 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 fuzz/build_fuzzers.py diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py new file mode 100644 index 0000000..1dfd825 --- /dev/null +++ b/fuzz/build_fuzzers.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +build_fuzzers.py + +Parallel build of OSS-Fuzz fuzzers. +Requires Docker images to be built first (using build_images.py). + +Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ + --image-results image_build_results.json \ + [--sanitizer type] [--workers N] +Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ + --oss-fuzz-dir ./fuzz/oss-fuzz \ + --image-results image_build_results.json \ + --sanitizer address \ + --workers 8 +""" + +import os +import sys +import subprocess +import argparse +import logging +import json +from pathlib import Path +from typing import List, Optional, Tuple +from multiprocessing import Pool, cpu_count + +class BuildError(Exception): + """Base exception for build failures""" + def __init__(self, message: str, project: str = "", exit_code: int = None): + super().__init__(message) + self.project = project + self.exit_code = exit_code + +class CommandError(BuildError): + """Exception for command execution failures""" + pass + +class PathError(BuildError): + """Exception for missing paths or files""" + pass + +class ConfigError(BuildError): + """Exception for configuration errors""" + pass + +def run_command( + cmd: str, + oss_fuzz_dir: Path, + project: str = "", + allowed_exit_codes: Optional[List[int]] = None +) -> int: + """Execute a command and return the exit code""" + allowed_exit_codes = allowed_exit_codes or [0] + logging.info(f"▶️ Executing command: {cmd}") + + try: + process = subprocess.Popen( + cmd, + shell=True, + cwd=str(oss_fuzz_dir), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + stdout, stderr = process.communicate() + exit_code = process.returncode + + if exit_code in allowed_exit_codes: + return exit_code + + # 构建详细的错误信息 + error_msg = f"Command failed (exit code: {exit_code})" + if project: + error_msg += f" for project: {project}" + + if stderr.strip(): + error_msg += f"\nError output:\n{stderr.strip()}" + + if stdout.strip(): + error_msg += f"\nOutput:\n{stdout.strip()}" + + raise CommandError(error_msg, project=project, exit_code=exit_code) + + except FileNotFoundError as e: + raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e + except OSError as e: + raise CommandError(f"System error: {e}", project=project) from e + except subprocess.SubprocessError as e: + raise CommandError(f"Subprocess error: {e}", project=project) from e + +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: + """Fuzzer build workflow""" + try: + logging.info("=" * 60) + logging.info(f"🔧 Building fuzzers for: {project_name}") + logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") + logging.info("=" * 60) + + # Validate paths + helper_script = oss_fuzz_dir / "infra" / "helper.py" + if not helper_script.exists(): + raise PathError(f"Missing helper script: {helper_script}", project=project_name) + + # Execute fuzzer build command + run_command( + f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", + oss_fuzz_dir, + project=project_name + ) + + logging.info(f"✅ Fuzzers built: {project_name}") + return (True, project_name) + + except BuildError as e: + logging.error(f"❌ Build failed: {project_name}") + logging.error(f" Reason: {str(e)}") + return (False, project_name) + except Exception as e: + logging.error(f"🔥 Unhandled exception: {project_name}") + logging.exception(f" Exception details: {e}") + return (False, project_name) + +def main(): + parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder") + parser.add_argument("project_list", help="Project list file path") + parser.add_argument("--oss-fuzz-dir", required=True, type=str, + help="OSS-Fuzz directory path") + parser.add_argument("--sanitizer", default="address", + choices=["address", "memory", "undefined"], + help="Fuzzer sanitizer type") + parser.add_argument("--workers", type=int, default=cpu_count(), + help="Number of parallel worker processes") + parser.add_argument("--image-results", required=True, + help="JSON file with image build results from build_images.py") + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, + format='[%(levelname)s] %(message)s' + ) + + # Process paths + oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() + logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") + + # Read project list + try: + project_file = Path(args.project_list) + if not project_file.exists(): + raise FileNotFoundError(f"Project list file not found: {project_file}") + + with open(project_file, "r", encoding="utf-8") as f: + all_projects = [line.strip() for line in f if line.strip()] + + if not all_projects: + raise ConfigError("Project list is empty") + + logging.info(f"📋 Loaded {len(all_projects)} projects") + except Exception as e: + logging.error(f"❌ Failed to read project list: {e}") + sys.exit(1) + + # Load image build results + try: + image_results_file = Path(args.image_results) + if not image_results_file.exists(): + raise FileNotFoundError(f"Image results file not found: {image_results_file}") + + with open(image_results_file, "r") as f: + image_results = json.load(f) + + if not isinstance(image_results, dict): + raise ConfigError("Image results should be a JSON object") + + logging.info(f"📋 Loaded image build results: {args.image_results}") + except json.JSONDecodeError as e: + logging.error(f"❌ Failed to parse image build results: {e}") + sys.exit(1) + except Exception as e: + logging.error(f"❌ Failed to load image build results: {e}") + sys.exit(1) + + # Filter projects with successful image builds + projects_to_build = [p for p in all_projects if p in image_results and image_results[p]] + image_failures = [p for p in all_projects if p not in image_results or not image_results[p]] + + if not projects_to_build: + logging.error("❌ No projects with successful image builds") + if image_failures: + logging.error(f" Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}") + sys.exit(1) + + skipped = len(all_projects) - len(projects_to_build) + logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)") + + # Parallel fuzzer builds + with Pool(args.workers) as pool: + results = pool.starmap( + build_fuzzers, + [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build] + ) + + # Output results + fuzzer_results = {project: success for success, project in results} + failed = [p for p in projects_to_build if not fuzzer_results[p]] + + success_count = len(projects_to_build) - len(failed) + logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}") + + if failed: + logging.error(f"❌ Failed builds ({len(failed)} projects):") + for project in failed: + logging.error(f" - {project}") + + # Generate overall status report + overall_results = {} + for project in all_projects: + status = "❌" + if project in image_results and image_results[project]: + if project in fuzzer_results and fuzzer_results[project]: + status = "✅" + elif project in fuzzer_results: + status = "❌ (fuzzer)" + else: + status = "❌ (not built)" + else: + status = "❌ (image)" + overall_results[project] = status + + logging.info("\n📊 Overall status:") + for project, status in overall_results.items(): + logging.info(f" {project}: {status}") + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n🛑 Operation interrupted") + sys.exit(1) + except Exception as e: + print(f"💥 Critical error: {e}") + sys.exit(1) \ No newline at end of file From d3bbc132c0a7a4dff6acbf8079097aa3efebcb32 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 18:07:51 +0000 Subject: [PATCH 048/134] modify tuple dict list --- fuzz/build_fuzz.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 51a1a79..96e1f18 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -33,7 +33,6 @@ from pathlib import Path from returns.maybe import Maybe from multiprocessing import Pool -from typing import Dict, List, Tuple # ======================================================================================== # Custom Exceptions @@ -64,7 +63,7 @@ def run_command( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Maybe[List[int]] = Maybe.empty, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: """Execute a command and return the exit code""" @@ -110,7 +109,7 @@ def run_command( # ======================================================================================== # Build Functions # ======================================================================================== -def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: +def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Docker image build workflow""" try: logging.info(f"Building Docker image: {project_name}") @@ -137,7 +136,7 @@ def build_image(project_name: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") return (False, project_name) -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Fuzzer build workflow""" try: logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)") @@ -168,7 +167,7 @@ def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tupl # ======================================================================================== # Main Execution # ======================================================================================== -def load_projects(file_path: Path) -> List[str]: +def load_projects(file_path: Path) -> list[str]: """Load project list from file""" if not file_path.exists(): raise FileNotFoundError(f"Project list not found: {file_path}") @@ -184,13 +183,13 @@ def load_projects(file_path: Path) -> List[str]: def execute_builds( func, - args_list: List[Tuple], + args_list: list[tuple], worker_count: int, success_msg: str, failure_msg: str -) -> Tuple[Dict[str, bool], List[str]]: +) -> tuple[dict[str, bool], list[str]]: """Execute build tasks in parallel and return results""" - results: Dict[str, bool] = {} + results = {} with Pool(worker_count) as pool: for success, project in pool.starmap(func, args_list): results[project] = success @@ -246,7 +245,7 @@ def main(): sys.exit(1) # Image building workflow - image_results: Dict[str, bool] = {} + image_results = {} if args.mode in ['image', 'both']: logging.info("\n" + "="*60) logging.info(f"Starting Docker image builds for {len(projects)} projects") @@ -270,8 +269,8 @@ def main(): logging.error(f"❌ Failed to save image results: {e}") # Fuzzer building workflow - fuzzer_results: Dict[str, bool] = {} - fuzz_projects: List[str] = [] + fuzzer_results = {} + fuzz_projects = [] if args.mode in ['fuzzer', 'both']: logging.info("\n" + "="*60) logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)") From 62847f359d2d55255a68b043df6805a17933a935 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 18:22:32 +0000 Subject: [PATCH 049/134] remove stdout stderr in build fuzz --- fuzz/build_fuzz.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 96e1f18..3f0910c 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -18,7 +18,7 @@ python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: - python3 ./fuzz/build_fuzz.py --mode both data/valid_projects.txt \ + python3 ./fuzz/build_fuzz.py --mode both data/valid_projects2.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 @@ -66,7 +66,7 @@ def run_command( allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: - """Execute a command and return the exit code""" + """Execute a command and return the exit code (no stdout/stderr capture)""" allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd logging.debug(f"Executing command [{project}]: {cmd_str}") @@ -75,14 +75,9 @@ def run_command( process = subprocess.Popen( cmd_str, shell=True, - cwd=str(oss_fuzz_dir), - stdout=subprocess.PIPE if skip_yes else subprocess.DEVNULL, - stderr=subprocess.PIPE if skip_yes else subprocess.DEVNULL, - text=True + cwd=str(oss_fuzz_dir) ) - - stdout, stderr = process.communicate() - exit_code = process.returncode + exit_code = process.wait() if exit_code in allowed_codes: return exit_code @@ -90,13 +85,6 @@ def run_command( error_msg = f"Command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" - - if stderr and stderr.strip(): - error_msg += f"\nError output:\n{stderr.strip()}" - - if stdout and stdout.strip(): - error_msg += f"\nOutput:\n{stdout.strip()}" - raise CommandError(error_msg, project=project, exit_code=exit_code) except FileNotFoundError as e: From 4a5f9ff3346ac7b8333c995e67349798558fc534 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 19:46:57 +0000 Subject: [PATCH 050/134] test successfully --- fuzz/build_fuzz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 3f0910c..481fba9 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -18,7 +18,7 @@ python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: - python3 ./fuzz/build_fuzz.py --mode both data/valid_projects2.txt \ + python3 ./fuzz/build_fuzz.py --mode both data/valid_projects3.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 From 3f736cb00b57ee2f0124120ead196a27f0d3da9b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 22:56:58 +0000 Subject: [PATCH 051/134] rename run fuzz ds to run fuzz print1 --- fuzz/{run_fuzz_ds.py => run_fuzz_print1.py} | 226 ++++---------------- 1 file changed, 42 insertions(+), 184 deletions(-) rename fuzz/{run_fuzz_ds.py => run_fuzz_print1.py} (53%) diff --git a/fuzz/run_fuzz_ds.py b/fuzz/run_fuzz_print1.py similarity index 53% rename from fuzz/run_fuzz_ds.py rename to fuzz/run_fuzz_print1.py index b0c4c59..640d527 100644 --- a/fuzz/run_fuzz_ds.py +++ b/fuzz/run_fuzz_print1.py @@ -2,23 +2,17 @@ # -*- coding: utf-8 -*- """ -run_fuzz_all_targets_input.py - -Enhanced with input instrumentation to capture fuzzing inputs. +run_fuzz_print1.py This script employs a two-phase approach for fuzz testing: -1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project +1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing -3. Input capture: Instrument fuzz targets to record all inputs during fuzzing -Key Enhancements: -- Added input instrumentation to capture fuzzing inputs -- Created dedicated input storage directory structure -- Added AST-based function instrumentation -- Added input recording and analysis capabilities +This approach maximizes CPU utilization and provides clear overall progress[2](@ref). + +Usage: python3 run_fuzz_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 -Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs] -Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs """ import os @@ -27,120 +21,29 @@ import argparse import logging import time -import ast -import astor -import shutil from datetime import datetime from pathlib import Path from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some -# --- Global configuration --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "run_logs3" -INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs" # Directory to store captured inputs - -class FunctionInstrumenter(ast.NodeTransformer): - def __init__(self, target_functions: list[str]): - self.target_functions = target_functions - - def visit_FunctionDef(self, node): - if node.name in self.target_functions: - print_stmt = ast.parse(f'print("INPUT_CAPTURE: {node.name} called")').body[0] - node.body.insert(0, print_stmt) - return self.generic_visit(node) - -def instrument_code(source_code: str, target_functions: list[str]) -> str: - tree = ast.parse(source_code) - instrumenter = FunctionInstrumenter(target_functions) - instrumented_tree = instrumenter.visit(tree) - ast.fix_missing_locations(instrumented_tree) - return ast.unparse(instrumented_tree) -def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path: - """ - Prepare a Python fuzz target for input capture by instrumenting its code - - Args: - project_name: Name of the project - target_name: Name of the target to instrument - - Returns: - Path to the instrumented Python script - """ - try: - # Create project-specific input directory - project_input_dir = INPUT_DIR / project_name - project_input_dir.mkdir(parents=True, exist_ok=True) - - # Create target-specific input directory - target_input_dir = project_input_dir / target_name - target_input_dir.mkdir(exist_ok=True) - - logging.info(f"📁 Created input directory: {target_input_dir}") - - # Locate Python source file (support multiple extensions) - possible_extensions = [".py", ".pyw"] - source_file = None - - # Try possible file extensions - for ext in possible_extensions: - candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}" - if candidate.exists(): - source_file = candidate - break - - if not source_file: - logging.warning(f"⚠️ Python source file not found for: {target_name}") - return None - - # Backup original source file - backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}") - if not backup_file.exists(): - shutil.copy2(source_file, backup_file) - logging.info(f"💾 Backed up original file: {backup_file}") - - # Read source code - with open(source_file, "r") as f: - source_code = f.read() - - # Instrument the code - use Python-specific entry function - possible_entry_functions = ["TestInput", "TestOneInput"] - instrumented_code = instrument_code(source_code, possible_entry_functions) - - - # Write instrumented code to a new file with same extension - instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}") - with open(instrumented_file, "w") as f: - f.write(instrumented_code) - - logging.info(f"🔧 Instrumented {target_name} for input capture") - - # Python doesn't need rebuilding - return instrumented script path - return instrumented_file - - except Exception as e: - logging.error(f"❌ Failed to instrument {target_name}: {str(e)}") - return None + + + def run_command( cmd: str, log_msg: str, logger: logging.Logger, allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600, # Default 1-hour timeout - env: Maybe[dict] = Nothing # Use Maybe instead of Optional + timeout: int = 3600 # Default 1-hour timeout ) -> bool: - """Execute commands with real-time logging, precise error handling, and input capture""" + """Execute commands with real-time logging and precise error handling""" allowed_codes = allowed_exit_codes.value_or([]) logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") process = None try: - # Convert Maybe[dict] to actual environment or None - env_dict = env.value_or(None) - process = subprocess.Popen( cmd, shell=True, @@ -148,8 +51,7 @@ def run_command( stderr=subprocess.STDOUT, text=True, encoding="utf-8", - errors="replace", - env=env_dict + errors="replace" ) start_time = time.time() @@ -166,11 +68,7 @@ def run_command( if process.stdout: line = process.stdout.readline() if line: - # Capture input data when detected - if "INPUT_CAPTURE:" in line: - logger.debug(f"📥 {line.strip()}") - else: - logger.debug(line.strip()) + logger.debug(line.strip()) else: time.sleep(0.1) @@ -200,10 +98,11 @@ def run_command( except Exception: pass -def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: + +def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" - out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets = [] # Use built-in list type + out_dir = oss_fuzz_dir / "build" / "out" / project_name + targets: list[str] = [] if not out_dir.is_dir(): logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") @@ -215,6 +114,7 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: if (f.is_file() and f.name.startswith("fuzz_") and '.' not in f.name and + f.name.endswith("print1") and os.access(f, os.X_OK)): targets.append(f.name) except OSError as e: @@ -227,11 +127,12 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: return targets -def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]: - """Execute fuzz testing workflow for a single (project, target) pair with input capture""" + +def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]: + """Execute fuzz testing workflow for a single (project, target) pair""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) - + LOG_DIR = oss_fuzz_dir / "run_pj3_logs" try: logger.setLevel(logging.DEBUG) LOG_DIR.mkdir(parents=True, exist_ok=True) @@ -244,44 +145,21 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_ ) file_handler.setFormatter(formatter) logger.addHandler(file_handler) - os.chdir(OSS_FUZZ_DIR) - - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - - # Prepare environment for input capture if requested - env = Nothing - instrumented_file = None - - if record_inputs: - instrumented_file = prepare_target_for_input_capture(project_name, target_name) - if not instrumented_file: - logger.error(f"❌ Failed to instrument {target_name}") - return False, project_name, target_name - - logger.info(f"🔧 Instrumented {target_name} for input capture") - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - input_dir = INPUT_DIR / project_name / target_name / timestamp - input_dir.mkdir(parents=True, exist_ok=True) + os.chdir(oss_fuzz_dir) - env_dict = os.environ.copy() - env_dict["FUZZ_INPUT_DIR"] = str(input_dir) - env = Some(env_dict) - - logger.info(f"📁 Inputs will be stored in: {input_dir}") - - # Prepare command - target_to_run = instrumented_file.name if instrumented_file else target_name - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" + except (OSError, PermissionError) as e: + print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") + return False, project_name, target_name - # Execute command + logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") + try: + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" success = run_command( cmd, f"Running Target '{target_name}' (timeout={timeout}s)", logger, - allowed_exit_codes=Some([1, 124]), - timeout=timeout + 300, - env=env + allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout + timeout=timeout + 300 ) if success: @@ -294,24 +172,11 @@ def run_single_target(project_name: str, target_name: str, timeout: int, record_ except Exception as e: logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") return False, project_name, target_name - finally: for handler in logger.handlers[:]: handler.close() logger.removeHandler(handler) - if record_inputs and env.is_just: - try: - input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"]) - if input_dir.exists(): - input_count = len(list(input_dir.glob("*"))) - logger.info(f"📥 Captured {input_count} inputs for {target_name}") - except Exception as e: - logger.warning(f"⚠️ Failed to summarize captured inputs: {e}") - - # Final fallback (defensive) - return False, project_name, target_name - def main(): # Configure main process logging @@ -323,13 +188,16 @@ def main(): logger = logging.getLogger("Main") # Set up command line argument parsing - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture") + parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") parser.add_argument("project_list", help="File path containing list of project names") + parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)") parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") - parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing") args = parser.parse_args() + OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve() + LOG_DIR = OSS_FUZZ_DIR / "run_ds_logs" + # 1. Read project list file try: project_path = Path(args.project_list) @@ -345,13 +213,13 @@ def main(): # 2. Discovery phase: Collect all fuzz targets logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) - all_fuzz_tasks = [] # Use built-in list type - + all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples try: original_cwd = Path.cwd() # Save current working directory os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory for project_name in projects: - targets = discover_targets(project_name, logger) + targets = discover_targets(project_name, OSS_FUZZ_DIR, logger) + if targets: logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") for target in targets: @@ -371,17 +239,13 @@ def main(): logger.info("🤷 No executable Fuzz Targets found. Program exits.") sys.exit(0) - # 3. Execution phase: Parallel fuzz testing with input capture + # 3. Execution phase: Parallel fuzz testing logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") - if args.record_inputs: - logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.") - logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}") - logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - # Prepare task parameters (project, target, timeout, record_inputs) - tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks] + # Prepare task parameters (project, target, timeout) + tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks] results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) # Execute in parallel using process pool @@ -402,12 +266,6 @@ def main(): # Output statistical summary logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") - - if args.record_inputs: - total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks) - logger.info(f"📥 Total inputs captured: {total_inputs}") - logger.info(f"💾 Inputs stored at: {INPUT_DIR}") - if failed_tasks: logger.error("❌ The following Fuzz Targets failed:") for project, target in failed_tasks: From d5c2b2710641d7708168f60385e42cea4fc4b26f Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 22:57:56 +0000 Subject: [PATCH 052/134] add print(data ) to fuzz target and rename the file with "_print1" --- fuzz/modify_fuzz_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py index eb7b37e..de8333f 100644 --- a/fuzz/modify_fuzz_files.py +++ b/fuzz/modify_fuzz_files.py @@ -34,7 +34,7 @@ def add_print_to_testoneinput(file_path): def main(): projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" - valid_projects_file = "valid_projects.txt" + valid_projects_file = "data/valid_projects.txt" with open(valid_projects_file, 'r') as f: projects = [line.strip() for line in f if line.strip()] @@ -54,7 +54,7 @@ def main(): new_content = add_print_to_testoneinput(file_path) # 保存修改后的文件(添加_print后缀) - new_file_path = file_path.rsplit('.', 1)[0] + '_print.py' + new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py' with open(new_file_path, 'w') as f: f.write(new_content) print(f"Processed: {file_path} -> {new_file_path}") From d74a6bffcbbfa2e569f31f32e2c92910a9bd7b14 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 23:11:14 +0000 Subject: [PATCH 053/134] oss -fuzz change --- fuzz/oss-fuzz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz index f73b405..4bbbeb5 160000 --- a/fuzz/oss-fuzz +++ b/fuzz/oss-fuzz @@ -1 +1 @@ -Subproject commit f73b405d84e886bac90f8b15200230f08a2709c9 +Subproject commit 4bbbeb59599ad38b7984191e2e83bc9a61f7fd4b From c6a6c23e1e9e5a1f1fa7cba92e6a677243de9a85 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 23:14:47 +0000 Subject: [PATCH 054/134] rename the print1.py --- fuzz/{run_fuzz_print1.py => run_fuzz_all_targets_print1.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename fuzz/{run_fuzz_print1.py => run_fuzz_all_targets_print1.py} (97%) diff --git a/fuzz/run_fuzz_print1.py b/fuzz/run_fuzz_all_targets_print1.py similarity index 97% rename from fuzz/run_fuzz_print1.py rename to fuzz/run_fuzz_all_targets_print1.py index 640d527..7f1b5c3 100644 --- a/fuzz/run_fuzz_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -run_fuzz_print1.py +run_fuzz_all_targets_print1.py This script employs a two-phase approach for fuzz testing: 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project @@ -10,8 +10,8 @@ This approach maximizes CPU utilization and provides clear overall progress[2](@ref). -Usage: python3 run_fuzz_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 +Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 """ From 9d5c9b636d7ceb6368fe4be9c4cca79602fa1487 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 23:19:31 +0000 Subject: [PATCH 055/134] modify the exegesis --- fuzz/run_fuzz_all_targets_print1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py index 7f1b5c3..ce557c3 100644 --- a/fuzz/run_fuzz_all_targets_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -100,7 +100,7 @@ def run_command( def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: - """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" + """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)""" out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] From 521045ea3a0d075de47ec461779241f5620862a4 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 23:44:13 +0000 Subject: [PATCH 056/134] modify --- fuzz/run_fuzz_all_targets.py | 31 +- fuzz/run_fuzz_all_targets_input.py | 463 ----------------------------- 2 files changed, 18 insertions(+), 476 deletions(-) delete mode 100644 fuzz/run_fuzz_all_targets_input.py diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py index 74bed97..fe147d8 100644 --- a/fuzz/run_fuzz_all_targets.py +++ b/fuzz/run_fuzz_all_targets.py @@ -10,8 +10,9 @@ This approach maximizes CPU utilization and provides clear overall progress[2](@ref). -Usage: python3 run_fuzz_all_targets.py [project_list_file] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects.txt --timeout 60 --workers 4 +Usage: python3 run_fuzz_all_targets.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] +Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 + """ import os @@ -25,10 +26,8 @@ from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some -# --- Global configuration --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "run_logs2" + + def run_command( @@ -100,9 +99,9 @@ def run_command( pass -def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: +def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" - out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name + out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] if not out_dir.is_dir(): @@ -115,6 +114,7 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: if (f.is_file() and f.name.startswith("fuzz_") and '.' not in f.name and + # f.name.endswith("print1") and os.access(f, os.X_OK)): targets.append(f.name) except OSError as e: @@ -128,11 +128,11 @@ def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: return targets -def run_single_target(project_name: str, target_name: str, timeout: int) -> tuple[bool, str, str]: +def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]: """Execute fuzz testing workflow for a single (project, target) pair""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) - + LOG_DIR = oss_fuzz_dir / "run_pj3_logs" try: logger.setLevel(logging.DEBUG) LOG_DIR.mkdir(parents=True, exist_ok=True) @@ -145,7 +145,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int) -> tupl ) file_handler.setFormatter(formatter) logger.addHandler(file_handler) - os.chdir(OSS_FUZZ_DIR) + os.chdir(oss_fuzz_dir) except (OSError, PermissionError) as e: print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") @@ -190,10 +190,14 @@ def main(): # Set up command line argument parsing parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") parser.add_argument("project_list", help="File path containing list of project names") + parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)") parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") args = parser.parse_args() + OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve() + LOG_DIR = OSS_FUZZ_DIR / "run_fuzz_all_targets_logs" + # 1. Read project list file try: project_path = Path(args.project_list) @@ -214,7 +218,8 @@ def main(): original_cwd = Path.cwd() # Save current working directory os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory for project_name in projects: - targets = discover_targets(project_name, logger) + targets = discover_targets(project_name, OSS_FUZZ_DIR, logger) + if targets: logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") for target in targets: @@ -240,7 +245,7 @@ def main(): logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") # Prepare task parameters (project, target, timeout) - tasks_with_args = [(p, t, args.timeout) for p, t in all_fuzz_tasks] + tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks] results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) # Execute in parallel using process pool diff --git a/fuzz/run_fuzz_all_targets_input.py b/fuzz/run_fuzz_all_targets_input.py deleted file mode 100644 index 4d38ffb..0000000 --- a/fuzz/run_fuzz_all_targets_input.py +++ /dev/null @@ -1,463 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -run_fuzz_all_targets_input.py - -Enhanced with input instrumentation to capture fuzzing inputs. - -This script employs a two-phase approach for fuzz testing: -1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project -2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing -3. Input capture: Instrument fuzz targets to record all inputs during fuzzing - -Key Enhancements: -- Added input instrumentation to capture fuzzing inputs -- Created dedicated input storage directory structure -- Added AST-based function instrumentation -- Added input recording and analysis capabilities - -Usage: python3 run_fuzz_all_targets_input [project_list_file] [--timeout seconds] [--workers N] [--record-inputs] -Example: python3 fuzz/run_fuzz_all_targets_input.py data/valid_projects.txt --timeout 60 --workers 4 --record-inputs -""" - -import os -import sys -import subprocess -import argparse -import logging -import time -import ast -import astor -import shutil -from datetime import datetime -from pathlib import Path -from multiprocessing import Pool, cpu_count -from returns.maybe import Maybe, Nothing, Some - -# --- Global configuration --- -HOME_DIR = Path.home() -OSS_FUZZ_DIR = HOME_DIR / "FuzzAug" / "fuzz" / "oss-fuzz" -LOG_DIR = OSS_FUZZ_DIR / "run_logs3" -INPUT_DIR = OSS_FUZZ_DIR / "fuzz_inputs" # Directory to store captured inputs - -class FunctionInstrumenter(ast.NodeTransformer): - """AST transformer to instrument function entries for input recording""" - def __init__(self, target_functions: list[str]): - self.target_functions = target_functions - super().__init__() - - def visit_FunctionDef(self, node): - """Instrument function definition to add input recording""" - # 只对目标函数进行插桩 - if node.name in self.target_functions: - # Add print statement at the beginning of the function - input_record_stmt = ast.Expr( - value=ast.Call( - func=ast.Name(id='print', ctx=ast.Load()), - args=[ast.Constant(value=f"INPUT_CAPTURE: {node.name} received input: {{data}}")], - keywords=[] - ) - ) - - # Insert the print statement at the top of the function body - if node.body: - node.body.insert(0, input_record_stmt) - - return node - - -def instrument_code(source_code: str, target_function: list[str]) -> str: - """ - Instrument source code to record inputs for specific function - - Args: - source_code: Original source code - target_function: Name of the function to instrument - - Returns: - Instrumented source code - """ - try: - # Parse the source code into an AST - tree = ast.parse(source_code) - - # Create instrumenter and apply transformations - instrumenter = FunctionInstrumenter(target_functions) - modified_tree = instrumenter.visit(tree) - - # Add missing location information for generated nodes - ast.fix_missing_locations(modified_tree) - - # Generate the modified source code - return astor.to_source(modified_tree) - except Exception as e: - logging.error(f"🔧 Code instrumentation failed: {str(e)}") - return source_code # Return original if instrumentation fails - -def prepare_target_for_input_capture(project_name: str, target_name: str) -> Path: - """ - Prepare a Python fuzz target for input capture by instrumenting its code - - Args: - project_name: Name of the project - target_name: Name of the target to instrument - - Returns: - Path to the instrumented Python script - """ - try: - # Create project-specific input directory - project_input_dir = INPUT_DIR / project_name - project_input_dir.mkdir(parents=True, exist_ok=True) - - # Create target-specific input directory - target_input_dir = project_input_dir / target_name - target_input_dir.mkdir(exist_ok=True) - - logging.info(f"📁 Created input directory: {target_input_dir}") - - # Locate Python source file (support multiple extensions) - possible_extensions = [".py", ".pyw"] - source_file = None - - # Try possible file extensions - for ext in possible_extensions: - candidate = OSS_FUZZ_DIR / "projects" / project_name / f"{target_name}{ext}" - if candidate.exists(): - source_file = candidate - break - - if not source_file: - logging.warning(f"⚠️ Python source file not found for: {target_name}") - return None - - # Backup original source file - backup_file = source_file.with_name(f"{target_name}_original{source_file.suffix}") - if not backup_file.exists(): - shutil.copy2(source_file, backup_file) - logging.info(f"💾 Backed up original file: {backup_file}") - - # Read source code - with open(source_file, "r") as f: - source_code = f.read() - - # Instrument the code - use Python-specific entry function - possible_entry_functions = ["TestInput", "TestOneInput"] - instrumented_code = instrument_code(source_code, possible_entry_functions) - - - # Write instrumented code to a new file with same extension - instrumented_file = source_file.with_name(f"{target_name}_instrumented{source_file.suffix}") - with open(instrumented_file, "w") as f: - f.write(instrumented_code) - - logging.info(f"🔧 Instrumented {target_name} for input capture") - - # Python doesn't need rebuilding - return instrumented script path - return instrumented_file - - except Exception as e: - logging.error(f"❌ Failed to instrument {target_name}: {str(e)}") - return None - -def run_command( - cmd: str, - log_msg: str, - logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600, # Default 1-hour timeout - env: Maybe[dict] = Nothing # Use Maybe instead of Optional -) -> bool: - """Execute commands with real-time logging, precise error handling, and input capture""" - allowed_codes = allowed_exit_codes.value_or([]) - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") - - process = None - try: - # Convert Maybe[dict] to actual environment or None - env_dict = env.value_or(None) - - process = subprocess.Popen( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace", - env=env_dict - ) - - start_time = time.time() - while process.poll() is None: - if time.time() - start_time > timeout: - logger.error(f"⌛ Command timed out after {timeout} seconds") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - return False - - if process.stdout: - line = process.stdout.readline() - if line: - # Capture input data when detected - if "INPUT_CAPTURE:" in line: - logger.debug(f"📥 {line.strip()}") - else: - logger.debug(line.strip()) - else: - time.sleep(0.1) - - exit_code = process.returncode - if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ Command execution failed, exit code: {exit_code}") - return False - return True - - except FileNotFoundError: - logger.error(f"🔍 Command not found: {cmd.split()[0]}") - return False - except PermissionError: - logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") - return False - except subprocess.SubprocessError as e: - logger.exception(f"💥 Subprocess error: {e}") - return False - except OSError as e: - logger.exception(f"💥 Operating system error during command execution: {e}") - return False - finally: - if process and process.poll() is None: - try: - process.terminate() - process.wait(timeout=5) - except Exception: - pass - -def discover_targets(project_name: str, logger: logging.Logger) -> list[str]: - """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" - out_dir = OSS_FUZZ_DIR / "build" / "out" / project_name - targets = [] # Use built-in list type - - if not out_dir.is_dir(): - logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") - return targets - - try: - for f in out_dir.iterdir(): - try: - if (f.is_file() and - f.name.startswith("fuzz_") and - '.' not in f.name and - os.access(f, os.X_OK)): - targets.append(f.name) - except OSError as e: - logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") - - except PermissionError: - logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") - except OSError as e: - logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") - - return targets - -def run_single_target(project_name: str, target_name: str, timeout: int, record_inputs: bool = False) -> tuple[bool, str, str]: - """Execute fuzz testing workflow for a single (project, target) pair with input capture""" - task_id = f"{project_name}_{target_name}" - logger = logging.getLogger(task_id) - - try: - logger.setLevel(logging.DEBUG) - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" - file_handler = logging.FileHandler(log_file, encoding="utf-8") - formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - os.chdir(OSS_FUZZ_DIR) - - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - - # Prepare environment for input capture if requested - env = Nothing - instrumented_file = None - - if record_inputs: - instrumented_file = prepare_target_for_input_capture(project_name, target_name) - if not instrumented_file: - logger.error(f"❌ Failed to instrument {target_name}") - return False, project_name, target_name - - logger.info(f"🔧 Instrumented {target_name} for input capture") - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - input_dir = INPUT_DIR / project_name / target_name / timestamp - input_dir.mkdir(parents=True, exist_ok=True) - - env_dict = os.environ.copy() - env_dict["FUZZ_INPUT_DIR"] = str(input_dir) - env = Some(env_dict) - - logger.info(f"📁 Inputs will be stored in: {input_dir}") - - # Prepare command - target_to_run = instrumented_file.name if instrumented_file else target_name - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_to_run} -- -max_total_time={timeout}" - - # Execute command - success = run_command( - cmd, - f"Running Target '{target_name}' (timeout={timeout}s)", - logger, - allowed_exit_codes=Some([1, 124]), - timeout=timeout + 300, - env=env - ) - - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") - else: - logger.error(f"❌ Target '{target_name}' failed.") - - return success, project_name, target_name - - except Exception as e: - logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") - return False, project_name, target_name - - finally: - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - if record_inputs and env.is_just: - try: - input_dir = Path(env.unwrap()["FUZZ_INPUT_DIR"]) - if input_dir.exists(): - input_count = len(list(input_dir.glob("*"))) - logger.info(f"📥 Captured {input_count} inputs for {target_name}") - except Exception as e: - logger.warning(f"⚠️ Failed to summarize captured inputs: {e}") - - # Final fallback (defensive) - return False, project_name, target_name - - -def main(): - # Configure main process logging - logging.basicConfig( - level=logging.INFO, - format="[%(levelname)s] %(message)s", - stream=sys.stdout - ) - logger = logging.getLogger("Main") - - # Set up command line argument parsing - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool with Input Capture") - parser.add_argument("project_list", help="File path containing list of project names") - parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") - parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") - parser.add_argument("--record-inputs", action="store_true", help="Enable input capture during fuzzing") - args = parser.parse_args() - - # 1. Read project list file - try: - project_path = Path(args.project_list) - with open(project_path, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") - except FileNotFoundError: - logger.error(f"❌ Project list file not found: {args.project_list}") - sys.exit(1) - except (OSError, PermissionError) as e: - logger.exception(f"💥 Error occurred while reading project list: {e}") - sys.exit(1) - - # 2. Discovery phase: Collect all fuzz targets - logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) - all_fuzz_tasks = [] # Use built-in list type - - try: - original_cwd = Path.cwd() # Save current working directory - os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory - for project_name in projects: - targets = discover_targets(project_name, logger) - if targets: - logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") - for target in targets: - all_fuzz_tasks.append((project_name, target)) - else: - logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") - os.chdir(original_cwd) # Restore original working directory - except FileNotFoundError: - logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") - sys.exit(1) - except Exception as e: - logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") - sys.exit(1) - - # Check if any valid targets were found - if not all_fuzz_tasks: - logger.info("🤷 No executable Fuzz Targets found. Program exits.") - sys.exit(0) - - # 3. Execution phase: Parallel fuzz testing with input capture - logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") - if args.record_inputs: - logger.info("🔔 Input capture is ENABLED. All fuzzing inputs will be recorded.") - logger.info(f"📁 Inputs will be stored in: {INPUT_DIR}") - - logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) - logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - - # Prepare task parameters (project, target, timeout, record_inputs) - tasks_with_args = [(p, t, args.timeout, args.record_inputs) for p, t in all_fuzz_tasks] - results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) - - # Execute in parallel using process pool - with Pool(args.workers) as pool: - try: - results = pool.starmap(run_single_target, tasks_with_args) - except Exception as e: - logger.error(f"💥 Critical error occurred during parallel execution: {e}") - pool.terminate() - pool.join() - - # 4. Result summary and reporting - logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) - failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks - total_tasks = len(all_fuzz_tasks) - failed_count = len(failed_tasks) - success_count = total_tasks - failed_count - - # Output statistical summary - logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") - - if args.record_inputs: - total_inputs = sum(len(list((INPUT_DIR / p / t).glob("*/*.bin"))) for p, t in all_fuzz_tasks) - logger.info(f"📥 Total inputs captured: {total_inputs}") - logger.info(f"💾 Inputs stored at: {INPUT_DIR}") - - if failed_tasks: - logger.error("❌ The following Fuzz Targets failed:") - for project, target in failed_tasks: - logger.error(f" - Project: {project}, Target: {target}") # List detailed failures - - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user.") - sys.exit(1) - except Exception as e: - print(f"\n💥 Fatal error in main program: {e}") - sys.exit(1) \ No newline at end of file From 6c18832ee0dcf906d917ba5513f202088e627337 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 25 Jul 2025 23:44:56 +0000 Subject: [PATCH 057/134] modify log name --- fuzz/run_fuzz_all_targets_print1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py index ce557c3..8b5c31d 100644 --- a/fuzz/run_fuzz_all_targets_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -132,7 +132,7 @@ def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuz """Execute fuzz testing workflow for a single (project, target) pair""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) - LOG_DIR = oss_fuzz_dir / "run_pj3_logs" + LOG_DIR = oss_fuzz_dir / "run_fuzz_all_targets_logs" try: logger.setLevel(logging.DEBUG) LOG_DIR.mkdir(parents=True, exist_ok=True) From 679ec482923bd8d8897df40dec4f9860d50fb0c0 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 26 Jul 2025 00:00:08 +0000 Subject: [PATCH 058/134] type error --- fuzz/build_fuzzers.py | 4 ++-- image_build_results.json | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index 1dfd825..8a8a0a4 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -29,7 +29,7 @@ class BuildError(Exception): """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: int = None): + def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None): super().__init__(message) self.project = project self.exit_code = exit_code @@ -72,7 +72,7 @@ def run_command( if exit_code in allowed_exit_codes: return exit_code - # 构建详细的错误信息 + # Build detailed error message error_msg = f"Command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" diff --git a/image_build_results.json b/image_build_results.json index 4af9787..2e47cc5 100644 --- a/image_build_results.json +++ b/image_build_results.json @@ -1 +1,10 @@ -{"abseil-py": true, "adal": true, "aiohttp": true, "aniso8601": true, "ansible": true, "argcomplete": true, "arrow-py": true, "asn1crypto": true, "asteval": true, "astroid": true, "asttokens": true, "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, "babel": true, "black": true, "botocore": true, "bottleneck": true, "bz2file": true, "cachetools": true, "cffi": true, "chardet": true, "charset_normalizer": true, "click": true, "cloud-custodian": true, "configparser": true, "connexion": true, "coveragepy": true, "croniter": true, "cryptography": true, "cssselect": true, "dask": true, "decorator": true, "defusedxml": true, "digest": true, "dill": true, "distlib": true, "dnspython": true, "docutils": true, "ecdsa-python": true, "et-xmlfile": true, "face": true, "filelock": true, "filesystem_spec": true, "flask": true, "flask-jwt-extended": true, "flask-restx": true, "flask-wtf": true, "fonttools": true, "ftfy": true, "g-api-auth-httplib2": true, "g-api-auth-library-python": true, "g-api-pubsub": true, "g-api-py-api-common-protos": true, "g-api-py-oauthlib": true, "g-api-python-bigquery-storage": true, "g-api-python-client": true, "g-api-python-cloud-core": true, "g-api-python-firestore": true, "g-api-python-tasks": true, "g-api-resource-manager": true, "g-api-resumable-media-python": true, "g-api-secret-manager": true, "g-apis-py-api-core": true, "gast": true, "gc-iam": true, "gcloud-error-py": true, "g-cloud-logging-py": true, "gcp-python-cloud-storage": true, "genshi": true, "gitdb": true, "glom": true, "gprof2dot": true, "g-py-bigquery": true, "g-py-crc32c": true, "grpc-py": true, "gunicorn": true, "h11": true, "h5py": true, "hiredis-py": true, "html2text": true, "html5lib-python": true, "httpcore": true, "httpretty": true, "httpx": true, "idna": true, "ijson": true, "importlib_metadata": true, "iniconfig": true, "ipaddress": true, "ipykernel": true, "ipython": true, "isodate": true, "itsdangerous": true, "jedi": true, "jinja2": true, "jmespathpy": true, "joblib": true, "jsmin": true, "jupyter-nbconvert": true, "jupyter_server": true, "kafka": true, "keras": false, "kiwisolver": true, "lark-parser": true, "libcst": true, "looker-sdk": true, "lxml": true, "mako": true, "markupsafe": true, "matplotlib": true, "mccabe": true, "mdit-py-plugins": true, "mdurl": true, "more-itertools": true, "mrab-regex": true, "msal": true, "msgpack-python": true, "multidict": true, "mutagen": true, "nbclassic": true, "nbformat": true, "netaddr-py": true, "networkx": true, "ntlm2": true, "ntlm-auth": true, "numexpr": true, "numpy": true, "oauth2": true, "oauthlib": true, "olefile": true, "openapi-schema-validator": true, "opencensus-python": true, "openpyxl": true, "opt_einsum": true, "oracle-py-cx": true, "orjson": true, "oscrypto": true, "packaging": true, "pandas": true, "paramiko": true, "parse": true, "parsimonious": true, "pasta": true, "pathlib2": true, "pdoc": true, "pem": true, "pendulum": true, "pip": true, "ply": true, "protobuf-python": true, "proto-plus-python": true, "psqlparse": true, "psutil": true, "psycopg2": true, "pyasn1": true, "pyasn1-modules": true, "pycparser": true, "pycrypto": true, "pydantic": true, "pydateutil": true, "pygments": true, "pyjson5": true, "pyjwt": true, "pymysql": true, "pynacl": true, "pyodbc": true, "pyparsing": true, "pyrsistent": true, "py-serde": true, "pytables": true, "pytest-py": true, "python3-openid": true, "python-ecdsa": true, "python-email-validator": true, "python-fastjsonschema": true, "python-future": true, "python-graphviz": true, "python-hyperlink": true, "python-jose": true, "python-lz4": true, "python-markdown": true, "python-markdownify": true, "python-nameparser": true, "python-nvd3": true, "python-pathspec": true, "python-prompt-toolkit": true, "python-pypdf": true, "python-rison": true, "python-rsa": true, "python-tabulate": true, "pytz": true, "pyxdg": true, "pyyaml": true, "pyzmq": true, "redis-py": true, "requests": true, "retry": true, "rfc3967": true, "rich": true, "sacremoses": true, "scikit-learn": true, "scipy": true, "setuptools": true, "sigstore-python": true, "simplejson": true, "six": true, "smart_open": true, "soupsieve": true, "sqlalchemy_jsonfield": true, "sqlalchemy-utils": true, "sqlparse": true, "stack_data": true, "tensorflow-addons": false, "tinycss2": true, "toml": true, "tomlkit": true, "toolbelt": true, "toolz": true, "tqdm": true, "typing_extensions": true, "underscore": true, "uritemplate": true, "urlextract": true, "urllib3": true, "validators": true, "w3lib": true, "websocket-client": true, "wheel": true, "wtforms": true, "xlrd": true, "yarl": true, "zipp": true} \ No newline at end of file +{ + "asteval": true, + "astroid": true, + "asttokens": true, + "attrs": true, + "autoflake": true, + "autopep8": true, + "azure-sdk-for-python": true, + "babel": true +} \ No newline at end of file From b4d18115368c2ed0277dae73ca61733f6fbbe6b6 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 26 Jul 2025 00:05:39 +0000 Subject: [PATCH 059/134] list dict tuple --- fuzz/build_fuzzers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index 8a8a0a4..65fa93a 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -24,7 +24,7 @@ import logging import json from pathlib import Path -from typing import List, Optional, Tuple +from typing import Optional from multiprocessing import Pool, cpu_count class BuildError(Exception): @@ -50,7 +50,7 @@ def run_command( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Optional[List[int]] = None + allowed_exit_codes: Optional[list[int]] = None ) -> int: """Execute a command and return the exit code""" allowed_exit_codes = allowed_exit_codes or [0] @@ -92,7 +92,7 @@ def run_command( except subprocess.SubprocessError as e: raise CommandError(f"Subprocess error: {e}", project=project) from e -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> Tuple[bool, str]: +def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Fuzzer build workflow""" try: logging.info("=" * 60) From d030841f68ef8762f78584f06c9a5e35668f0c7e Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 26 Jul 2025 00:09:54 +0000 Subject: [PATCH 060/134] type error --- fuzz/build_fuzz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 481fba9..3cbb505 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -233,7 +233,7 @@ def main(): sys.exit(1) # Image building workflow - image_results = {} + image_results: dict[str, bool] = {} if args.mode in ['image', 'both']: logging.info("\n" + "="*60) logging.info(f"Starting Docker image builds for {len(projects)} projects") @@ -257,7 +257,7 @@ def main(): logging.error(f"❌ Failed to save image results: {e}") # Fuzzer building workflow - fuzzer_results = {} + fuzzer_results: dict[str, bool] = {} fuzz_projects = [] if args.mode in ['fuzzer', 'both']: logging.info("\n" + "="*60) From 3f9df9d3a6ba35c2bbdef5692ba90b1b5b86f9f6 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 28 Jul 2025 23:30:30 +0000 Subject: [PATCH 061/134] construct errors module --- fuzz/build_fuzz.py | 24 ++---------------------- fuzz/build_fuzzers.py | 20 +------------------- fuzz/errors.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 41 deletions(-) create mode 100644 fuzz/errors.py diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 3cbb505..5db274e 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -18,7 +18,7 @@ python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address Example: - python3 ./fuzz/build_fuzz.py --mode both data/valid_projects3.txt \ + python3 fuzz/build_fuzz.py --mode both data/valid_projects3.txt \ --oss-fuzz-dir ./fuzz/oss-fuzz \ --sanitizer address \ --workers 8 @@ -33,28 +33,8 @@ from pathlib import Path from returns.maybe import Maybe from multiprocessing import Pool +from errors import BuildError, CommandError, PathError, ConfigError -# ======================================================================================== -# Custom Exceptions -# ======================================================================================== -class BuildError(Exception): - """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: int | None = None): - super().__init__(message) - self.project = project - self.exit_code = exit_code - -class CommandError(BuildError): - """Exception for command execution failures""" - pass - -class PathError(BuildError): - """Exception for missing paths or files""" - pass - -class ConfigError(BuildError): - """Exception for configuration errors""" - pass # ======================================================================================== # Helper Functions diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index 65fa93a..a252dbd 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -26,25 +26,7 @@ from pathlib import Path from typing import Optional from multiprocessing import Pool, cpu_count - -class BuildError(Exception): - """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: Optional[int] = None): - super().__init__(message) - self.project = project - self.exit_code = exit_code - -class CommandError(BuildError): - """Exception for command execution failures""" - pass - -class PathError(BuildError): - """Exception for missing paths or files""" - pass - -class ConfigError(BuildError): - """Exception for configuration errors""" - pass +from errors import BuildError, CommandError, PathError, ConfigError def run_command( cmd: str, diff --git a/fuzz/errors.py b/fuzz/errors.py new file mode 100644 index 0000000..294e642 --- /dev/null +++ b/fuzz/errors.py @@ -0,0 +1,20 @@ +# fuzz/errors.py + +class BuildError(Exception): + """Base exception for build failures""" + def __init__(self, message: str, project: str = "", exit_code: int | None = None): + super().__init__(message) + self.project = project + self.exit_code = exit_code + +class CommandError(BuildError): + """Exception for command execution failures""" + pass + +class PathError(BuildError): + """Exception for missing paths or files""" + pass + +class ConfigError(BuildError): + """Exception for configuration errors""" + pass From 85b7904e9dd0700b7e00c830bde1562c356783c5 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 28 Jul 2025 23:45:10 +0000 Subject: [PATCH 062/134] run_command module --- fuzz/command_util.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 fuzz/command_util.py diff --git a/fuzz/command_util.py b/fuzz/command_util.py new file mode 100644 index 0000000..2f0c65a --- /dev/null +++ b/fuzz/command_util.py @@ -0,0 +1,42 @@ +# command_util.py + +import subprocess +import logging +from returns.maybe import Maybe +from errors import CommandError +from pathlib import Path + +def run_command( + cmd: str, + oss_fuzz_dir: Path, + project: str = "", + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + skip_yes: bool = False +) -> int: + """Execute a command and return the exit code (no stdout/stderr capture)""" + allowed_codes = allowed_exit_codes.value_or([0]) + cmd_str = f"yes | {cmd}" if not skip_yes else cmd + logging.debug(f"Executing command [{project}]: {cmd_str}") + + try: + process = subprocess.Popen( + cmd_str, + shell=True, + cwd=str(oss_fuzz_dir) + ) + exit_code = process.wait() + + if exit_code in allowed_codes: + return exit_code + + error_msg = f"Command failed (exit code: {exit_code})" + if project: + error_msg += f" for project: {project}" + raise CommandError(error_msg, project=project, exit_code=exit_code) + + except FileNotFoundError as e: + raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e + except OSError as e: + raise CommandError(f"System error: {e}", project=project) from e + except subprocess.SubprocessError as e: + raise CommandError(f"Subprocess error: {e}", project=project) from e From 40102b44e237374f5e374c76de0d68ae61ccbf30 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 28 Jul 2025 23:52:08 +0000 Subject: [PATCH 063/134] combine the run_command instrument to one file --- fuzz/build_fuzz.py | 2 + fuzz/command_util.py | 114 +++++++++++++++++++++++----- fuzz/run_fuzz_all_targets_print1.py | 2 +- image_build_results.json | 4 - 4 files changed, 96 insertions(+), 26 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index 5db274e..c1f7aeb 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -34,6 +34,8 @@ from returns.maybe import Maybe from multiprocessing import Pool from errors import BuildError, CommandError, PathError, ConfigError +from command_util import run_command_build_fuzz as run_command + # ======================================================================================== diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 2f0c65a..7455af2 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -6,37 +6,109 @@ from errors import CommandError from pathlib import Path -def run_command( +import subprocess +import time +import logging +from pathlib import Path +from returns.maybe import Maybe +from errors import CommandError + +def _run_subprocess( + cmd: str, + cwd: Path = None, + capture_output: bool = False, + timeout: int = None, + logger: logging.Logger = None, +) -> tuple[int, list[str]]: + """ + 低层执行子进程命令 + - capture_output=True:捕获 stdout,返回输出列表 + - timeout 秒超时(无超时则None) + - logger 用于实时打印输出 + 返回:(退出码, 输出行列表) + """ + process = subprocess.Popen( + cmd, + shell=True, + cwd=str(cwd) if cwd else None, + stdout=subprocess.PIPE if capture_output else None, + stderr=subprocess.STDOUT if capture_output else None, + text=True, + encoding="utf-8", + errors="replace", + ) + + output_lines = [] + start_time = time.time() + + try: + if capture_output: + while True: + line = process.stdout.readline() + if line: + output_lines.append(line.rstrip()) + if logger: + logger.debug(line.rstrip()) + elif process.poll() is not None: + break + + if timeout and (time.time() - start_time) > timeout: + if logger: + logger.error(f"⌛ Command timed out after {timeout} seconds") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + return -1, output_lines + time.sleep(0.05) + else: + # 不捕获输出,直接等待结束 + process.wait(timeout=timeout) + + except Exception as e: + if logger: + logger.exception(f"Error during command execution: {e}") + process.kill() + raise e + + return process.returncode, output_lines + + +def run_command_build_fuzz( cmd: str, oss_fuzz_dir: Path, project: str = "", allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: - """Execute a command and return the exit code (no stdout/stderr capture)""" + """build_fuzz.py 中使用的 run_command,简化版,抛异常""" allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd - logging.debug(f"Executing command [{project}]: {cmd_str}") - - try: - process = subprocess.Popen( - cmd_str, - shell=True, - cwd=str(oss_fuzz_dir) - ) - exit_code = process.wait() - - if exit_code in allowed_codes: - return exit_code - + exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir) + if exit_code not in allowed_codes: error_msg = f"Command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" raise CommandError(error_msg, project=project, exit_code=exit_code) + return exit_code + + +def run_command_fuzz_all_targets( + cmd: str, + log_msg: str, + logger: logging.Logger, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + timeout: int = 3600, +) -> bool: + """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回bool""" + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") + + allowed_codes = allowed_exit_codes.value_or([]) + exit_code, _ = _run_subprocess(cmd, capture_output=True, timeout=timeout, logger=logger) + if exit_code not in [0, *allowed_codes]: + logger.error(f"❌ Command execution failed, exit code: {exit_code}") + return False + return True - except FileNotFoundError as e: - raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e - except OSError as e: - raise CommandError(f"System error: {e}", project=project) from e - except subprocess.SubprocessError as e: - raise CommandError(f"Subprocess error: {e}", project=project) from e diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py index 8b5c31d..c5e15eb 100644 --- a/fuzz/run_fuzz_all_targets_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -25,7 +25,7 @@ from pathlib import Path from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some - +from command_util import run_command_fuzz_all_targets as run_command diff --git a/image_build_results.json b/image_build_results.json index 2e47cc5..72b9fe6 100644 --- a/image_build_results.json +++ b/image_build_results.json @@ -1,8 +1,4 @@ { - "asteval": true, - "astroid": true, - "asttokens": true, - "attrs": true, "autoflake": true, "autopep8": true, "azure-sdk-for-python": true, From e0c7740f66e636e8ac2af786fc7a93fbff26538d Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:05:28 +0000 Subject: [PATCH 064/134] remove the run_command --- fuzz/build_fuzz.py | 40 ----------------- fuzz/run_fuzz_all_targets_print1.py | 70 ----------------------------- 2 files changed, 110 deletions(-) diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py index c1f7aeb..00f8af6 100644 --- a/fuzz/build_fuzz.py +++ b/fuzz/build_fuzz.py @@ -36,46 +36,6 @@ from errors import BuildError, CommandError, PathError, ConfigError from command_util import run_command_build_fuzz as run_command - - -# ======================================================================================== -# Helper Functions -# ======================================================================================== -def run_command( - cmd: str, - oss_fuzz_dir: Path, - project: str = "", - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, - skip_yes: bool = False -) -> int: - """Execute a command and return the exit code (no stdout/stderr capture)""" - allowed_codes = allowed_exit_codes.value_or([0]) - cmd_str = f"yes | {cmd}" if not skip_yes else cmd - logging.debug(f"Executing command [{project}]: {cmd_str}") - - try: - process = subprocess.Popen( - cmd_str, - shell=True, - cwd=str(oss_fuzz_dir) - ) - exit_code = process.wait() - - if exit_code in allowed_codes: - return exit_code - - error_msg = f"Command failed (exit code: {exit_code})" - if project: - error_msg += f" for project: {project}" - raise CommandError(error_msg, project=project, exit_code=exit_code) - - except FileNotFoundError as e: - raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e - except OSError as e: - raise CommandError(f"System error: {e}", project=project) from e - except subprocess.SubprocessError as e: - raise CommandError(f"Subprocess error: {e}", project=project) from e - # ======================================================================================== # Build Functions # ======================================================================================== diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py index c5e15eb..04400c6 100644 --- a/fuzz/run_fuzz_all_targets_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -29,76 +29,6 @@ - -def run_command( - cmd: str, - log_msg: str, - logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600 # Default 1-hour timeout -) -> bool: - """Execute commands with real-time logging and precise error handling""" - allowed_codes = allowed_exit_codes.value_or([]) - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") - - process = None - try: - process = subprocess.Popen( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace" - ) - - start_time = time.time() - while process.poll() is None: - if time.time() - start_time > timeout: - logger.error(f"⌛ Command timed out after {timeout} seconds") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - return False - - if process.stdout: - line = process.stdout.readline() - if line: - logger.debug(line.strip()) - else: - time.sleep(0.1) - - exit_code = process.returncode - if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ Command execution failed, exit code: {exit_code}") - return False - return True - - except FileNotFoundError: - logger.error(f"🔍 Command not found: {cmd.split()[0]}") - return False - except PermissionError: - logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") - return False - except subprocess.SubprocessError as e: - logger.exception(f"💥 Subprocess error: {e}") - return False - except OSError as e: - logger.exception(f"💥 Operating system error during command execution: {e}") - return False - finally: - if process and process.poll() is None: - try: - process.terminate() - process.wait(timeout=5) - except Exception: - pass - - def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)""" out_dir = oss_fuzz_dir / "build" / "out" / project_name From 0631504097b8184f1e534a97b83e4bbf3e89d4fb Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:16:33 +0000 Subject: [PATCH 065/134] modify --- fuzz/build_fuzzers.py | 77 +++++++++---------- fuzz/run_fuzz_all_targets.py | 141 +++++++++++++++++------------------ 2 files changed, 108 insertions(+), 110 deletions(-) diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py index a252dbd..ab8ac7d 100644 --- a/fuzz/build_fuzzers.py +++ b/fuzz/build_fuzzers.py @@ -27,52 +27,53 @@ from typing import Optional from multiprocessing import Pool, cpu_count from errors import BuildError, CommandError, PathError, ConfigError - -def run_command( - cmd: str, - oss_fuzz_dir: Path, - project: str = "", - allowed_exit_codes: Optional[list[int]] = None -) -> int: - """Execute a command and return the exit code""" - allowed_exit_codes = allowed_exit_codes or [0] - logging.info(f"▶️ Executing command: {cmd}") +from command_util import run_command_build_fuzz as run_command + +# def run_command( +# cmd: str, +# oss_fuzz_dir: Path, +# project: str = "", +# allowed_exit_codes: Optional[list[int]] = None +# ) -> int: +# """Execute a command and return the exit code""" +# allowed_exit_codes = allowed_exit_codes or [0] +# logging.info(f"▶️ Executing command: {cmd}") - try: - process = subprocess.Popen( - cmd, - shell=True, - cwd=str(oss_fuzz_dir), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True - ) +# try: +# process = subprocess.Popen( +# cmd, +# shell=True, +# cwd=str(oss_fuzz_dir), +# stdout=subprocess.PIPE, +# stderr=subprocess.PIPE, +# text=True +# ) - stdout, stderr = process.communicate() - exit_code = process.returncode +# stdout, stderr = process.communicate() +# exit_code = process.returncode - if exit_code in allowed_exit_codes: - return exit_code +# if exit_code in allowed_exit_codes: +# return exit_code - # Build detailed error message - error_msg = f"Command failed (exit code: {exit_code})" - if project: - error_msg += f" for project: {project}" +# # Build detailed error message +# error_msg = f"Command failed (exit code: {exit_code})" +# if project: +# error_msg += f" for project: {project}" - if stderr.strip(): - error_msg += f"\nError output:\n{stderr.strip()}" +# if stderr.strip(): +# error_msg += f"\nError output:\n{stderr.strip()}" - if stdout.strip(): - error_msg += f"\nOutput:\n{stdout.strip()}" +# if stdout.strip(): +# error_msg += f"\nOutput:\n{stdout.strip()}" - raise CommandError(error_msg, project=project, exit_code=exit_code) +# raise CommandError(error_msg, project=project, exit_code=exit_code) - except FileNotFoundError as e: - raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e - except OSError as e: - raise CommandError(f"System error: {e}", project=project) from e - except subprocess.SubprocessError as e: - raise CommandError(f"Subprocess error: {e}", project=project) from e +# except FileNotFoundError as e: +# raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e +# except OSError as e: +# raise CommandError(f"System error: {e}", project=project) from e +# except subprocess.SubprocessError as e: +# raise CommandError(f"Subprocess error: {e}", project=project) from e def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: """Fuzzer build workflow""" diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py index fe147d8..372e13a 100644 --- a/fuzz/run_fuzz_all_targets.py +++ b/fuzz/run_fuzz_all_targets.py @@ -25,78 +25,75 @@ from pathlib import Path from multiprocessing import Pool, cpu_count from returns.maybe import Maybe, Nothing, Some - - - - - -def run_command( - cmd: str, - log_msg: str, - logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Nothing, - timeout: int = 3600 # Default 1-hour timeout -) -> bool: - """Execute commands with real-time logging and precise error handling""" - allowed_codes = allowed_exit_codes.value_or([]) - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") - - process = None - try: - process = subprocess.Popen( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace" - ) - - start_time = time.time() - while process.poll() is None: - if time.time() - start_time > timeout: - logger.error(f"⌛ Command timed out after {timeout} seconds") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - return False - - if process.stdout: - line = process.stdout.readline() - if line: - logger.debug(line.strip()) - else: - time.sleep(0.1) - - exit_code = process.returncode - if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ Command execution failed, exit code: {exit_code}") - return False - return True - - except FileNotFoundError: - logger.error(f"🔍 Command not found: {cmd.split()[0]}") - return False - except PermissionError: - logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") - return False - except subprocess.SubprocessError as e: - logger.exception(f"💥 Subprocess error: {e}") - return False - except OSError as e: - logger.exception(f"💥 Operating system error during command execution: {e}") - return False - finally: - if process and process.poll() is None: - try: - process.terminate() - process.wait(timeout=5) - except Exception: - pass +from command_util import run_command_fuzz_all_targets as run_command + +# def run_command( +# cmd: str, +# log_msg: str, +# logger: logging.Logger, +# allowed_exit_codes: Maybe[list[int]] = Nothing, +# timeout: int = 3600 # Default 1-hour timeout +# ) -> bool: +# """Execute commands with real-time logging and precise error handling""" +# allowed_codes = allowed_exit_codes.value_or([]) +# logger.info(f"▶️ {log_msg}...") +# logger.debug(f" $ {cmd}") + +# process = None +# try: +# process = subprocess.Popen( +# cmd, +# shell=True, +# stdout=subprocess.PIPE, +# stderr=subprocess.STDOUT, +# text=True, +# encoding="utf-8", +# errors="replace" +# ) + +# start_time = time.time() +# while process.poll() is None: +# if time.time() - start_time > timeout: +# logger.error(f"⌛ Command timed out after {timeout} seconds") +# process.terminate() +# try: +# process.wait(timeout=5) +# except subprocess.TimeoutExpired: +# process.kill() +# return False + +# if process.stdout: +# line = process.stdout.readline() +# if line: +# logger.debug(line.strip()) +# else: +# time.sleep(0.1) + +# exit_code = process.returncode +# if exit_code not in [0, *allowed_codes]: +# logger.error(f"❌ Command execution failed, exit code: {exit_code}") +# return False +# return True + +# except FileNotFoundError: +# logger.error(f"🔍 Command not found: {cmd.split()[0]}") +# return False +# except PermissionError: +# logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") +# return False +# except subprocess.SubprocessError as e: +# logger.exception(f"💥 Subprocess error: {e}") +# return False +# except OSError as e: +# logger.exception(f"💥 Operating system error during command execution: {e}") +# return False +# finally: +# if process and process.poll() is None: +# try: +# process.terminate() +# process.wait(timeout=5) +# except Exception: +# pass def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: From 5e28b5c989949f087dfc47cd8b638cb353ed2bef Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:28:46 +0000 Subject: [PATCH 066/134] mytype check --- fuzz/command_util.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 7455af2..bb8e0dd 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -1,24 +1,20 @@ -# command_util.py - -import subprocess -import logging -from returns.maybe import Maybe -from errors import CommandError -from pathlib import Path +# fuzz/command_util.py import subprocess import time import logging from pathlib import Path +from typing import Optional from returns.maybe import Maybe from errors import CommandError + def _run_subprocess( cmd: str, - cwd: Path = None, + cwd: Optional[Path] = None, capture_output: bool = False, - timeout: int = None, - logger: logging.Logger = None, + timeout: Optional[int] = None, + logger: Optional[logging.Logger] = None, ) -> tuple[int, list[str]]: """ 低层执行子进程命令 @@ -43,6 +39,7 @@ def _run_subprocess( try: if capture_output: + assert process.stdout is not None # ✅ MyPy static check while True: line = process.stdout.readline() if line: @@ -79,7 +76,7 @@ def run_command_build_fuzz( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty(), skip_yes: bool = False ) -> int: """build_fuzz.py 中使用的 run_command,简化版,抛异常""" @@ -98,7 +95,7 @@ def run_command_fuzz_all_targets( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty(), timeout: int = 3600, ) -> bool: """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回bool""" @@ -111,4 +108,3 @@ def run_command_fuzz_all_targets( logger.error(f"❌ Command execution failed, exit code: {exit_code}") return False return True - From 368c0e4699a48e3d19d5859c7f4636f5e3edb383 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:45:10 +0000 Subject: [PATCH 067/134] mytype --- fuzz/command_util.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index bb8e0dd..0d04364 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -4,11 +4,10 @@ import time import logging from pathlib import Path -from typing import Optional +from typing import Optional, cast from returns.maybe import Maybe from errors import CommandError - def _run_subprocess( cmd: str, cwd: Optional[Path] = None, @@ -39,9 +38,13 @@ def _run_subprocess( try: if capture_output: - assert process.stdout is not None # ✅ MyPy static check + # 类型断言确保 stdout 不为 None + stdout = cast(Optional[subprocess.PIPE], process.stdout) + if stdout is None: + raise RuntimeError("Stdout should not be None when capture_output is True") + while True: - line = process.stdout.readline() + line = stdout.readline() if line: output_lines.append(line.rstrip()) if logger: @@ -49,7 +52,7 @@ def _run_subprocess( elif process.poll() is not None: break - if timeout and (time.time() - start_time) > timeout: + if timeout is not None and (time.time() - start_time) > timeout: if logger: logger.error(f"⌛ Command timed out after {timeout} seconds") process.terminate() @@ -107,4 +110,4 @@ def run_command_fuzz_all_targets( if exit_code not in [0, *allowed_codes]: logger.error(f"❌ Command execution failed, exit code: {exit_code}") return False - return True + return True \ No newline at end of file From 1be1dfa648f56fd4547d7390e718c7b7efc90bcc Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:49:18 +0000 Subject: [PATCH 068/134] mytype --- fuzz/command_util.py | 59 +++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 0d04364..76032ec 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -1,24 +1,24 @@ -# fuzz/command_util.py +# command_util.py import subprocess -import time import logging +import time from pathlib import Path -from typing import Optional, cast +from typing import Optional, Tuple, List from returns.maybe import Maybe from errors import CommandError def _run_subprocess( cmd: str, - cwd: Optional[Path] = None, + cwd: Optional[Path] = None, # 修复:添加 Optional 类型 capture_output: bool = False, - timeout: Optional[int] = None, - logger: Optional[logging.Logger] = None, -) -> tuple[int, list[str]]: + timeout: Optional[int] = None, # 修复:添加 Optional 类型 + logger: Optional[logging.Logger] = None, # 修复:添加 Optional 类型 +) -> Tuple[int, List[str]]: # 建议使用 Tuple 替代 tuple """ 低层执行子进程命令 - capture_output=True:捕获 stdout,返回输出列表 - - timeout 秒超时(无超时则None) + - timeout 秒超时(无超时则 None) - logger 用于实时打印输出 返回:(退出码, 输出行列表) """ @@ -33,28 +33,29 @@ def _run_subprocess( errors="replace", ) - output_lines = [] + output_lines: List[str] = [] start_time = time.time() try: if capture_output: - # 类型断言确保 stdout 不为 None - stdout = cast(Optional[subprocess.PIPE], process.stdout) - if stdout is None: - raise RuntimeError("Stdout should not be None when capture_output is True") + # 确保 stdout 不是 None + if process.stdout is None: + raise RuntimeError("stdout is unexpectedly None in capture mode") while True: - line = stdout.readline() + line = process.stdout.readline() if line: - output_lines.append(line.rstrip()) + line_stripped = line.rstrip() + output_lines.append(line_stripped) if logger: - logger.debug(line.rstrip()) + logger.debug(line_stripped) elif process.poll() is not None: break - if timeout is not None and (time.time() - start_time) > timeout: + # 处理超时逻辑 + if timeout and (time.time() - start_time) > timeout: if logger: - logger.error(f"⌛ Command timed out after {timeout} seconds") + logger.error(f"⌛ 命令超时,耗时 {timeout} 秒") process.terminate() try: process.wait(timeout=5) @@ -68,9 +69,9 @@ def _run_subprocess( except Exception as e: if logger: - logger.exception(f"Error during command execution: {e}") + logger.exception(f"执行命令时出错:{e}") process.kill() - raise e + raise CommandError(f"Command failed: {str(e)}") from e return process.returncode, output_lines @@ -79,7 +80,7 @@ def run_command_build_fuzz( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Maybe[list[int]] = Maybe.empty(), + allowed_exit_codes: Maybe[List[int]] = Maybe.empty, skip_yes: bool = False ) -> int: """build_fuzz.py 中使用的 run_command,简化版,抛异常""" @@ -87,7 +88,7 @@ def run_command_build_fuzz( cmd_str = f"yes | {cmd}" if not skip_yes else cmd exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir) if exit_code not in allowed_codes: - error_msg = f"Command failed (exit code: {exit_code})" + error_msg = f"命令失败(退出码:{exit_code})" if project: error_msg += f" for project: {project}" raise CommandError(error_msg, project=project, exit_code=exit_code) @@ -98,16 +99,22 @@ def run_command_fuzz_all_targets( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Maybe.empty(), + allowed_exit_codes: Maybe[List[int]] = Maybe.empty, timeout: int = 3600, ) -> bool: - """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回bool""" + """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回 bool""" logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") allowed_codes = allowed_exit_codes.value_or([]) - exit_code, _ = _run_subprocess(cmd, capture_output=True, timeout=timeout, logger=logger) + exit_code, _ = _run_subprocess( + cmd, + capture_output=True, + timeout=timeout, + logger=logger + ) + if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ Command execution failed, exit code: {exit_code}") + logger.error(f"❌ 命令执行失败,退出码:{exit_code}") return False return True \ No newline at end of file From 09ba145886f9cff5ef295553f93ff0fd508c18da Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 00:58:19 +0000 Subject: [PATCH 069/134] mytype --- fuzz/command_util.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 76032ec..63e2c76 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -4,17 +4,17 @@ import logging import time from pathlib import Path -from typing import Optional, Tuple, List +from typing import Optional from returns.maybe import Maybe from errors import CommandError def _run_subprocess( cmd: str, - cwd: Optional[Path] = None, # 修复:添加 Optional 类型 + cwd: Optional[Path] = None, capture_output: bool = False, - timeout: Optional[int] = None, # 修复:添加 Optional 类型 - logger: Optional[logging.Logger] = None, # 修复:添加 Optional 类型 -) -> Tuple[int, List[str]]: # 建议使用 Tuple 替代 tuple + timeout: Optional[int] = None, + logger: Optional[logging.Logger] = None, +) -> tuple[int, list[str]]: """ 低层执行子进程命令 - capture_output=True:捕获 stdout,返回输出列表 @@ -33,12 +33,12 @@ def _run_subprocess( errors="replace", ) - output_lines: List[str] = [] + output_lines: list[str] = [] start_time = time.time() try: if capture_output: - # 确保 stdout 不是 None + if process.stdout is None: raise RuntimeError("stdout is unexpectedly None in capture mode") @@ -52,7 +52,7 @@ def _run_subprocess( elif process.poll() is not None: break - # 处理超时逻辑 + # Handle timeout logic if timeout and (time.time() - start_time) > timeout: if logger: logger.error(f"⌛ 命令超时,耗时 {timeout} 秒") @@ -64,7 +64,7 @@ def _run_subprocess( return -1, output_lines time.sleep(0.05) else: - # 不捕获输出,直接等待结束 + process.wait(timeout=timeout) except Exception as e: @@ -80,7 +80,7 @@ def run_command_build_fuzz( cmd: str, oss_fuzz_dir: Path, project: str = "", - allowed_exit_codes: Maybe[List[int]] = Maybe.empty, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: """build_fuzz.py 中使用的 run_command,简化版,抛异常""" @@ -99,7 +99,7 @@ def run_command_fuzz_all_targets( cmd: str, log_msg: str, logger: logging.Logger, - allowed_exit_codes: Maybe[List[int]] = Maybe.empty, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, timeout: int = 3600, ) -> bool: """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回 bool""" From 7a1f248f2079437809d5ecb89d48037c10bb9d28 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 01:05:34 +0000 Subject: [PATCH 070/134] translate --- fuzz/command_util.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 63e2c76..198de4b 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -16,11 +16,11 @@ def _run_subprocess( logger: Optional[logging.Logger] = None, ) -> tuple[int, list[str]]: """ - 低层执行子进程命令 - - capture_output=True:捕获 stdout,返回输出列表 - - timeout 秒超时(无超时则 None) - - logger 用于实时打印输出 - 返回:(退出码, 输出行列表) + Execute child process commands at a lower level + - capture_output=True: Captures stdout, returns a list of outputs + - timeout (None without timeout) + Logger for real-time printouts + Return: (Exit Code, Output Line List) """ process = subprocess.Popen( cmd, @@ -83,12 +83,12 @@ def run_command_build_fuzz( allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: - """build_fuzz.py 中使用的 run_command,简化版,抛异常""" + """run_command used in build_fuzz.py, build_fuzzers.py """ allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir) if exit_code not in allowed_codes: - error_msg = f"命令失败(退出码:{exit_code})" + error_msg = f"The command failed(exit code:{exit_code})" if project: error_msg += f" for project: {project}" raise CommandError(error_msg, project=project, exit_code=exit_code) @@ -102,7 +102,7 @@ def run_command_fuzz_all_targets( allowed_exit_codes: Maybe[list[int]] = Maybe.empty, timeout: int = 3600, ) -> bool: - """run_fuzz_all_targets_print1.py 中使用,带实时日志与超时,返回 bool""" + """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") @@ -115,6 +115,6 @@ def run_command_fuzz_all_targets( ) if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ 命令执行失败,退出码:{exit_code}") + logger.error(f"❌ The command failed, exit code:{exit_code}") return False return True \ No newline at end of file From 6a062bb590c376663aa10f7e61a549337658bca3 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 01:07:02 +0000 Subject: [PATCH 071/134] remove run command --- fuzz/run_fuzz_all_targets.py | 69 ------------------------------------ 1 file changed, 69 deletions(-) diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py index 372e13a..8990e33 100644 --- a/fuzz/run_fuzz_all_targets.py +++ b/fuzz/run_fuzz_all_targets.py @@ -27,75 +27,6 @@ from returns.maybe import Maybe, Nothing, Some from command_util import run_command_fuzz_all_targets as run_command -# def run_command( -# cmd: str, -# log_msg: str, -# logger: logging.Logger, -# allowed_exit_codes: Maybe[list[int]] = Nothing, -# timeout: int = 3600 # Default 1-hour timeout -# ) -> bool: -# """Execute commands with real-time logging and precise error handling""" -# allowed_codes = allowed_exit_codes.value_or([]) -# logger.info(f"▶️ {log_msg}...") -# logger.debug(f" $ {cmd}") - -# process = None -# try: -# process = subprocess.Popen( -# cmd, -# shell=True, -# stdout=subprocess.PIPE, -# stderr=subprocess.STDOUT, -# text=True, -# encoding="utf-8", -# errors="replace" -# ) - -# start_time = time.time() -# while process.poll() is None: -# if time.time() - start_time > timeout: -# logger.error(f"⌛ Command timed out after {timeout} seconds") -# process.terminate() -# try: -# process.wait(timeout=5) -# except subprocess.TimeoutExpired: -# process.kill() -# return False - -# if process.stdout: -# line = process.stdout.readline() -# if line: -# logger.debug(line.strip()) -# else: -# time.sleep(0.1) - -# exit_code = process.returncode -# if exit_code not in [0, *allowed_codes]: -# logger.error(f"❌ Command execution failed, exit code: {exit_code}") -# return False -# return True - -# except FileNotFoundError: -# logger.error(f"🔍 Command not found: {cmd.split()[0]}") -# return False -# except PermissionError: -# logger.error(f"🔒 Insufficient permissions to execute command: {cmd}") -# return False -# except subprocess.SubprocessError as e: -# logger.exception(f"💥 Subprocess error: {e}") -# return False -# except OSError as e: -# logger.exception(f"💥 Operating system error during command execution: {e}") -# return False -# finally: -# if process and process.poll() is None: -# try: -# process.terminate() -# process.wait(timeout=5) -# except Exception: -# pass - - def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" out_dir = oss_fuzz_dir / "build" / "out" / project_name From 26eceebe4ec971a2cc1345e51e4cc6609d17a617 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 18:49:16 +0000 Subject: [PATCH 072/134] timeout - shell instrument --- fuzz/command_util.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 198de4b..f29c9a3 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -18,10 +18,16 @@ def _run_subprocess( """ Execute child process commands at a lower level - capture_output=True: Captures stdout, returns a list of outputs - - timeout (None without timeout) - Logger for real-time printouts + - timeout: Uses shell's `timeout` command for timeout handling + - logger: For real-time printouts Return: (Exit Code, Output Line List) """ + # 如果有超时要求,使用 shell 的 timeout 命令 + if timeout and timeout > 0: + cmd = f"timeout {timeout} {cmd}" + if logger: + logger.debug(f"⌛ Adding timeout ({timeout}s) to command") + process = subprocess.Popen( cmd, shell=True, @@ -34,7 +40,6 @@ def _run_subprocess( ) output_lines: list[str] = [] - start_time = time.time() try: if capture_output: @@ -51,21 +56,9 @@ def _run_subprocess( logger.debug(line_stripped) elif process.poll() is not None: break - - # Handle timeout logic - if timeout and (time.time() - start_time) > timeout: - if logger: - logger.error(f"⌛ 命令超时,耗时 {timeout} 秒") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - return -1, output_lines time.sleep(0.05) else: - - process.wait(timeout=timeout) + process.wait() except Exception as e: if logger: @@ -83,12 +76,16 @@ def run_command_build_fuzz( allowed_exit_codes: Maybe[list[int]] = Maybe.empty, skip_yes: bool = False ) -> int: - """run_command used in build_fuzz.py, build_fuzzers.py """ + """run_command used in build_fuzz.py, build_fuzzers.py""" allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir) + + # 处理 timeout 的特殊退出码 (124) + exit_code = 124 if exit_code == 124 else exit_code + if exit_code not in allowed_codes: - error_msg = f"The command failed(exit code:{exit_code})" + error_msg = f"The command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" raise CommandError(error_msg, project=project, exit_code=exit_code) @@ -106,7 +103,9 @@ def run_command_fuzz_all_targets( logger.info(f"▶️ {log_msg}...") logger.debug(f" $ {cmd}") - allowed_codes = allowed_exit_codes.value_or([]) + # 允许超时退出码 124 + allowed_codes = allowed_exit_codes.value_or([]) + [124] + exit_code, _ = _run_subprocess( cmd, capture_output=True, @@ -114,7 +113,11 @@ def run_command_fuzz_all_targets( logger=logger ) + # 返回 124 表示超时 + if exit_code == 124: + logger.warning(f"⌛ Command timed out after {timeout} seconds") + if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ The command failed, exit code:{exit_code}") + logger.error(f"❌ The command failed, exit code: {exit_code}") return False return True \ No newline at end of file From 7e91c6c5c2c5e6f32bcd4d54955dfb4dc24e056d Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 29 Jul 2025 21:09:39 +0000 Subject: [PATCH 073/134] correct in out error and return Popen directly --- fuzz/command_util.py | 190 +++++++++++++++++++++++++++++-------------- 1 file changed, 128 insertions(+), 62 deletions(-) diff --git a/fuzz/command_util.py b/fuzz/command_util.py index f29c9a3..8270d64 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -3,31 +3,36 @@ import subprocess import logging import time +import os +import pty +import tty +import termios from pathlib import Path -from typing import Optional +from typing import Optional, Tuple, List from returns.maybe import Maybe from errors import CommandError -def _run_subprocess( +def create_popen_object( cmd: str, - cwd: Optional[Path] = None, + cwd: Optional[Path] = None, capture_output: bool = False, - timeout: Optional[int] = None, - logger: Optional[logging.Logger] = None, -) -> tuple[int, list[str]]: + timeout: Optional[int] = None, + logger: Optional[logging.Logger] = None, +) -> subprocess.Popen: """ - Execute child process commands at a lower level - - capture_output=True: Captures stdout, returns a list of outputs - - timeout: Uses shell's `timeout` command for timeout handling - - logger: For real-time printouts - Return: (Exit Code, Output Line List) + 创建并返回 Popen 对象,不等待进程结束 + - capture_output: 是否捕获输出 + - timeout: 使用 shell 的 timeout 命令处理超时 + - logger: 用于实时打印输出 + 返回: Popen 对象 """ - # 如果有超时要求,使用 shell 的 timeout 命令 + # 添加超时命令 if timeout and timeout > 0: - cmd = f"timeout {timeout} {cmd}" + cmd = f"timeout {timeout}s {cmd}" if logger: logger.debug(f"⌛ Adding timeout ({timeout}s) to command") + # 创建 Popen 对象 process = subprocess.Popen( cmd, shell=True, @@ -38,36 +43,97 @@ def _run_subprocess( encoding="utf-8", errors="replace", ) + + return process - output_lines: list[str] = [] +def run_subprocess_with_pty( + cmd: str, + cwd: Optional[Path] = None, + timeout: Optional[int] = None, + logger: Optional[logging.Logger] = None, +) -> Tuple[int, List[str]]: + """ + 使用伪终端执行命令,解决终端设置问题 + - timeout: 使用 shell 的 timeout 命令处理超时 + - logger: 用于实时打印输出 + 返回: (退出码, 输出行列表) + """ + # 添加超时命令 + if timeout and timeout > 0: + cmd = f"timeout {timeout}s {cmd}" + if logger: + logger.debug(f"⌛ Adding timeout ({timeout}s) to command") + # 使用伪终端执行命令 + master_fd, slave_fd = pty.openpty() + + # 设置伪终端为原始模式 + old_settings = termios.tcgetattr(master_fd) + tty.setraw(master_fd) + + process = subprocess.Popen( + cmd, + shell=True, + cwd=str(cwd) if cwd else None, + stdin=slave_fd, + stdout=slave_fd, + stderr=slave_fd, + close_fds=True, + start_new_session=True + ) + + os.close(slave_fd) + + output_lines = [] try: - if capture_output: - - if process.stdout is None: - raise RuntimeError("stdout is unexpectedly None in capture mode") - - while True: - line = process.stdout.readline() - if line: - line_stripped = line.rstrip() - output_lines.append(line_stripped) - if logger: - logger.debug(line_stripped) - elif process.poll() is not None: + while True: + try: + data = os.read(master_fd, 1024) + if not data: break - time.sleep(0.05) - else: - process.wait() - - except Exception as e: - if logger: - logger.exception(f"执行命令时出错:{e}") - process.kill() - raise CommandError(f"Command failed: {str(e)}") from e - + decoded = data.decode("utf-8", "replace") + output_lines.append(decoded.strip()) + if logger: + logger.debug(decoded.strip()) + except OSError: + break + finally: + # 恢复终端设置 + termios.tcsetattr(master_fd, termios.TCSADRAIN, old_settings) + os.close(master_fd) + process.wait() + return process.returncode, output_lines +def run_command_fuzz_all_targets( + cmd: str, + log_msg: str, + logger: logging.Logger, + allowed_exit_codes: Maybe[list[int]] = Maybe.empty, + timeout: int = 3600, +) -> bool: + """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" + logger.info(f"▶️ {log_msg}...") + logger.debug(f" $ {cmd}") + + # 允许超时退出码 124 + allowed_codes = allowed_exit_codes.value_or([]) + [124] + + # 使用伪终端解决终端设置问题 + exit_code, _ = run_subprocess_with_pty( + cmd, + timeout=timeout, + logger=logger + ) + + # 返回 124 表示超时 + if exit_code == 124: + logger.warning(f"⌛ Command timed out after {timeout} seconds") + + if exit_code not in [0, *allowed_codes]: + logger.error(f"❌ The command failed, exit code: {exit_code}") + return False + return True def run_command_build_fuzz( cmd: str, @@ -92,32 +158,32 @@ def run_command_build_fuzz( return exit_code -def run_command_fuzz_all_targets( - cmd: str, - log_msg: str, - logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, - timeout: int = 3600, -) -> bool: - """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") +# def run_command_fuzz_all_targets( +# cmd: str, +# log_msg: str, +# logger: logging.Logger, +# allowed_exit_codes: Maybe[list[int]] = Maybe.empty, +# timeout: int = 3600, +# ) -> bool: +# """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" +# logger.info(f"▶️ {log_msg}...") +# logger.debug(f" $ {cmd}") - # 允许超时退出码 124 - allowed_codes = allowed_exit_codes.value_or([]) + [124] +# # 允许超时退出码 124 +# allowed_codes = allowed_exit_codes.value_or([]) + [124] - exit_code, _ = _run_subprocess( - cmd, - capture_output=True, - timeout=timeout, - logger=logger - ) +# exit_code, _ = _run_subprocess( +# cmd, +# capture_output=True, +# timeout=timeout, +# logger=logger +# ) - # 返回 124 表示超时 - if exit_code == 124: - logger.warning(f"⌛ Command timed out after {timeout} seconds") +# # 返回 124 表示超时 +# if exit_code == 124: +# logger.warning(f"⌛ Command timed out after {timeout} seconds") - if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ The command failed, exit code: {exit_code}") - return False - return True \ No newline at end of file +# if exit_code not in [0, *allowed_codes]: +# logger.error(f"❌ The command failed, exit code: {exit_code}") +# return False +# return True \ No newline at end of file From f9edfafa40b39549f2951d502a79a26a04a9fc97 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 2 Aug 2025 02:15:32 +0000 Subject: [PATCH 074/134] ready to change from rust script --- fuzz/collect_fuzz_python.py | 267 ++++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 fuzz/collect_fuzz_python.py diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py new file mode 100644 index 0000000..bcc4b5e --- /dev/null +++ b/fuzz/collect_fuzz_python.py @@ -0,0 +1,267 @@ +"""script for rust fuzzing and transforming test_template""" + +import logging +from typing import Optional +import fire +import os +from UniTSyn.frontend.util import wrap_repo, parallel_subprocess +import subprocess +from os.path import join as pjoin, abspath +from tqdm import tqdm +from pathos.multiprocessing import ProcessingPool +import random +from difflib import SequenceMatcher +from itertools import islice + + +def transform_repos(repos: list[str], jobs: int): + def transform_one_repo(repo_path: str): + return subprocess.Popen( + ["rust-fuzzer-gen", repo_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + logging.info(f"Running rust-fuzz-gen on {len(repos)} repos") + parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None) + + +def get_target_list(p: subprocess.Popen): + match p.stdout: + case None: + return [] + case _: + return p.stdout.read().decode("utf-8").split("\n") + + +def fuzz_one_target(target: tuple[str, str], timeout): + repo_path, target_name = target + with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f: + return subprocess.Popen( + # todo: find out why -max_total_time doesn't work + # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"], + [ + "bash", + "-c", + f"timeout {timeout} cargo fuzz run {target_name}", + ], + cwd=repo_path, + stdout=f, + stderr=subprocess.DEVNULL, + ) + + +def build(repos: list[str], jobs: int): + logging.info(f"Building fuzzing targets in {len(repos)} repos") + _ = parallel_subprocess( + repos, + jobs, + lambda path: subprocess.Popen( + ["cargo", "fuzz", "build"], + cwd=path, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ), + on_exit=None, + ) + + +def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): + logging.info("Collecting all fuzz targets") + + target_map = parallel_subprocess( + repos, + jobs, + lambda path: subprocess.Popen( + ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE + ), + on_exit=get_target_list, + ) + targets: list[tuple[str, str]] = [ + (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0 + ] + for repo in repos: + os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) + + logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds") + parallel_subprocess( + targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None + ) + + +def substitute_input(template: str, input_data: str, idx: int) -> str: + return template.replace( + '[] ; # [doc = "This is a test template"]', f"{input_data} ; " + ).replace("fn test_something ()", f"fn test_{idx} ()") + + +def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: + def similar(a, b): + return SequenceMatcher(None, a, b).ratio() + + return any(map(lambda y: similar(x, y) > thresh, selected)) + + +def substitute_one_repo( + repo: str, + targets: list[str], + n_fuzz: int, + strategy: str, + max_len: int, + sim_thresh: float, +): + template_dir = pjoin(repo, "tests-gen") + input_dir = pjoin(repo, "fuzz_inputs") + for t in targets: + if t == "": + continue + + # format template before loading + template_path = pjoin(template_dir, t + ".rs") + try: + with open(template_path) as f_template: + template = f_template.read() + with open(pjoin(input_dir, t), "r") as f_input: + all_inputs = [i for i in f_input.read().splitlines() if i != "[]"] + + inputs: list[str] + if strategy == "shuffle": + random.shuffle(all_inputs) + inputs = list( + islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz) + ) + elif strategy == "reverse": + inputs = [] + for x in reversed(all_inputs): + if len(inputs) >= n_fuzz: + break + if len(x) > max_len or has_similar(inputs, x, sim_thresh): + continue + inputs.append(x) + + else: + inputs = all_inputs[:n_fuzz] + + tests = [ + substitute_input(template, input_data, i) + for i, input_data in enumerate(inputs) + ] + generated_test_path = pjoin(template_dir, f"{t}.inputs.rs") + with open(generated_test_path, "w") as f_template: + f_template.write("\n".join(tests)) + + # format generated tests + subprocess.run(["rustfmt", str(generated_test_path)], check=False) + except FileNotFoundError: + logging.debug(f"Template {template_path} not found") + + +def testgen_repos( + repos: list[str], + jobs: int, + n_fuzz: int = 100, + strategy: str = "shuffle", + max_len: int = 100, + sim_thresh: float = 0.8, +): + """Generate tests from fuzz inputs + + Args: + repos (list[str]): list of repo paths + jobs (int): number of parallel jobs to use + n_fuzz (int, optional): number of fuzz data to use. Defaults to 100. + """ + target_map = parallel_subprocess( + repos, + jobs, + lambda path: subprocess.Popen( + ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE + ), + on_exit=get_target_list, + use_tqdm=False, + ) + logging.info("Substitute fuzz data to test templates") + with ProcessingPool(jobs) as p: + _ = list( + tqdm( + p.map( + lambda item: substitute_one_repo( + item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + ), + target_map.items(), + ) + ) + ) + + +def main( + repo_id: str = "image-rs/image-png", + repo_root: str = "data/rust_repos/", + timeout: int = 60, + jobs: int = 80, + limits: Optional[int] = None, + pipeline: str = "transform", + n_fuzz: int = 100, + strategy: str = "shuffle", + max_len: int = 100, + sim_thresh: float = 0.8, +): + """collect fuzzing data from rust repos + + Args: + repo_id (str, optional): repo id. Defaults to "marshallpierce/rust-base64". + repo_root (str, optional): directory contains all the repos. Defaults to "data/rust_repos/". + timeout (int, optional): max_total_time to fuzz. Defaults to 60. + jobs (int, optional): number of parallel jobs to use. Defaults to CORES. + limits (Optional[int], optional): number of repos to process, None if use all of them. + pipeline (str, optional): what to do. Defaults to "transform". + + --- below only needed for testgen pipeline --- + n_fuzz (int, optional): number of fuzz data to use. Defaults to 100. + strategy (str, optional): shuffle or reverse, + max_len (int, optional): maximum length for fuzzing inputs + sim_thresh (float, optional), similarity threshold for fuzzing inputs + """ + try: + repo_id_list = [ + ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0 + ] + except FileNotFoundError: + repo_id_list = [repo_id] + if limits is not None: + repo_id_list = repo_id_list[:limits] + logging.info(f"Loaded {len(repo_id_list)} repos to be processed") + + logging.info("Collecting all rust repos") + repos = [] + for repo_id in repo_id_list: + repo_path = os.path.join(repo_root, wrap_repo(repo_id)) + if os.path.exists(repo_path) and os.path.isdir(repo_path): + subdirectories = [ + os.path.join(repo_path, d) + for d in os.listdir(repo_path) + if os.path.isdir(os.path.join(repo_path, d)) + ] + repos.append(abspath(subdirectories[0])) + + match pipeline: + case "transform": + transform_repos(repos, jobs) + case "build": + build(repos, jobs) + case "fuzz": + fuzz_repos(repos, jobs, timeout=timeout) + case "testgen": + testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) + case "all": + transform_repos(repos, jobs) + build(repos, jobs) + fuzz_repos(repos, jobs, timeout=timeout) + testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) + case _: + logging.error(f"Unknown pipeline {pipeline}") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + fire.Fire(main) From 3821133bcaac57a578908117a0aeaa53f616ec76 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 19:08:15 +0000 Subject: [PATCH 075/134] =?UTF-8?q?=E4=BF=AE=E6=94=B9build=5Fimage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 265 ++++++++++++++++++++++++++++-------- 1 file changed, 209 insertions(+), 56 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index bcc4b5e..6073b34 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -1,104 +1,214 @@ -"""script for rust fuzzing and transforming test_template""" - +""" +用于 python 项目模糊测试(fuzzing)和测试模板转换的脚本 +""" +# 导入日志记录模块,用于输出程序运行信息 import logging +# 从 typing 模块导入 Optional,用于类型提示,表示一个值可以是某个类型或者 None from typing import Optional +# 导入 fire 库,用于快速创建命令行界面 import fire +# 导入 os 模块,用于与操作系统交互,如文件路径操作 import os +# 从自定义的 UniTSyn 工具库中导入辅助函数 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess +# 导入 subprocess 模块,用于创建和管理子进程 import subprocess +# 从 os.path 中导入 join 和 abspath,分别用于拼接路径和获取绝对路径 from os.path import join as pjoin, abspath +# 导入 tqdm 库,用于显示进度条 from tqdm import tqdm +# 从 pathos.multiprocessing 导入 ProcessingPool,用于创建进程池以实现并行处理 from pathos.multiprocessing import ProcessingPool +# 导入 random 模块,用于生成随机数 import random +# 从 difflib 导入 SequenceMatcher,用于比较序列(如字符串)的相似度 from difflib import SequenceMatcher +# 从 itertools 导入 islice,用于对迭代器进行切片操作 from itertools import islice -def transform_repos(repos: list[str], jobs: int): - def transform_one_repo(repo_path: str): - return subprocess.Popen( - ["rust-fuzzer-gen", repo_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - - logging.info(f"Running rust-fuzz-gen on {len(repos)} repos") - parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None) +# def transform_repos(repos: list[str], jobs: int): +# """ +# 对一组仓库执行 `rust-fuzzer-gen` 命令,以生成模糊测试模板。 + +# Args: +# repos (list[str]): 包含多个仓库路径的列表。 +# jobs (int): 并行执行的任务数量。 +# """ +# def transform_one_repo(repo_path: str): +# """ +# 对单个仓库启动 `rust-fuzzer-gen` 进程。 + +# Args: +# repo_path (str): 单个仓库的路径。 + +# Returns: +# subprocess.Popen: 启动的子进程对象。 +# """ +# # 启动一个子进程来执行 `rust-fuzzer-gen` 命令 +# return subprocess.Popen( +# ["rust-fuzzer-gen", repo_path], +# stdout=subprocess.PIPE, # 捕获标准输出 +# stderr=subprocess.PIPE, # 捕获标准错误 +# ) + +# # 记录日志,说明正在对多少个仓库进行操作 +# logging.info(f"Running rust-fuzz-gen on {len(repos)} repos") +# # 使用并行处理工具来同时对多个仓库执行 transform_one_repo 函数 +# parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None) def get_target_list(p: subprocess.Popen): + """ + 从子进程的输出中解析出模糊测试目标列表。 + + Args: + p (subprocess.Popen): 一个已完成的子进程对象。 + + Returns: + list[str]: 模糊测试目标的名称列表。 + """ + # 使用 match 语句检查子进程的标准输出 match p.stdout: + # 如果标准输出为 None,则返回空列表 case None: return [] + # 否则,读取标准输出,解码为 UTF-8 字符串,并按换行符分割成列表 case _: return p.stdout.read().decode("utf-8").split("\n") def fuzz_one_target(target: tuple[str, str], timeout): + """ + 对单个模糊测试目标执行 `cargo fuzz run` 命令。 + + Args: + target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 + timeout (int): 模糊测试的超时时间(秒)。 + + Returns: + subprocess.Popen: 启动的模糊测试子进程对象。 + """ + # 解包元组,获取仓库路径和目标名称 repo_path, target_name = target + # 创建一个文件用于存放该目标的模糊测试输入 with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f: + # 启动一个子进程来执行模糊测试命令 return subprocess.Popen( - # todo: find out why -max_total_time doesn't work + # todo: 研究为什么 -max_total_time 参数不起作用 # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"], + # 使用 bash -c 和 timeout 命令来强制实现超时功能 [ "bash", "-c", f"timeout {timeout} cargo fuzz run {target_name}", ], - cwd=repo_path, - stdout=f, - stderr=subprocess.DEVNULL, + cwd=repo_path, # 在指定的仓库路径下执行命令 + stdout=f, # 将标准输出重定向到文件 + stderr=subprocess.DEVNULL, # 丢弃标准错误输出 ) -def build(repos: list[str], jobs: int): - logging.info(f"Building fuzzing targets in {len(repos)} repos") +def build_image(repos: list[str], jobs: int): + """ + 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像。 + + Args: + repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 + jobs (int): 并行任务数。 + """ + logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") + + def _build_cmd(path: str): + # 获取 OSS-Fuzz 项目名(例如路径最后一段或自定义映射) + project_name = os.path.basename(path.rstrip("/")) + return subprocess.Popen( + ["python3", "infra/helper.py", "build_image", project_name], + cwd=os.path.abspath(os.path.join(path, "../../")), # 仓库路径的上上一级 + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + _ = parallel_subprocess( repos, jobs, - lambda path: subprocess.Popen( - ["cargo", "fuzz", "build"], - cwd=path, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ), + _build_cmd, on_exit=None, ) - def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): + """ + 对一组仓库执行模糊测试。 + + Args: + repos (list[str]): 仓库路径列表。 + jobs (int): 并行任务数。 + timeout (int, optional): 每个目标的模糊测试超时时间(秒)。默认为 60。 + """ + # 记录日志,说明正在收集所有模糊测试目标 logging.info("Collecting all fuzz targets") + # 并行执行 `cargo fuzz list` 来获取所有仓库的模糊测试目标 target_map = parallel_subprocess( repos, jobs, lambda path: subprocess.Popen( ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE ), - on_exit=get_target_list, + on_exit=get_target_list, # 使用 get_target_list 函数处理每个子进程的输出 ) + # 将 `target_map` 整理成一个 (仓库路径, 目标名称) 的元组列表 targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0 ] + # 为每个仓库创建存放模糊测试输入的目录 for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) + # 记录日志,说明即将开始模糊测试 logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds") + # 并行执行模糊测试 parallel_subprocess( targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None ) def substitute_input(template: str, input_data: str, idx: int) -> str: + """ + 将模糊测试的输入数据替换到测试模板中。 + + Args: + template (str): 测试模板字符串。 + input_data (str): 单条模糊测试输入数据。 + idx (int): 测试用例的索引号。 + + Returns: + str: 替换完成后的测试代码字符串。 + """ + # 替换模板中的占位符为实际的输入数据 return template.replace( '[] ; # [doc = "This is a test template"]', f"{input_data} ; " + # 替换模板中的函数名为唯一的测试函数名 ).replace("fn test_something ()", f"fn test_{idx} ()") def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: + """ + 检查字符串 `x` 是否与 `selected` 列表中的任何字符串足够相似。 + + Args: + selected (list[str]): 已选择的字符串列表。 + x (str): 待检查的字符串。 + thresh (float, optional): 相似度阈值。默认为 0.8。 + + Returns: + bool: 如果存在相似字符串,则返回 True,否则返回 False。 + """ + # 定义一个内部函数来计算两个字符串的相似度 def similar(a, b): return SequenceMatcher(None, a, b).ratio() + # 检查列表中是否有任何一个字符串与 x 的相似度超过阈值 return any(map(lambda y: similar(x, y) > thresh, selected)) @@ -110,49 +220,73 @@ def substitute_one_repo( max_len: int, sim_thresh: float, ): + """ + 处理单个仓库,将其模糊测试输入替换到测试模板中以生成测试文件。 + + Args: + repo (str): 仓库路径。 + targets (list[str]): 该仓库的模糊测试目标列表。 + n_fuzz (int): 要使用的模糊测试输入数量。 + strategy (str): 选择输入的策略("shuffle", "reverse" 等)。 + max_len (int): 模糊测试输入的最大长度。 + sim_thresh (float): 用于去重的相似度阈值。 + """ + # 定义模板目录和输入目录的路径 template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") + # 遍历该仓库的所有模糊测试目标 for t in targets: - if t == "": + if t == "": # 跳过空目标 continue - # format template before loading template_path = pjoin(template_dir, t + ".rs") try: + # 读取测试模板文件 with open(template_path) as f_template: template = f_template.read() + # 读取对应的模糊测试输入文件 with open(pjoin(input_dir, t), "r") as f_input: + # 过滤掉空的输入行 all_inputs = [i for i in f_input.read().splitlines() if i != "[]"] inputs: list[str] + # 根据不同的策略来选择输入数据 if strategy == "shuffle": + # 随机打乱所有输入 random.shuffle(all_inputs) + # 过滤掉过长的输入,并取前 n_fuzz 个 inputs = list( islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz) ) elif strategy == "reverse": + # 从后往前选择,同时进行去重和长度过滤 inputs = [] for x in reversed(all_inputs): - if len(inputs) >= n_fuzz: + if len(inputs) >= n_fuzz: # 如果已选够,则停止 break + # 如果输入过长或与已选输入相似,则跳过 if len(x) > max_len or has_similar(inputs, x, sim_thresh): continue inputs.append(x) - else: + # 默认策略:直接取前 n_fuzz 个输入 inputs = all_inputs[:n_fuzz] + # 使用选定的输入数据生成测试用例代码 tests = [ substitute_input(template, input_data, i) for i, input_data in enumerate(inputs) ] + # 定义生成的测试文件的路径 generated_test_path = pjoin(template_dir, f"{t}.inputs.rs") + # 将生成的测试代码写入文件 with open(generated_test_path, "w") as f_template: f_template.write("\n".join(tests)) - # format generated tests + # 使用 rustfmt 工具格式化生成的测试文件 subprocess.run(["rustfmt", str(generated_test_path)], check=False) except FileNotFoundError: + # 如果找不到模板文件,则记录一条调试信息 logging.debug(f"Template {template_path} not found") @@ -164,13 +298,18 @@ def testgen_repos( max_len: int = 100, sim_thresh: float = 0.8, ): - """Generate tests from fuzz inputs + """ + 从模糊测试的输入数据生成最终的测试用例。 Args: - repos (list[str]): list of repo paths - jobs (int): number of parallel jobs to use - n_fuzz (int, optional): number of fuzz data to use. Defaults to 100. + repos (list[str]): 仓库路径列表。 + jobs (int): 并行任务数。 + n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。 + strategy (str, optional): 选择输入的策略。默认为 "shuffle"。 + max_len (int, optional): 输入的最大长度。默认为 100。 + sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。 """ + # 首先,获取所有仓库的模糊测试目标 target_map = parallel_subprocess( repos, jobs, @@ -178,13 +317,17 @@ def testgen_repos( ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE ), on_exit=get_target_list, - use_tqdm=False, + use_tqdm=False, # 不在此处显示进度条 ) + # 记录日志 logging.info("Substitute fuzz data to test templates") + # 使用进程池并行处理每个仓库的替换任务 with ProcessingPool(jobs) as p: + # 使用 tqdm 显示总体进度 _ = list( tqdm( p.map( + # 对 target_map 中的每个项(仓库及其目标列表)调用 substitute_one_repo lambda item: substitute_one_repo( item[0], item[1], n_fuzz, strategy, max_len, sim_thresh ), @@ -206,37 +349,43 @@ def main( max_len: int = 100, sim_thresh: float = 0.8, ): - """collect fuzzing data from rust repos + """ + 从 Rust 仓库中收集模糊测试数据的主函数。 Args: - repo_id (str, optional): repo id. Defaults to "marshallpierce/rust-base64". - repo_root (str, optional): directory contains all the repos. Defaults to "data/rust_repos/". - timeout (int, optional): max_total_time to fuzz. Defaults to 60. - jobs (int, optional): number of parallel jobs to use. Defaults to CORES. - limits (Optional[int], optional): number of repos to process, None if use all of them. - pipeline (str, optional): what to do. Defaults to "transform". - - --- below only needed for testgen pipeline --- - n_fuzz (int, optional): number of fuzz data to use. Defaults to 100. - strategy (str, optional): shuffle or reverse, - max_len (int, optional): maximum length for fuzzing inputs - sim_thresh (float, optional), similarity threshold for fuzzing inputs + repo_id (str, optional): 单个仓库 ID 或包含多个仓库 ID 的文件路径。 + repo_root (str, optional): 存放所有仓库的根目录。 + timeout (int, optional): 模糊测试的超时时间。 + jobs (int, optional): 并行任务数。 + limits (Optional[int], optional): 要处理的仓库数量上限,None 表示处理所有。 + pipeline (str, optional): 要执行的流程("transform", "build", "fuzz", "testgen", "all")。 + --- 以下参数仅用于 testgen 流程 --- + n_fuzz (int, optional): 使用的模糊测试输入数量。 + strategy (str, optional): 选择输入的策略。 + max_len (int, optional): 输入的最大长度。 + sim_thresh (float, optional): 输入的相似度阈值。 """ try: + # 尝试将 repo_id 作为一个文件路径打开,读取仓库 ID 列表 repo_id_list = [ ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0 ] except FileNotFoundError: + # 如果文件不存在,则认为 repo_id 就是单个仓库的 ID repo_id_list = [repo_id] + + # 如果设置了数量限制,则对列表进行切片 if limits is not None: repo_id_list = repo_id_list[:limits] logging.info(f"Loaded {len(repo_id_list)} repos to be processed") logging.info("Collecting all rust repos") repos = [] + # 遍历仓库 ID 列表,构建完整的本地路径 for repo_id in repo_id_list: repo_path = os.path.join(repo_root, wrap_repo(repo_id)) if os.path.exists(repo_path) and os.path.isdir(repo_path): + # 找到仓库下的第一个子目录(通常是项目本身) subdirectories = [ os.path.join(repo_path, d) for d in os.listdir(repo_path) @@ -244,17 +393,18 @@ def main( ] repos.append(abspath(subdirectories[0])) + # 根据 `pipeline` 参数选择要执行的流程 match pipeline: - case "transform": - transform_repos(repos, jobs) - case "build": - build(repos, jobs) + # case "transform": + # transform_repos(repos, jobs) + case "build_image": + build_image(repos, jobs) case "fuzz": fuzz_repos(repos, jobs, timeout=timeout) case "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - case "all": - transform_repos(repos, jobs) + case "all": # 执行全部流程 + # transform_repos(repos, jobs) build(repos, jobs) fuzz_repos(repos, jobs, timeout=timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) @@ -262,6 +412,9 @@ def main( logging.error(f"Unknown pipeline {pipeline}") +# 当脚本作为主程序执行时 if __name__ == "__main__": + # 配置日志记录的基本设置,级别为 INFO logging.basicConfig(level=logging.INFO) - fire.Fire(main) + # 使用 fire 库将 main 函数暴露为命令行接口 + fire.Fire(main) \ No newline at end of file From 6e5c7644ba6f64c0148e02ac9ac3bdd07bfba949 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 19:13:24 +0000 Subject: [PATCH 076/134] y/n --- fuzz/collect_fuzz_python.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 6073b34..d590fa5 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -111,7 +111,7 @@ def fuzz_one_target(target: tuple[str, str], timeout): def build_image(repos: list[str], jobs: int): """ - 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像。 + 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像,并自动回答 y/N 提示为 y。 Args: repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 @@ -120,13 +120,13 @@ def build_image(repos: list[str], jobs: int): logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") def _build_cmd(path: str): - # 获取 OSS-Fuzz 项目名(例如路径最后一段或自定义映射) project_name = os.path.basename(path.rstrip("/")) return subprocess.Popen( - ["python3", "infra/helper.py", "build_image", project_name], - cwd=os.path.abspath(os.path.join(path, "../../")), # 仓库路径的上上一级 + ["yes", "|", "python3", "infra/helper.py", "build_image", project_name], + cwd=os.path.abspath(os.path.join(path, "../../")), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + shell=True # 使用 shell 来执行带管道的命令 ) _ = parallel_subprocess( @@ -136,6 +136,7 @@ def _build_cmd(path: str): on_exit=None, ) + def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ 对一组仓库执行模糊测试。 From 00290598dc9d888fff8c861ae2bd835f1bf9a5a5 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 19:28:28 +0000 Subject: [PATCH 077/134] correct repo_id and repo_name in main --- fuzz/collect_fuzz_python.py | 62 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index d590fa5..c1ebd44 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -339,8 +339,8 @@ def testgen_repos( def main( - repo_id: str = "image-rs/image-png", - repo_root: str = "data/rust_repos/", + repo_id: str = "../data/valid_projects3.txt", + repo_root: str = "oss-fuzz/projects/", timeout: int = 60, jobs: int = 80, limits: Optional[int] = None, @@ -351,68 +351,54 @@ def main( sim_thresh: float = 0.8, ): """ - 从 Rust 仓库中收集模糊测试数据的主函数。 + 从多个 OSS-Fuzz 项目中执行指定阶段(构建镜像、模糊测试、测试生成等)的主函数。 Args: - repo_id (str, optional): 单个仓库 ID 或包含多个仓库 ID 的文件路径。 - repo_root (str, optional): 存放所有仓库的根目录。 - timeout (int, optional): 模糊测试的超时时间。 - jobs (int, optional): 并行任务数。 - limits (Optional[int], optional): 要处理的仓库数量上限,None 表示处理所有。 - pipeline (str, optional): 要执行的流程("transform", "build", "fuzz", "testgen", "all")。 - --- 以下参数仅用于 testgen 流程 --- - n_fuzz (int, optional): 使用的模糊测试输入数量。 - strategy (str, optional): 选择输入的策略。 - max_len (int, optional): 输入的最大长度。 - sim_thresh (float, optional): 输入的相似度阈值。 + repo_id (str): 文件路径,包含 OSS-Fuzz 项目名称(每行一个)。 + repo_root (str): 所有 OSS-Fuzz 项目所在的根目录。 + timeout (int): 模糊测试的超时时间。 + jobs (int): 并行任务数。 + limits (Optional[int]): 处理项目数量的上限。 + pipeline (str): 执行阶段:build_image, fuzz, testgen, all。 + n_fuzz, strategy, max_len, sim_thresh: testgen 参数。 """ try: - # 尝试将 repo_id 作为一个文件路径打开,读取仓库 ID 列表 - repo_id_list = [ - ll for line in open(repo_id, "r").readlines() if len(ll := line.strip()) > 0 - ] + with open(repo_id, "r") as f: + repo_id_list = [line.strip() for line in f if line.strip()] except FileNotFoundError: - # 如果文件不存在,则认为 repo_id 就是单个仓库的 ID repo_id_list = [repo_id] - - # 如果设置了数量限制,则对列表进行切片 + if limits is not None: repo_id_list = repo_id_list[:limits] + logging.info(f"Loaded {len(repo_id_list)} repos to be processed") - logging.info("Collecting all rust repos") + logging.info("Collecting all OSS-Fuzz project directories") repos = [] - # 遍历仓库 ID 列表,构建完整的本地路径 for repo_id in repo_id_list: - repo_path = os.path.join(repo_root, wrap_repo(repo_id)) - if os.path.exists(repo_path) and os.path.isdir(repo_path): - # 找到仓库下的第一个子目录(通常是项目本身) - subdirectories = [ - os.path.join(repo_path, d) - for d in os.listdir(repo_path) - if os.path.isdir(os.path.join(repo_path, d)) - ] - repos.append(abspath(subdirectories[0])) + repo_path = abspath(os.path.join(repo_root, repo_id)) + if os.path.isdir(repo_path): + repos.append(repo_path) - # 根据 `pipeline` 参数选择要执行的流程 match pipeline: - # case "transform": - # transform_repos(repos, jobs) case "build_image": build_image(repos, jobs) case "fuzz": fuzz_repos(repos, jobs, timeout=timeout) case "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - case "all": # 执行全部流程 - # transform_repos(repos, jobs) - build(repos, jobs) + case "all": + build_image(repos, jobs) fuzz_repos(repos, jobs, timeout=timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) case _: logging.error(f"Unknown pipeline {pipeline}") +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + fire.Fire(main) + # 当脚本作为主程序执行时 if __name__ == "__main__": # 配置日志记录的基本设置,级别为 INFO From 1d815ebd8d75a3c739cc253737346cc822c73bee Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 19:48:25 +0000 Subject: [PATCH 078/134] =?UTF-8?q?test=20build=5Fimage=20=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index c1ebd44..32152ab 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -109,9 +109,11 @@ def fuzz_one_target(target: tuple[str, str], timeout): ) +from datetime import datetime + def build_image(repos: list[str], jobs: int): """ - 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像,并自动回答 y/N 提示为 y。 + 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像,并将构建日志写入 fuzz_pipeline_log 目录。 Args: repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 @@ -119,14 +121,21 @@ def build_image(repos: list[str], jobs: int): """ logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + def _build_cmd(path: str): project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") + + logging.info(f"Start building {project_name}, logging to {log_file}") return subprocess.Popen( - ["yes", "|", "python3", "infra/helper.py", "build_image", project_name], + f"yes | python3 infra/helper.py build_image {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - shell=True # 使用 shell 来执行带管道的命令 + stdout=open(log_file, "w"), + stderr=subprocess.STDOUT, + shell=True, ) _ = parallel_subprocess( @@ -339,8 +348,8 @@ def testgen_repos( def main( - repo_id: str = "../data/valid_projects3.txt", - repo_root: str = "oss-fuzz/projects/", + repo_id: str = "data/valid_projects3.txt", + repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, jobs: int = 80, limits: Optional[int] = None, From 4333456e89ba6e9ffd537dc2000379bea5908daf Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 19:59:30 +0000 Subject: [PATCH 079/134] add build_fuzzer --- fuzz/collect_fuzz_python.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 32152ab..8dc5350 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -144,6 +144,39 @@ def _build_cmd(path: str): _build_cmd, on_exit=None, ) +def build_fuzzer(repos: list[str], jobs: int): + """ + 对构建成功的项目并行构建模糊测试器 + + Args: + repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 + jobs (int): 并行任务数。 + """ + logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") + + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + + def _build_cmd(path: str): + project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") + + logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") + return subprocess.Popen( + f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", + cwd=os.path.abspath(os.path.join(path, "../../")), + stdout=open(log_file, "w"), + stderr=subprocess.STDOUT, + shell=True, + ) + + _ = parallel_subprocess( + repos, + jobs, + _build_cmd, + on_exit=None, + ) def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): @@ -392,18 +425,22 @@ def main( match pipeline: case "build_image": build_image(repos, jobs) + case "build_fuzzer": + build_fuzzer(repos, jobs) case "fuzz": fuzz_repos(repos, jobs, timeout=timeout) case "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) case "all": build_image(repos, jobs) + build_fuzzer(repos, jobs) # 在构建镜像后添加构建模糊测试器阶段 fuzz_repos(repos, jobs, timeout=timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) case _: logging.error(f"Unknown pipeline {pipeline}") + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) fire.Fire(main) From 3a0565f62a9effc0900f537a84420a051bc3c166 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 4 Aug 2025 23:35:37 +0000 Subject: [PATCH 080/134] fuzz and testgen --- fuzz/collect_fuzz_python.py | 385 ++++++++++++++++++++++++------------ 1 file changed, 258 insertions(+), 127 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 8dc5350..7b0256c 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -1,6 +1,9 @@ """ 用于 python 项目模糊测试(fuzzing)和测试模板转换的脚本 + +PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline build_fuzzers """ +from pathlib import Path # 导入日志记录模块,用于输出程序运行信息 import logging # 从 typing 模块导入 Optional,用于类型提示,表示一个值可以是某个类型或者 None @@ -25,6 +28,7 @@ from difflib import SequenceMatcher # 从 itertools 导入 islice,用于对迭代器进行切片操作 from itertools import islice +from datetime import datetime # def transform_repos(repos: list[str], jobs: int): @@ -78,38 +82,38 @@ def get_target_list(p: subprocess.Popen): return p.stdout.read().decode("utf-8").split("\n") -def fuzz_one_target(target: tuple[str, str], timeout): - """ - 对单个模糊测试目标执行 `cargo fuzz run` 命令。 +# def fuzz_one_target(target: tuple[str, str], timeout): +# """ +# 对单个模糊测试目标执行 `cargo fuzz run` 命令。 - Args: - target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 - timeout (int): 模糊测试的超时时间(秒)。 +# Args: +# target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 +# timeout (int): 模糊测试的超时时间(秒)。 - Returns: - subprocess.Popen: 启动的模糊测试子进程对象。 - """ - # 解包元组,获取仓库路径和目标名称 - repo_path, target_name = target - # 创建一个文件用于存放该目标的模糊测试输入 - with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f: - # 启动一个子进程来执行模糊测试命令 - return subprocess.Popen( - # todo: 研究为什么 -max_total_time 参数不起作用 - # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"], - # 使用 bash -c 和 timeout 命令来强制实现超时功能 - [ - "bash", - "-c", - f"timeout {timeout} cargo fuzz run {target_name}", - ], - cwd=repo_path, # 在指定的仓库路径下执行命令 - stdout=f, # 将标准输出重定向到文件 - stderr=subprocess.DEVNULL, # 丢弃标准错误输出 - ) +# Returns: +# subprocess.Popen: 启动的模糊测试子进程对象。 +# """ +# # 解包元组,获取仓库路径和目标名称 +# repo_path, target_name = target +# # 创建一个文件用于存放该目标的模糊测试输入 +# with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f: +# # 启动一个子进程来执行模糊测试命令 +# return subprocess.Popen( +# # todo: 研究为什么 -max_total_time 参数不起作用 +# # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"], +# # 使用 bash -c 和 timeout 命令来强制实现超时功能 +# [ +# "bash", +# "-c", +# f"timeout {timeout} python3 infra/helper.py run_fuzzer {target_name}", +# ], +# cwd=repo_path, # 在指定的仓库路径下执行命令 +# stdout=f, # 将标准输出重定向到文件 +# stderr=subprocess.DEVNULL, # 丢弃标准错误输出 +# ) -from datetime import datetime +# from datetime import datetime def build_image(repos: list[str], jobs: int): """ @@ -179,82 +183,213 @@ def _build_cmd(path: str): ) +# 添加新的目标发现函数 +def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: + """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)""" + out_dir = oss_fuzz_dir / "build" / "out" / project_name + targets: list[str] = [] + logger = logger.getChild("discover_targets") # 使用子日志器 + + if not out_dir.is_dir(): + logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") + return targets + + try: + for f in out_dir.iterdir(): + try: + if (f.is_file() and + f.name.startswith("fuzz_") and + '.' not in f.name and + f.name.endswith("print1") and + os.access(f, os.X_OK)): + logger.info(f"🔍 Discovered target: {f.name}") + targets.append(f.name) + except OSError as e: + logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") + logger.info(f"🎯 Found {len(targets)} valid targets for {project_name}") + + except PermissionError: + logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") + except OSError as e: + logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") + + return targets + + +# 重写fuzz_repos函数使用新的目标发现机制 def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ - 对一组仓库执行模糊测试。 - + 对一组仓库执行模糊测试(使用新的目标发现机制和infra/helper.py run_fuzzer)。 + Args: repos (list[str]): 仓库路径列表。 jobs (int): 并行任务数。 - timeout (int, optional): 每个目标的模糊测试超时时间(秒)。默认为 60。 + timeout (int, optional): 每个目标的模糊测试超时时间(秒)。默认为 60. """ + # 删除有问题的旧代码(f"timeout {timeout} cargo fuzz run {target_name}") + # 记录日志,说明正在收集所有模糊测试目标 - logging.info("Collecting all fuzz targets") - - # 并行执行 `cargo fuzz list` 来获取所有仓库的模糊测试目标 - target_map = parallel_subprocess( - repos, - jobs, - lambda path: subprocess.Popen( - ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE - ), - on_exit=get_target_list, # 使用 get_target_list 函数处理每个子进程的输出 - ) - # 将 `target_map` 整理成一个 (仓库路径, 目标名称) 的元组列表 + logging.info("🔍 使用infra/helper.py方法发现模糊测试目标") + + # 确保日志目录存在 + log_dir = Path("fuzz_run_logs") + log_dir.mkdir(exist_ok=True, parents=True) + + # 定义获取目标列表的函数 + def get_targets_for_repo(repo: str) -> list[str]: + """获取单个仓库的目标列表""" + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + logger = logging.getLogger(f"targets.{project_name}") + return discover_targets(project_name, oss_fuzz_dir, logger) + + # 并行获取目标列表 + with ProcessingPool(jobs) as p: + targets_list = list(tqdm( + p.map(get_targets_for_repo, repos), + total=len(repos), + desc="Discovering targets" + )) + + # 创建目标映射 + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + + # 将目标映射整理成元组列表 targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0 ] + + # 记录发现的目标数量 + logging.info(f"🎯 在 {len(repos)} 个项目中发现了 {len(targets)} 个目标") + # 为每个仓库创建存放模糊测试输入的目录 for repo in repos: - os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - - # 记录日志,说明即将开始模糊测试 - logging.info(f"Running cargo fuzz on {len(targets)} targets for {timeout} seconds") + inputs_dir = pjoin(repo, "fuzz_inputs") + os.makedirs(inputs_dir, exist_ok=True) + + # 启动模糊测试 + logging.info(f"🚀 开始在 {len(targets)} 个目标上运行模糊测试(每个目标 {timeout} 秒)") + # 并行执行模糊测试 - parallel_subprocess( - targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None + results = parallel_subprocess( + targets, + jobs, + lambda p: fuzz_one_target(p, timeout), + on_exit=None + # desc="Running fuzzers" ) + + # 记录结果统计 + failed = sum(1 for r in results if r is None) + logging.info(f"✅ 模糊测试完成:成功 {len(results) - failed} 个目标,失败 {failed} 个目标") -def substitute_input(template: str, input_data: str, idx: int) -> str: +# 修改fuzz_one_target函数使用infra/helper.py +def fuzz_one_target(target: tuple[str, str], timeout: int): """ - 将模糊测试的输入数据替换到测试模板中。 + 对单个模糊测试目标执行模糊测试命令。 Args: - template (str): 测试模板字符串。 - input_data (str): 单条模糊测试输入数据。 - idx (int): 测试用例的索引号。 + target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 + timeout (int): 模糊测试的超时时间(秒)。 Returns: - str: 替换完成后的测试代码字符串。 + subprocess.Popen: 启动的模糊测试子进程对象。 """ - # 替换模板中的占位符为实际的输入数据 - return template.replace( - '[] ; # [doc = "This is a test template"]', f"{input_data} ; " - # 替换模板中的函数名为唯一的测试函数名 - ).replace("fn test_something ()", f"fn test_{idx} ()") - + # 解包元组,获取仓库路径和目标名称 + repo_path, target_name = target + project_name = os.path.basename(repo_path) + oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) + + # 创建一个文件用于存放该目标的模糊测试输入 + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) + os.makedirs(os.path.dirname(input_file_path), exist_ok=True) + + # 创建日志文件路径 + log_dir = os.path.abspath("fuzz_run_logs") + os.makedirs(log_dir, exist_ok=True) + log_file_path = pjoin(log_dir, f"{project_name}_{target_name}.log") + + try: + # 启动一个子进程来执行模糊测试命令 + return subprocess.Popen( + [ + "bash", + "-c", + f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" + ], + cwd=oss_fuzz_root, # 在OSS-Fuzz根目录下执行命令 + stdout=open(input_file_path, "w"), # 将模糊测试输入重定向到文件 + stderr=open(log_file_path, "w"), # 将日志输出重定向到日志文件 + ) + except Exception as e: + logging.error(f"Error starting fuzzer for target {target_name} in project {project_name}: {e}") + return None -def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: +# 修改testgen_repos函数使用新的目标发现方法 +def testgen_repos( + repos: list[str], + jobs: int, + n_fuzz: int = 100, + strategy: str = "shuffle", + max_len: int = 100, + sim_thresh: float = 0.8, +): """ - 检查字符串 `x` 是否与 `selected` 列表中的任何字符串足够相似。 + 从模糊测试的输入数据生成最终的测试用例(使用新的目标发现方法)。 Args: - selected (list[str]): 已选择的字符串列表。 - x (str): 待检查的字符串。 - thresh (float, optional): 相似度阈值。默认为 0.8。 - - Returns: - bool: 如果存在相似字符串,则返回 True,否则返回 False。 + repos (list[str]): 仓库路径列表。 + jobs (int): 并行任务数。 + n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。 + strategy (str, optional): 选择输入的策略。默认为 "shuffle"。 + max_len (int, optional): 输入的最大长度。默认为 100。 + sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。 """ - # 定义一个内部函数来计算两个字符串的相似度 - def similar(a, b): - return SequenceMatcher(None, a, b).ratio() - - # 检查列表中是否有任何一个字符串与 x 的相似度超过阈值 - return any(map(lambda y: similar(x, y) > thresh, selected)) + # 使用新的目标发现方法 + def get_targets_for_repo(repo: str) -> list[str]: + """获取单个仓库的目标列表""" + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + logger = logging.getLogger(f"testgen.{project_name}") + return discover_targets(project_name, oss_fuzz_dir, logger) + + # 并行获取目标列表 + with ProcessingPool(jobs) as p: + targets_list = list(tqdm( + p.map(get_targets_for_repo, repos), + total=len(repos), + desc="Discovering targets for testgen" + )) + + # 创建目标映射 + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + + # 记录日志 + logging.info("📝 Substitute fuzz data to test templates") + + # 使用进程池并行处理每个仓库的替换任务 + with ProcessingPool(jobs) as p: + # 使用 tqdm 显示总体进度 + results = list( + tqdm( + p.imap( + lambda item: substitute_one_repo( + item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + ), + target_map.items(), + ), + total=len(target_map), + desc="Generating tests" + ) + ) + + # 记录完成情况 + successful_repos = sum(1 for r in results if r is not None) + logging.info(f"✅ Completed test generation for {successful_repos}/{len(repos)} projects") +# 修改substitute_one_repo以返回状态 def substitute_one_repo( repo: str, targets: list[str], @@ -262,7 +397,7 @@ def substitute_one_repo( strategy: str, max_len: int, sim_thresh: float, -): +) -> Optional[int]: """ 处理单个仓库,将其模糊测试输入替换到测试模板中以生成测试文件。 @@ -273,24 +408,49 @@ def substitute_one_repo( strategy (str): 选择输入的策略("shuffle", "reverse" 等)。 max_len (int): 模糊测试输入的最大长度。 sim_thresh (float): 用于去重的相似度阈值。 + + Returns: + int: 成功处理的目标数量,或出错时为None """ + logger = logging.getLogger(f"substitute.{os.path.basename(repo)}") # 定义模板目录和输入目录的路径 template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") + + success_count = 0 + # 遍历该仓库的所有模糊测试目标 for t in targets: - if t == "": # 跳过空目标 + if not t: # 跳过空目标 continue template_path = pjoin(template_dir, t + ".rs") + input_path = pjoin(input_dir, t) + try: + # 检查文件是否存在 + if not os.path.exists(template_path): + logger.warning(f"📄 Template file not found: {template_path}") + continue + + if not os.path.exists(input_path): + logger.warning(f"📄 Input file not found: {input_path}") + continue + # 读取测试模板文件 with open(template_path) as f_template: template = f_template.read() + # 读取对应的模糊测试输入文件 - with open(pjoin(input_dir, t), "r") as f_input: + with open(input_path, "r") as f_input: # 过滤掉空的输入行 all_inputs = [i for i in f_input.read().splitlines() if i != "[]"] + + if not all_inputs: + logger.warning(f"⚠️ No valid inputs found for {t}") + continue + + logger.info(f"📥 Loaded {len(all_inputs)} inputs for {t}") inputs: list[str] # 根据不同的策略来选择输入数据 @@ -315,69 +475,40 @@ def substitute_one_repo( # 默认策略:直接取前 n_fuzz 个输入 inputs = all_inputs[:n_fuzz] + logger.info(f"✅ Selected {len(inputs)} inputs after {strategy} strategy") + # 使用选定的输入数据生成测试用例代码 tests = [ substitute_input(template, input_data, i) for i, input_data in enumerate(inputs) ] + # 定义生成的测试文件的路径 generated_test_path = pjoin(template_dir, f"{t}.inputs.rs") + # 将生成的测试代码写入文件 with open(generated_test_path, "w") as f_template: f_template.write("\n".join(tests)) + + logger.info(f"📝 Generated test file: {generated_test_path}") # 使用 rustfmt 工具格式化生成的测试文件 - subprocess.run(["rustfmt", str(generated_test_path)], check=False) - except FileNotFoundError: - # 如果找不到模板文件,则记录一条调试信息 - logging.debug(f"Template {template_path} not found") - + fmt_result = subprocess.run(["rustfmt", generated_test_path], capture_output=True, text=True) + if fmt_result.returncode != 0: + logger.warning(f"⚠️ rustfmt failed for {generated_test_path}: {fmt_result.stderr}") + else: + logger.info("✨ Formatted with rustfmt") + + success_count += 1 + + except FileNotFoundError as e: + logger.error(f"❌ File not found: {e}") + except Exception as e: + logger.exception(f"💥 Unexpected error processing {t}: {e}") + + return success_count if success_count > 0 else None -def testgen_repos( - repos: list[str], - jobs: int, - n_fuzz: int = 100, - strategy: str = "shuffle", - max_len: int = 100, - sim_thresh: float = 0.8, -): - """ - 从模糊测试的输入数据生成最终的测试用例。 - Args: - repos (list[str]): 仓库路径列表。 - jobs (int): 并行任务数。 - n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。 - strategy (str, optional): 选择输入的策略。默认为 "shuffle"。 - max_len (int, optional): 输入的最大长度。默认为 100。 - sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。 - """ - # 首先,获取所有仓库的模糊测试目标 - target_map = parallel_subprocess( - repos, - jobs, - lambda path: subprocess.Popen( - ["cargo", "fuzz", "list"], cwd=path, stdout=subprocess.PIPE - ), - on_exit=get_target_list, - use_tqdm=False, # 不在此处显示进度条 - ) - # 记录日志 - logging.info("Substitute fuzz data to test templates") - # 使用进程池并行处理每个仓库的替换任务 - with ProcessingPool(jobs) as p: - # 使用 tqdm 显示总体进度 - _ = list( - tqdm( - p.map( - # 对 target_map 中的每个项(仓库及其目标列表)调用 substitute_one_repo - lambda item: substitute_one_repo( - item[0], item[1], n_fuzz, strategy, max_len, sim_thresh - ), - target_map.items(), - ) - ) - ) def main( From 779fb408a2ef8b651260715a15a653af147bb123 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 01:04:27 +0000 Subject: [PATCH 081/134] correct run_one_target --- fuzz/collect_fuzz_python.py | 44 ++++++++++++++----------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 7b0256c..d4cecb8 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -284,47 +284,41 @@ def get_targets_for_repo(repo: str) -> list[str]: logging.info(f"✅ 模糊测试完成:成功 {len(results) - failed} 个目标,失败 {failed} 个目标") -# 修改fuzz_one_target函数使用infra/helper.py def fuzz_one_target(target: tuple[str, str], timeout: int): - """ - 对单个模糊测试目标执行模糊测试命令。 - - Args: - target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 - timeout (int): 模糊测试的超时时间(秒)。 - - Returns: - subprocess.Popen: 启动的模糊测试子进程对象。 - """ - # 解包元组,获取仓库路径和目标名称 repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - # 创建一个文件用于存放该目标的模糊测试输入 + # 创建输入文件路径 input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) # 创建日志文件路径 - log_dir = os.path.abspath("fuzz_run_logs") - os.makedirs(log_dir, exist_ok=True) - log_file_path = pjoin(log_dir, f"{project_name}_{target_name}.log") + log_dir = Path("fuzz_run_logs") + log_dir.mkdir(exist_ok=True, parents=True) + log_file_path = log_dir / f"{project_name}_{target_name}.log" try: - # 启动一个子进程来执行模糊测试命令 - return subprocess.Popen( + # 打开输入文件和日志文件 + input_file = open(input_file_path, "w") + log_file = open(log_file_path, "w") + + # 启动子进程 + proc = subprocess.Popen( [ "bash", "-c", f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" ], - cwd=oss_fuzz_root, # 在OSS-Fuzz根目录下执行命令 - stdout=open(input_file_path, "w"), # 将模糊测试输入重定向到文件 - stderr=open(log_file_path, "w"), # 将日志输出重定向到日志文件 + cwd=oss_fuzz_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, ) + return proc except Exception as e: - logging.error(f"Error starting fuzzer for target {target_name} in project {project_name}: {e}") + logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}") return None + # 修改testgen_repos函数使用新的目标发现方法 def testgen_repos( @@ -576,9 +570,3 @@ def main( logging.basicConfig(level=logging.INFO) fire.Fire(main) -# 当脚本作为主程序执行时 -if __name__ == "__main__": - # 配置日志记录的基本设置,级别为 INFO - logging.basicConfig(level=logging.INFO) - # 使用 fire 库将 main 函数暴露为命令行接口 - fire.Fire(main) \ No newline at end of file From 0afec3b4edd24268d411f8ce5a99bda4801dcbd9 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 04:15:21 +0000 Subject: [PATCH 082/134] fuzz ok --- fuzz/collect_fuzz_python.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index d4cecb8..1fafe0d 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -280,9 +280,9 @@ def get_targets_for_repo(repo: str) -> list[str]: ) # 记录结果统计 - failed = sum(1 for r in results if r is None) - logging.info(f"✅ 模糊测试完成:成功 {len(results) - failed} 个目标,失败 {failed} 个目标") - + failed = sum(1 for r in results.values() if r != 0) + success = len(results) - failed + logging.info(f"✅ 模糊测试完成:成功 {success} 个目标,失败 {failed} 个目标") def fuzz_one_target(target: tuple[str, str], timeout: int): repo_path, target_name = target @@ -294,10 +294,10 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): os.makedirs(os.path.dirname(input_file_path), exist_ok=True) # 创建日志文件路径 - log_dir = Path("fuzz_run_logs") + log_dir = Path("fuzz_run_logs3") log_dir.mkdir(exist_ok=True, parents=True) log_file_path = log_dir / f"{project_name}_{target_name}.log" - + logging.info(f"[START] Fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") try: # 打开输入文件和日志文件 input_file = open(input_file_path, "w") @@ -308,16 +308,19 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): [ "bash", "-c", - f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" + f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" ], cwd=oss_fuzz_root, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stdout=input_file, + stderr=log_file, ) + logging.info(f"[RUNNING] Subprocess started for {project_name}/{target_name} (PID: {proc.pid})") return proc except Exception as e: - logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}") + logging.error(f"[ERROR] Failed to start fuzzer for {project_name}/{target_name}: {e}") return None + finally: + logging.info(f"[END] Fuzzing launch attempt completed for {project_name}/{target_name}") # 修改testgen_repos函数使用新的目标发现方法 @@ -418,7 +421,7 @@ def substitute_one_repo( if not t: # 跳过空目标 continue - template_path = pjoin(template_dir, t + ".rs") + template_path = pjoin(template_dir, t + ".py") input_path = pjoin(input_dir, t) try: @@ -509,7 +512,7 @@ def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 80, + jobs: int = 2, limits: Optional[int] = None, pipeline: str = "transform", n_fuzz: int = 100, From ba61ca1c8eb825cfcbfd3b7fdc0aa4e0b6b80aaf Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 04:24:58 +0000 Subject: [PATCH 083/134] transform --- fuzz/collect_fuzz_python.py | 589 ++++++++++-------------------------- 1 file changed, 164 insertions(+), 425 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 1fafe0d..796386c 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -1,130 +1,24 @@ """ -用于 python 项目模糊测试(fuzzing)和测试模板转换的脚本 - -PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline build_fuzzers +用于Python项目模糊测试(fuzzing)和测试模板转换的脚本 """ from pathlib import Path -# 导入日志记录模块,用于输出程序运行信息 import logging -# 从 typing 模块导入 Optional,用于类型提示,表示一个值可以是某个类型或者 None -from typing import Optional -# 导入 fire 库,用于快速创建命令行界面 +from typing import Optional, List, Tuple import fire -# 导入 os 模块,用于与操作系统交互,如文件路径操作 import os -# 从自定义的 UniTSyn 工具库中导入辅助函数 from UniTSyn.frontend.util import wrap_repo, parallel_subprocess -# 导入 subprocess 模块,用于创建和管理子进程 import subprocess -# 从 os.path 中导入 join 和 abspath,分别用于拼接路径和获取绝对路径 from os.path import join as pjoin, abspath -# 导入 tqdm 库,用于显示进度条 from tqdm import tqdm -# 从 pathos.multiprocessing 导入 ProcessingPool,用于创建进程池以实现并行处理 from pathos.multiprocessing import ProcessingPool -# 导入 random 模块,用于生成随机数 import random -# 从 difflib 导入 SequenceMatcher,用于比较序列(如字符串)的相似度 from difflib import SequenceMatcher -# 从 itertools 导入 islice,用于对迭代器进行切片操作 from itertools import islice from datetime import datetime - -# def transform_repos(repos: list[str], jobs: int): -# """ -# 对一组仓库执行 `rust-fuzzer-gen` 命令,以生成模糊测试模板。 - -# Args: -# repos (list[str]): 包含多个仓库路径的列表。 -# jobs (int): 并行执行的任务数量。 -# """ -# def transform_one_repo(repo_path: str): -# """ -# 对单个仓库启动 `rust-fuzzer-gen` 进程。 - -# Args: -# repo_path (str): 单个仓库的路径。 - -# Returns: -# subprocess.Popen: 启动的子进程对象。 -# """ -# # 启动一个子进程来执行 `rust-fuzzer-gen` 命令 -# return subprocess.Popen( -# ["rust-fuzzer-gen", repo_path], -# stdout=subprocess.PIPE, # 捕获标准输出 -# stderr=subprocess.PIPE, # 捕获标准错误 -# ) - -# # 记录日志,说明正在对多少个仓库进行操作 -# logging.info(f"Running rust-fuzz-gen on {len(repos)} repos") -# # 使用并行处理工具来同时对多个仓库执行 transform_one_repo 函数 -# parallel_subprocess(repos, jobs, transform_one_repo, on_exit=None) - - -def get_target_list(p: subprocess.Popen): - """ - 从子进程的输出中解析出模糊测试目标列表。 - - Args: - p (subprocess.Popen): 一个已完成的子进程对象。 - - Returns: - list[str]: 模糊测试目标的名称列表。 - """ - # 使用 match 语句检查子进程的标准输出 - match p.stdout: - # 如果标准输出为 None,则返回空列表 - case None: - return [] - # 否则,读取标准输出,解码为 UTF-8 字符串,并按换行符分割成列表 - case _: - return p.stdout.read().decode("utf-8").split("\n") - - -# def fuzz_one_target(target: tuple[str, str], timeout): -# """ -# 对单个模糊测试目标执行 `cargo fuzz run` 命令。 - -# Args: -# target (tuple[str, str]): 一个元组,包含仓库路径和目标名称。 -# timeout (int): 模糊测试的超时时间(秒)。 - -# Returns: -# subprocess.Popen: 启动的模糊测试子进程对象。 -# """ -# # 解包元组,获取仓库路径和目标名称 -# repo_path, target_name = target -# # 创建一个文件用于存放该目标的模糊测试输入 -# with open(pjoin(repo_path, "fuzz_inputs", target_name), "w") as f: -# # 启动一个子进程来执行模糊测试命令 -# return subprocess.Popen( -# # todo: 研究为什么 -max_total_time 参数不起作用 -# # ["cargo", "fuzz", "run", target_name, "--", f"-max_total_time={timeout}"], -# # 使用 bash -c 和 timeout 命令来强制实现超时功能 -# [ -# "bash", -# "-c", -# f"timeout {timeout} python3 infra/helper.py run_fuzzer {target_name}", -# ], -# cwd=repo_path, # 在指定的仓库路径下执行命令 -# stdout=f, # 将标准输出重定向到文件 -# stderr=subprocess.DEVNULL, # 丢弃标准错误输出 -# ) - - -# from datetime import datetime - def build_image(repos: list[str], jobs: int): - """ - 构建每个仓库对应的 OSS-Fuzz 项目的 Docker 镜像,并将构建日志写入 fuzz_pipeline_log 目录。 - - Args: - repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 - jobs (int): 并行任务数。 - """ + """构建每个仓库对应的OSS-Fuzz项目的Docker镜像""" logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") os.makedirs(log_dir, exist_ok=True) @@ -132,8 +26,6 @@ def _build_cmd(path: str): project_name = os.path.basename(path.rstrip("/")) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") - - logging.info(f"Start building {project_name}, logging to {log_file}") return subprocess.Popen( f"yes | python3 infra/helper.py build_image {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), @@ -142,22 +34,11 @@ def _build_cmd(path: str): shell=True, ) - _ = parallel_subprocess( - repos, - jobs, - _build_cmd, - on_exit=None, - ) + _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) + def build_fuzzer(repos: list[str], jobs: int): - """ - 对构建成功的项目并行构建模糊测试器 - - Args: - repos (list[str]): 仓库路径列表(每个应包含一个已接入 OSS-Fuzz 的项目)。 - jobs (int): 并行任务数。 - """ + """对构建成功的项目并行构建模糊测试器""" logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") os.makedirs(log_dir, exist_ok=True) @@ -165,8 +46,6 @@ def _build_cmd(path: str): project_name = os.path.basename(path.rstrip("/")) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") - - logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") return subprocess.Popen( f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), @@ -175,218 +54,132 @@ def _build_cmd(path: str): shell=True, ) - _ = parallel_subprocess( - repos, - jobs, - _build_cmd, - on_exit=None, - ) - + _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) -# 添加新的目标发现函数 -def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: - """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)""" +def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: + """发现模糊测试目标""" out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets: list[str] = [] - logger = logger.getChild("discover_targets") # 使用子日志器 - + targets = [] + if not out_dir.is_dir(): - logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") + logging.warning(f"Build output directory for {project_name} does not exist") return targets try: for f in out_dir.iterdir(): - try: - if (f.is_file() and - f.name.startswith("fuzz_") and - '.' not in f.name and - f.name.endswith("print1") and - os.access(f, os.X_OK)): - logger.info(f"🔍 Discovered target: {f.name}") - targets.append(f.name) - except OSError as e: - logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") - logger.info(f"🎯 Found {len(targets)} valid targets for {project_name}") - - except PermissionError: - logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") - except OSError as e: - logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") + if (f.is_file() and f.name.startswith("fuzz_") and + '.' not in f.name and f.name.endswith("print1") and + os.access(f, os.X_OK)): + targets.append(f.name) + except Exception as e: + logging.error(f"Error discovering targets: {e}") return targets - -# 重写fuzz_repos函数使用新的目标发现机制 -def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): - """ - 对一组仓库执行模糊测试(使用新的目标发现机制和infra/helper.py run_fuzzer)。 - - Args: - repos (list[str]): 仓库路径列表。 - jobs (int): 并行任务数。 - timeout (int, optional): 每个目标的模糊测试超时时间(秒)。默认为 60. - """ - # 删除有问题的旧代码(f"timeout {timeout} cargo fuzz run {target_name}") +def fuzz_one_target(target: tuple[str, str], timeout: int): + """对单个模糊测试目标执行模糊测试""" + repo_path, target_name = target + project_name = os.path.basename(repo_path) + oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - # 记录日志,说明正在收集所有模糊测试目标 - logging.info("🔍 使用infra/helper.py方法发现模糊测试目标") + # 创建输入文件路径 + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) + os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - # 确保日志目录存在 - log_dir = Path("fuzz_run_logs") - log_dir.mkdir(exist_ok=True, parents=True) + try: + with open(input_file_path, "w") as input_file: + return subprocess.Popen( + [ + "bash", + "-c", + f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" + ], + cwd=oss_fuzz_root, + stdout=input_file, + stderr=subprocess.DEVNULL, + ) + except Exception as e: + logging.error(f"Error starting fuzzer: {e}") + return None + +def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): + """对一组仓库执行模糊测试""" + logging.info("Discovering fuzz targets") - # 定义获取目标列表的函数 - def get_targets_for_repo(repo: str) -> list[str]: - """获取单个仓库的目标列表""" + # 获取所有目标 + targets_list = [] + for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent - logger = logging.getLogger(f"targets.{project_name}") - return discover_targets(project_name, oss_fuzz_dir, logger) - - # 并行获取目标列表 - with ProcessingPool(jobs) as p: - targets_list = list(tqdm( - p.map(get_targets_for_repo, repos), - total=len(repos), - desc="Discovering targets" - )) + targets = discover_targets(project_name, oss_fuzz_dir) + targets_list.append(targets) # 创建目标映射 target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - - # 将目标映射整理成元组列表 targets: list[tuple[str, str]] = [ - (k, v) for k, vs in target_map.items() for v in vs if len(v) > 0 + (k, v) for k, vs in target_map.items() for v in vs ] - # 记录发现的目标数量 - logging.info(f"🎯 在 {len(repos)} 个项目中发现了 {len(targets)} 个目标") + logging.info(f"Running fuzzing on {len(targets)} targets") - # 为每个仓库创建存放模糊测试输入的目录 + # 创建输入目录 for repo in repos: - inputs_dir = pjoin(repo, "fuzz_inputs") - os.makedirs(inputs_dir, exist_ok=True) - - # 启动模糊测试 - logging.info(f"🚀 开始在 {len(targets)} 个目标上运行模糊测试(每个目标 {timeout} 秒)") + os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) # 并行执行模糊测试 - results = parallel_subprocess( - targets, - jobs, - lambda p: fuzz_one_target(p, timeout), - on_exit=None - # desc="Running fuzzers" - ) - - # 记录结果统计 - failed = sum(1 for r in results.values() if r != 0) - success = len(results) - failed - logging.info(f"✅ 模糊测试完成:成功 {success} 个目标,失败 {failed} 个目标") + parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) -def fuzz_one_target(target: tuple[str, str], timeout: int): - repo_path, target_name = target - project_name = os.path.basename(repo_path) - oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) +def generate_test_template(target_name: str, repo_path: str): + """为单个目标生成测试模板""" + template_dir = pjoin(repo_path, "tests-gen") + os.makedirs(template_dir, exist_ok=True) + template_path = pjoin(template_dir, f"{target_name}.rs") - # 创建输入文件路径 - input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) - os.makedirs(os.path.dirname(input_file_path), exist_ok=True) + # 基本测试模板 + template = f""" + #[test] + fn test_{target_name}() {{ + // 测试逻辑将在这里生成 + let input = []; // 模糊测试输入将替换这里 + let result = process_input(&input); + assert!(result.is_ok()); + }} + """ - # 创建日志文件路径 - log_dir = Path("fuzz_run_logs3") - log_dir.mkdir(exist_ok=True, parents=True) - log_file_path = log_dir / f"{project_name}_{target_name}.log" - logging.info(f"[START] Fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") - try: - # 打开输入文件和日志文件 - input_file = open(input_file_path, "w") - log_file = open(log_file_path, "w") - - # 启动子进程 - proc = subprocess.Popen( - [ - "bash", - "-c", - f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - ], - cwd=oss_fuzz_root, - stdout=input_file, - stderr=log_file, - ) - logging.info(f"[RUNNING] Subprocess started for {project_name}/{target_name} (PID: {proc.pid})") - return proc - except Exception as e: - logging.error(f"[ERROR] Failed to start fuzzer for {project_name}/{target_name}: {e}") - return None - finally: - logging.info(f"[END] Fuzzing launch attempt completed for {project_name}/{target_name}") + with open(template_path, "w") as f: + f.write(template) + return template_path -# 修改testgen_repos函数使用新的目标发现方法 -def testgen_repos( - repos: list[str], - jobs: int, - n_fuzz: int = 100, - strategy: str = "shuffle", - max_len: int = 100, - sim_thresh: float = 0.8, -): - """ - 从模糊测试的输入数据生成最终的测试用例(使用新的目标发现方法)。 - - Args: - repos (list[str]): 仓库路径列表。 - jobs (int): 并行任务数。 - n_fuzz (int, optional): 每个目标要使用的模糊测试输入数量。默认为 100。 - strategy (str, optional): 选择输入的策略。默认为 "shuffle"。 - max_len (int, optional): 输入的最大长度。默认为 100。 - sim_thresh (float, optional): 输入的相似度阈值。默认为 0.8。 - """ - # 使用新的目标发现方法 - def get_targets_for_repo(repo: str) -> list[str]: - """获取单个仓库的目标列表""" +def transform_repos(repos: list[str], jobs: int): + """为所有目标生成测试模板""" + logging.info("Generating test templates") + + def _transform_repo(repo: str): project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent - logger = logging.getLogger(f"testgen.{project_name}") - return discover_targets(project_name, oss_fuzz_dir, logger) + targets = discover_targets(project_name, oss_fuzz_dir) + return [generate_test_template(t, repo) for t in targets] - # 并行获取目标列表 with ProcessingPool(jobs) as p: - targets_list = list(tqdm( - p.map(get_targets_for_repo, repos), - total=len(repos), - desc="Discovering targets for testgen" - )) - - # 创建目标映射 - target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - - # 记录日志 - logging.info("📝 Substitute fuzz data to test templates") - - # 使用进程池并行处理每个仓库的替换任务 - with ProcessingPool(jobs) as p: - # 使用 tqdm 显示总体进度 - results = list( - tqdm( - p.imap( - lambda item: substitute_one_repo( - item[0], item[1], n_fuzz, strategy, max_len, sim_thresh - ), - target_map.items(), - ), - total=len(target_map), - desc="Generating tests" - ) - ) - - # 记录完成情况 - successful_repos = sum(1 for r in results if r is not None) - logging.info(f"✅ Completed test generation for {successful_repos}/{len(repos)} projects") + return list(p.map(_transform_repo, repos)) + +def substitute_input(template: str, input_data: str, idx: int) -> str: + """将模糊测试输入替换到测试模板中""" + return template.replace( + 'let input = []; // 模糊测试输入将替换这里', + f"let input = {input_data};" + ).replace( + f"fn test_", + f"fn test_{idx}_" + ) +def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: + """检查字符串是否与已选列表中的任何字符串足够相似""" + def similar(a, b): + return SequenceMatcher(None, a, b).ratio() + return any(similar(x, y) > thresh for y in selected) -# 修改substitute_one_repo以返回状态 def substitute_one_repo( repo: str, targets: list[str], @@ -394,182 +187,128 @@ def substitute_one_repo( strategy: str, max_len: int, sim_thresh: float, -) -> Optional[int]: - """ - 处理单个仓库,将其模糊测试输入替换到测试模板中以生成测试文件。 - - Args: - repo (str): 仓库路径。 - targets (list[str]): 该仓库的模糊测试目标列表。 - n_fuzz (int): 要使用的模糊测试输入数量。 - strategy (str): 选择输入的策略("shuffle", "reverse" 等)。 - max_len (int): 模糊测试输入的最大长度。 - sim_thresh (float): 用于去重的相似度阈值。 - - Returns: - int: 成功处理的目标数量,或出错时为None - """ - logger = logging.getLogger(f"substitute.{os.path.basename(repo)}") - # 定义模板目录和输入目录的路径 +): + """处理单个仓库,将模糊测试输入替换到测试模板中""" template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - success_count = 0 - - # 遍历该仓库的所有模糊测试目标 for t in targets: - if not t: # 跳过空目标 - continue - - template_path = pjoin(template_dir, t + ".py") + template_path = pjoin(template_dir, f"{t}.rs") input_path = pjoin(input_dir, t) try: - # 检查文件是否存在 - if not os.path.exists(template_path): - logger.warning(f"📄 Template file not found: {template_path}") - continue - - if not os.path.exists(input_path): - logger.warning(f"📄 Input file not found: {input_path}") - continue - - # 读取测试模板文件 with open(template_path) as f_template: template = f_template.read() - # 读取对应的模糊测试输入文件 with open(input_path, "r") as f_input: - # 过滤掉空的输入行 - all_inputs = [i for i in f_input.read().splitlines() if i != "[]"] - - if not all_inputs: - logger.warning(f"⚠️ No valid inputs found for {t}") - continue - - logger.info(f"📥 Loaded {len(all_inputs)} inputs for {t}") - - inputs: list[str] - # 根据不同的策略来选择输入数据 + all_inputs = [i for i in f_input.read().splitlines() if i] + + # 选择输入策略 if strategy == "shuffle": - # 随机打乱所有输入 random.shuffle(all_inputs) - # 过滤掉过长的输入,并取前 n_fuzz 个 - inputs = list( - islice(filter(lambda x: len(x) < max_len, all_inputs), n_fuzz) - ) + inputs = list(islice( + (x for x in all_inputs if len(x) < max_len), n_fuzz)) elif strategy == "reverse": - # 从后往前选择,同时进行去重和长度过滤 inputs = [] for x in reversed(all_inputs): - if len(inputs) >= n_fuzz: # 如果已选够,则停止 + if len(inputs) >= n_fuzz: break - # 如果输入过长或与已选输入相似,则跳过 if len(x) > max_len or has_similar(inputs, x, sim_thresh): continue inputs.append(x) else: - # 默认策略:直接取前 n_fuzz 个输入 inputs = all_inputs[:n_fuzz] - - logger.info(f"✅ Selected {len(inputs)} inputs after {strategy} strategy") - - # 使用选定的输入数据生成测试用例代码 + + # 生成测试用例 tests = [ substitute_input(template, input_data, i) for i, input_data in enumerate(inputs) ] - # 定义生成的测试文件的路径 - generated_test_path = pjoin(template_dir, f"{t}.inputs.rs") - - # 将生成的测试代码写入文件 - with open(generated_test_path, "w") as f_template: - f_template.write("\n".join(tests)) - - logger.info(f"📝 Generated test file: {generated_test_path}") - - # 使用 rustfmt 工具格式化生成的测试文件 - fmt_result = subprocess.run(["rustfmt", generated_test_path], capture_output=True, text=True) - if fmt_result.returncode != 0: - logger.warning(f"⚠️ rustfmt failed for {generated_test_path}: {fmt_result.stderr}") - else: - logger.info("✨ Formatted with rustfmt") + # 写入生成的测试文件 + generated_path = pjoin(template_dir, f"{t}.inputs.rs") + with open(generated_path, "w") as f: + f.write("\n".join(tests)) - success_count += 1 + # 格式化代码 + subprocess.run(["rustfmt", generated_path], check=False) - except FileNotFoundError as e: - logger.error(f"❌ File not found: {e}") except Exception as e: - logger.exception(f"💥 Unexpected error processing {t}: {e}") - - return success_count if success_count > 0 else None - - + logging.error(f"Error processing {t}: {e}") +def testgen_repos( + repos: list[str], + jobs: int, + n_fuzz: int = 100, + strategy: str = "shuffle", + max_len: int = 100, + sim_thresh: float = 0.8, +): + """从模糊测试输入生成测试用例""" + # 首先获取所有目标 + targets_list = [] + for repo in repos: + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + targets_list.append(targets) + + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + + # 并行处理每个仓库 + with ProcessingPool(jobs) as p: + list(p.map( + lambda item: substitute_one_repo( + item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + ), + target_map.items() + )) def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 2, - limits: Optional[int] = None, - pipeline: str = "transform", + jobs: int = 4, + pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, ): - """ - 从多个 OSS-Fuzz 项目中执行指定阶段(构建镜像、模糊测试、测试生成等)的主函数。 - - Args: - repo_id (str): 文件路径,包含 OSS-Fuzz 项目名称(每行一个)。 - repo_root (str): 所有 OSS-Fuzz 项目所在的根目录。 - timeout (int): 模糊测试的超时时间。 - jobs (int): 并行任务数。 - limits (Optional[int]): 处理项目数量的上限。 - pipeline (str): 执行阶段:build_image, fuzz, testgen, all。 - n_fuzz, strategy, max_len, sim_thresh: testgen 参数。 - """ + """主函数,控制整个模糊测试流程""" try: with open(repo_id, "r") as f: repo_id_list = [line.strip() for line in f if line.strip()] except FileNotFoundError: repo_id_list = [repo_id] - if limits is not None: - repo_id_list = repo_id_list[:limits] - - logging.info(f"Loaded {len(repo_id_list)} repos to be processed") - - logging.info("Collecting all OSS-Fuzz project directories") + # 收集仓库路径 repos = [] for repo_id in repo_id_list: repo_path = abspath(os.path.join(repo_root, repo_id)) if os.path.isdir(repo_path): repos.append(repo_path) - match pipeline: - case "build_image": - build_image(repos, jobs) - case "build_fuzzer": - build_fuzzer(repos, jobs) - case "fuzz": - fuzz_repos(repos, jobs, timeout=timeout) - case "testgen": - testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - case "all": - build_image(repos, jobs) - build_fuzzer(repos, jobs) # 在构建镜像后添加构建模糊测试器阶段 - fuzz_repos(repos, jobs, timeout=timeout) - testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - case _: - logging.error(f"Unknown pipeline {pipeline}") - - + # 执行指定流程 + if pipeline == "build_image": + build_image(repos, jobs) + elif pipeline == "build_fuzzer": + build_fuzzer(repos, jobs) + elif pipeline == "fuzz": + fuzz_repos(repos, jobs, timeout) + elif pipeline == "testgen": + testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) + elif pipeline == "transform": + transform_repos(repos, jobs) + elif pipeline == "all": + build_image(repos, jobs) + build_fuzzer(repos, jobs) + transform_repos(repos, jobs) # 关键添加:模板生成 + fuzz_repos(repos, jobs, timeout) + testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) + else: + logging.error(f"Unknown pipeline: {pipeline}") if __name__ == "__main__": logging.basicConfig(level=logging.INFO) fire.Fire(main) - From 58d1f76e3a6539e17a059abb7ce6f57eaa7e31fd Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 04:37:46 +0000 Subject: [PATCH 084/134] testgen need to ^ help: add `;` here --- fuzz/collect_fuzz_python.py | 209 +++++++++++++++++++++++++++++++----- 1 file changed, 181 insertions(+), 28 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 796386c..ab94676 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -15,9 +15,16 @@ from difflib import SequenceMatcher from itertools import islice from datetime import datetime +import re def build_image(repos: list[str], jobs: int): - """构建每个仓库对应的OSS-Fuzz项目的Docker镜像""" + """ + 构建每个仓库对应的OSS-Fuzz项目的Docker镜像 + + Args: + repos (list[str]): 仓库路径列表 + jobs (int): 并行任务数 + """ logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") log_dir = os.path.abspath("fuzz_pipeline_log") os.makedirs(log_dir, exist_ok=True) @@ -26,6 +33,8 @@ def _build_cmd(path: str): project_name = os.path.basename(path.rstrip("/")) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") + + logging.info(f"Start building {project_name}, logging to {log_file}") return subprocess.Popen( f"yes | python3 infra/helper.py build_image {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), @@ -37,7 +46,13 @@ def _build_cmd(path: str): _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) def build_fuzzer(repos: list[str], jobs: int): - """对构建成功的项目并行构建模糊测试器""" + """ + 对构建成功的项目并行构建模糊测试器 + + Args: + repos (list[str]): 仓库路径列表 + jobs (int): 并行任务数 + """ logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") log_dir = os.path.abspath("fuzz_pipeline_log") os.makedirs(log_dir, exist_ok=True) @@ -46,6 +61,8 @@ def _build_cmd(path: str): project_name = os.path.basename(path.rstrip("/")) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") + + logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") return subprocess.Popen( f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), @@ -57,7 +74,16 @@ def _build_cmd(path: str): _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: - """发现模糊测试目标""" + """ + 发现模糊测试目标 + + Args: + project_name (str): 项目名称 + oss_fuzz_dir (Path): OSS-Fuzz根目录 + + Returns: + list[str]: 目标名称列表 + """ out_dir = oss_fuzz_dir / "build" / "out" / project_name targets = [] @@ -77,7 +103,16 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: return targets def fuzz_one_target(target: tuple[str, str], timeout: int): - """对单个模糊测试目标执行模糊测试""" + """ + 对单个模糊测试目标执行模糊测试 + + Args: + target (tuple[str, str]): (仓库路径, 目标名称) + timeout (int): 超时时间(秒) + + Returns: + subprocess.Popen: 子进程对象 + """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) @@ -103,7 +138,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): return None def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): - """对一组仓库执行模糊测试""" + """ + 对一组仓库执行模糊测试 + + Args: + repos (list[str]): 仓库路径列表 + jobs (int): 并行任务数 + timeout (int): 超时时间(秒) + """ logging.info("Discovering fuzz targets") # 获取所有目标 @@ -130,17 +172,26 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) def generate_test_template(target_name: str, repo_path: str): - """为单个目标生成测试模板""" + """ + 为单个目标生成测试模板 + + Args: + target_name (str): 目标名称 + repo_path (str): 仓库路径 + + Returns: + str: 模板文件路径 + """ template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) template_path = pjoin(template_dir, f"{target_name}.rs") - # 基本测试模板 + # 基本测试模板 - 使用字节数组而不是字节字符 template = f""" #[test] fn test_{target_name}() {{ // 测试逻辑将在这里生成 - let input = []; // 模糊测试输入将替换这里 + let input = b""; // 模糊测试输入将替换这里 let result = process_input(&input); assert!(result.is_ok()); }} @@ -152,7 +203,13 @@ def generate_test_template(target_name: str, repo_path: str): return template_path def transform_repos(repos: list[str], jobs: int): - """为所有目标生成测试模板""" + """ + 为所有目标生成测试模板 + + Args: + repos (list[str]): 仓库路径列表 + jobs (int): 并行任务数 + """ logging.info("Generating test templates") def _transform_repo(repo: str): @@ -164,18 +221,67 @@ def _transform_repo(repo: str): with ProcessingPool(jobs) as p: return list(p.map(_transform_repo, repos)) -def substitute_input(template: str, input_data: str, idx: int) -> str: - """将模糊测试输入替换到测试模板中""" - return template.replace( - 'let input = []; // 模糊测试输入将替换这里', - f"let input = {input_data};" - ).replace( - f"fn test_", - f"fn test_{idx}_" +def escape_special_chars(input_data: str) -> str: + """ + 转义输入数据中的特殊字符 + + Args: + input_data (str): 原始输入数据 + + Returns: + str: 转义后的输入数据 + """ + # 转义反斜杠和双引号 + escaped = input_data.replace('\\', '\\\\').replace('"', '\\"') + + # 处理非ASCII字符 + if any(ord(c) > 127 for c in escaped): + # 如果包含非ASCII字符,使用字节数组表示 + byte_array = [str(b) for b in input_data.encode()] + return f"b\"\" // Original: {input_data}\n let input = vec![{', '.join(byte_array)}];" + + return f"b\"{escaped}\"" + +def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: + """ + 将模糊测试输入替换到测试模板中 + + Args: + template (str): 模板内容 + input_data (str): 输入数据 + idx (int): 测试索引 + target_name (str): 目标名称 + + Returns: + str: 替换后的测试代码 + """ + # 转义特殊字符并处理非ASCII字符 + escaped_input = escape_special_chars(input_data) + + # 替换输入占位符 + new_template = template.replace( + 'let input = b""; // 模糊测试输入将替换这里', + escaped_input + ) + + # 替换函数名避免重复 + return new_template.replace( + f"fn test_{target_name}()", + f"fn test_{target_name}_{idx}()" ) def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: - """检查字符串是否与已选列表中的任何字符串足够相似""" + """ + 检查字符串是否与已选列表中的任何字符串足够相似 + + Args: + selected (list[str]): 已选字符串列表 + x (str): 待检查字符串 + thresh (float): 相似度阈值 + + Returns: + bool: 是否相似 + """ def similar(a, b): return SequenceMatcher(None, a, b).ratio() return any(similar(x, y) > thresh for y in selected) @@ -188,21 +294,45 @@ def substitute_one_repo( max_len: int, sim_thresh: float, ): - """处理单个仓库,将模糊测试输入替换到测试模板中""" + """ + 处理单个仓库,将模糊测试输入替换到测试模板中 + + Args: + repo (str): 仓库路径 + targets (list[str]): 目标列表 + n_fuzz (int): 使用的输入数量 + strategy (str): 选择策略 + max_len (int): 最大长度 + sim_thresh (float): 相似度阈值 + """ template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - for t in targets: - template_path = pjoin(template_dir, f"{t}.rs") - input_path = pjoin(input_dir, t) + for target_name in targets: # 使用target_name作为循环变量 + template_path = pjoin(template_dir, f"{target_name}.rs") + input_path = pjoin(input_dir, target_name) try: + if not os.path.exists(template_path): + logging.warning(f"Template file not found: {template_path}") + continue + + if not os.path.exists(input_path): + logging.warning(f"Input file not found: {input_path}") + continue + with open(template_path) as f_template: template = f_template.read() with open(input_path, "r") as f_input: - all_inputs = [i for i in f_input.read().splitlines() if i] + all_inputs = [line.strip() for line in f_input if line.strip()] + if not all_inputs: + logging.warning(f"No valid inputs found for {target_name}") + continue + + logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") + # 选择输入策略 if strategy == "shuffle": random.shuffle(all_inputs) @@ -221,12 +351,12 @@ def substitute_one_repo( # 生成测试用例 tests = [ - substitute_input(template, input_data, i) + substitute_input(template, input_data, i, target_name) # 传递target_name for i, input_data in enumerate(inputs) ] # 写入生成的测试文件 - generated_path = pjoin(template_dir, f"{t}.inputs.rs") + generated_path = pjoin(template_dir, f"{target_name}.inputs.rs") with open(generated_path, "w") as f: f.write("\n".join(tests)) @@ -234,7 +364,7 @@ def substitute_one_repo( subprocess.run(["rustfmt", generated_path], check=False) except Exception as e: - logging.error(f"Error processing {t}: {e}") + logging.error(f"Error processing {target_name}: {e}") def testgen_repos( repos: list[str], @@ -244,7 +374,17 @@ def testgen_repos( max_len: int = 100, sim_thresh: float = 0.8, ): - """从模糊测试输入生成测试用例""" + """ + 从模糊测试输入生成测试用例 + + Args: + repos (list[str]): 仓库路径列表 + jobs (int): 并行任务数 + n_fuzz (int): 使用的输入数量 + strategy (str): 选择策略 + max_len (int): 最大长度 + sim_thresh (float): 相似度阈值 + """ # 首先获取所有目标 targets_list = [] for repo in repos: @@ -275,7 +415,20 @@ def main( max_len: int = 100, sim_thresh: float = 0.8, ): - """主函数,控制整个模糊测试流程""" + """ + 主函数,控制整个模糊测试流程 + + Args: + repo_id (str): 项目ID文件路径 + repo_root (str): 项目根目录 + timeout (int): 超时时间 + jobs (int): 并行任务数 + pipeline (str): 流程类型 + n_fuzz (int): 使用的输入数量 + strategy (str): 选择策略 + max_len (int): 最大长度 + sim_thresh (float): 相似度阈值 + """ try: with open(repo_id, "r") as f: repo_id_list = [line.strip() for line in f if line.strip()] From 92368225ed98260aecbb363ac75b2d7a3a1ee59a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 06:10:36 +0000 Subject: [PATCH 085/134] test successful --- fuzz/collect_fuzz_python.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index ab94676..7ae4933 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -238,9 +238,9 @@ def escape_special_chars(input_data: str) -> str: if any(ord(c) > 127 for c in escaped): # 如果包含非ASCII字符,使用字节数组表示 byte_array = [str(b) for b in input_data.encode()] - return f"b\"\" // Original: {input_data}\n let input = vec![{', '.join(byte_array)}];" + return f"let input = vec![{', '.join(byte_array)}];" - return f"b\"{escaped}\"" + return f"let input = b\"{escaped}\";" def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: """ @@ -408,7 +408,7 @@ def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 4, + jobs: int = 80, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From 310b079dcccc0bdce8937fc47e3cb637b068e124 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 06:13:33 +0000 Subject: [PATCH 086/134] example output project --- UniTSyn | 2 +- fuzz/oss-fuzz | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/UniTSyn b/UniTSyn index 45c7bd1..0d9e0df 160000 --- a/UniTSyn +++ b/UniTSyn @@ -1 +1 @@ -Subproject commit 45c7bd1152ce420781d4b5ce6d4bf8b1e6c7b3ca +Subproject commit 0d9e0df455655773eaf0acabd9008aa34f0e3f73 diff --git a/fuzz/oss-fuzz b/fuzz/oss-fuzz index 4bbbeb5..8f6559b 160000 --- a/fuzz/oss-fuzz +++ b/fuzz/oss-fuzz @@ -1 +1 @@ -Subproject commit 4bbbeb59599ad38b7984191e2e83bc9a61f7fd4b +Subproject commit 8f6559b916e0d7ca6e7f974394ce6f651783c163 From 5ceb873c885960c7a3651f3a1214a327154bb17a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 06:25:47 +0000 Subject: [PATCH 087/134] type error --- fuzz/collect_fuzz.py | 1 - fuzz/collect_fuzz_python.py | 10 +- fuzz/command_util.py | 179 +++++++++------------------- fuzz/run_fuzz_all_targets_print1.py | 143 ++++++++++++++-------- 4 files changed, 155 insertions(+), 178 deletions(-) diff --git a/fuzz/collect_fuzz.py b/fuzz/collect_fuzz.py index bcc4b5e..95bba86 100644 --- a/fuzz/collect_fuzz.py +++ b/fuzz/collect_fuzz.py @@ -1,5 +1,4 @@ """script for rust fuzzing and transforming test_template""" - import logging from typing import Optional import fire diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 7ae4933..816cd63 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -85,7 +85,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: list[str]: 目标名称列表 """ out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets = [] + targets: list[str] = [] if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") @@ -158,18 +158,18 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): # 创建目标映射 target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - targets: list[tuple[str, str]] = [ + all_targets: list[tuple[str, str]] = [ # 修复: 重命名变量避免冲突 (k, v) for k, vs in target_map.items() for v in vs ] - logging.info(f"Running fuzzing on {len(targets)} targets") + logging.info(f"Running fuzzing on {len(all_targets)} targets") # 创建输入目录 for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) # 并行执行模糊测试 - parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) + parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) def generate_test_template(target_name: str, repo_path: str): """ @@ -405,7 +405,7 @@ def testgen_repos( )) def main( - repo_id: str = "data/valid_projects3.txt", + repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, jobs: int = 80, diff --git a/fuzz/command_util.py b/fuzz/command_util.py index 8270d64..e446f42 100644 --- a/fuzz/command_util.py +++ b/fuzz/command_util.py @@ -1,14 +1,8 @@ -# command_util.py - import subprocess import logging import time -import os -import pty -import tty -import termios from pathlib import Path -from typing import Optional, Tuple, List +from typing import Optional from returns.maybe import Maybe from errors import CommandError @@ -46,94 +40,65 @@ def create_popen_object( return process -def run_subprocess_with_pty( - cmd: str, - cwd: Optional[Path] = None, - timeout: Optional[int] = None, - logger: Optional[logging.Logger] = None, -) -> Tuple[int, List[str]]: +def parallel_subprocess( + tasks: list[tuple[str, Path, Optional[int], logging.Logger]] +) -> list[tuple[subprocess.Popen, str, Path]]: """ - 使用伪终端执行命令,解决终端设置问题 - - timeout: 使用 shell 的 timeout 命令处理超时 - - logger: 用于实时打印输出 - 返回: (退出码, 输出行列表) + 并行执行多个子进程 + - tasks: 任务列表,每个任务是元组 (cmd, cwd, timeout, logger) + 返回: 包含 (Popen对象, 命令, 工作目录) 的列表 """ - # 添加超时命令 - if timeout and timeout > 0: - cmd = f"timeout {timeout}s {cmd}" - if logger: - logger.debug(f"⌛ Adding timeout ({timeout}s) to command") - - # 使用伪终端执行命令 - master_fd, slave_fd = pty.openpty() - - # 设置伪终端为原始模式 - old_settings = termios.tcgetattr(master_fd) - tty.setraw(master_fd) - - process = subprocess.Popen( - cmd, - shell=True, - cwd=str(cwd) if cwd else None, - stdin=slave_fd, - stdout=slave_fd, - stderr=slave_fd, - close_fds=True, - start_new_session=True - ) - - os.close(slave_fd) + processes = [] + for cmd, cwd, timeout, logger in tasks: + process = create_popen_object( + cmd, + cwd=cwd, + capture_output=True, + timeout=timeout, + logger=logger + ) + processes.append((process, cmd, cwd)) - output_lines = [] - try: - while True: - try: - data = os.read(master_fd, 1024) - if not data: - break - decoded = data.decode("utf-8", "replace") - output_lines.append(decoded.strip()) - if logger: - logger.debug(decoded.strip()) - except OSError: - break - finally: - # 恢复终端设置 - termios.tcsetattr(master_fd, termios.TCSADRAIN, old_settings) - os.close(master_fd) - process.wait() - - return process.returncode, output_lines + return processes -def run_command_fuzz_all_targets( - cmd: str, - log_msg: str, +def wait_for_processes( + processes: list[tuple[subprocess.Popen, str, Path]], logger: logging.Logger, - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, - timeout: int = 3600, -) -> bool: - """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" - logger.info(f"▶️ {log_msg}...") - logger.debug(f" $ {cmd}") - - # 允许超时退出码 124 - allowed_codes = allowed_exit_codes.value_or([]) + [124] - - # 使用伪终端解决终端设置问题 - exit_code, _ = run_subprocess_with_pty( - cmd, - timeout=timeout, - logger=logger - ) + allowed_exit_codes: list[int] = [0] +) -> list[tuple[bool, str, Path]]: + """ + 等待所有进程完成并处理结果 + - processes: 进程列表 + - logger: 日志记录器 + - allowed_exit_codes: 允许的退出码列表 + 返回: 结果列表 (成功状态, 命令, 工作目录) + """ + results = [] - # 返回 124 表示超时 - if exit_code == 124: - logger.warning(f"⌛ Command timed out after {timeout} seconds") + for process, cmd, cwd in processes: + # 实时读取输出 + output_lines = [] + while True: + line = process.stdout.readline() + if not line and process.poll() is not None: + break + if line: + stripped_line = line.strip() + output_lines.append(stripped_line) + if logger: + logger.debug(stripped_line) + + exit_code = process.returncode + + # 检查是否超时 (124 是 timeout 命令的退出码) + if exit_code == 124: + logger.warning(f"⌛ Command timed out: {cmd}") + + # 检查是否成功 + success = exit_code in allowed_exit_codes + results.append((success, cmd, cwd)) - if exit_code not in [0, *allowed_codes]: - logger.error(f"❌ The command failed, exit code: {exit_code}") - return False - return True + return results def run_command_build_fuzz( cmd: str, @@ -145,45 +110,15 @@ def run_command_build_fuzz( """run_command used in build_fuzz.py, build_fuzzers.py""" allowed_codes = allowed_exit_codes.value_or([0]) cmd_str = f"yes | {cmd}" if not skip_yes else cmd - exit_code, _ = _run_subprocess(cmd_str, cwd=oss_fuzz_dir) + process = create_popen_object(cmd_str, cwd=oss_fuzz_dir) + process.wait() # 处理 timeout 的特殊退出码 (124) - exit_code = 124 if exit_code == 124 else exit_code + exit_code = 124 if process.returncode == 124 else process.returncode if exit_code not in allowed_codes: error_msg = f"The command failed (exit code: {exit_code})" if project: error_msg += f" for project: {project}" raise CommandError(error_msg, project=project, exit_code=exit_code) - return exit_code - - -# def run_command_fuzz_all_targets( -# cmd: str, -# log_msg: str, -# logger: logging.Logger, -# allowed_exit_codes: Maybe[list[int]] = Maybe.empty, -# timeout: int = 3600, -# ) -> bool: -# """run_command used in run_fuzz_all_targets_print1.py, run_fuzz_all_targets.py""" -# logger.info(f"▶️ {log_msg}...") -# logger.debug(f" $ {cmd}") - -# # 允许超时退出码 124 -# allowed_codes = allowed_exit_codes.value_or([]) + [124] - -# exit_code, _ = _run_subprocess( -# cmd, -# capture_output=True, -# timeout=timeout, -# logger=logger -# ) - -# # 返回 124 表示超时 -# if exit_code == 124: -# logger.warning(f"⌛ Command timed out after {timeout} seconds") - -# if exit_code not in [0, *allowed_codes]: -# logger.error(f"❌ The command failed, exit code: {exit_code}") -# return False -# return True \ No newline at end of file + return exit_code \ No newline at end of file diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py index 04400c6..ea45ddb 100644 --- a/fuzz/run_fuzz_all_targets_print1.py +++ b/fuzz/run_fuzz_all_targets_print1.py @@ -8,7 +8,7 @@ 1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project 2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing -This approach maximizes CPU utilization and provides clear overall progress[2](@ref). +This approach maximizes CPU utilization and provides clear overall progress. Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 @@ -17,16 +17,18 @@ import os import sys +fuzzaug_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, fuzzaug_root) import subprocess import argparse import logging import time from datetime import datetime from pathlib import Path -from multiprocessing import Pool, cpu_count +from multiprocessing import cpu_count from returns.maybe import Maybe, Nothing, Some -from command_util import run_command_fuzz_all_targets as run_command - +from UniTSyn.frontend.util import parallel_subprocess + def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: @@ -57,12 +59,12 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logg return targets - -def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]: - """Execute fuzz testing workflow for a single (project, target) pair""" +def setup_task_logger(project_name: str, target_name: str, oss_fuzz_dir: Path) -> logging.Logger: + """为单个任务设置日志记录器""" task_id = f"{project_name}_{target_name}" logger = logging.getLogger(task_id) - LOG_DIR = oss_fuzz_dir / "run_fuzz_all_targets_logs" + LOG_DIR = oss_fuzz_dir / "run1_fuzz_all_targets_logs_print1_parallel" + try: logger.setLevel(logging.DEBUG) LOG_DIR.mkdir(parents=True, exist_ok=True) @@ -75,38 +77,64 @@ def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuz ) file_handler.setFormatter(formatter) logger.addHandler(file_handler) - os.chdir(oss_fuzz_dir) - + return logger except (OSError, PermissionError) as e: print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") - return False, project_name, target_name - - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - try: - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running Target '{target_name}' (timeout={timeout}s)", - logger, - allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout - timeout=timeout + 300 - ) - - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") - else: - logger.error(f"❌ Target '{target_name}' failed.") + return None - return success, project_name, target_name +def create_task_command(project_name: str, target_name: str, timeout: int) -> str: + """创建任务命令字符串""" + return f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - except Exception as e: - logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") - return False, project_name, target_name - finally: - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) +def create_subprocess(task: tuple) -> subprocess.Popen: + """为每个任务创建子进程""" + project_name, target_name, timeout, oss_fuzz_dir, logger = task + cmd = create_task_command(project_name, target_name, timeout) + + # 设置日志文件,存放测试过程的输出 + task_id = f"{project_name}_{target_name}" + LOG_DIR = oss_fuzz_dir / "run2_fuzz_all_targets_logs" + LOG_DIR.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" + + # 创建并返回 Popen 对象 + process = subprocess.Popen( + cmd, + shell=True, + cwd=str(oss_fuzz_dir), + stdout=open(log_file, 'w'), + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace", + ) + + # 将任务数据附加到进程对象以便后续使用 + process.task_data = task + return process +def on_process_exit(process: subprocess.Popen) -> tuple[bool, str, str]: + """处理进程退出""" + project_name, target_name, _, oss_fuzz_dir, logger = process.task_data + + # 等待进程结束 + process.wait() + exit_code = process.returncode + + # 记录结果 + if exit_code == 124: + logger.warning(f"⌛ Command timed out: {project_name}/{target_name}") + + # 检查是否成功 + success = exit_code in [0, 1, 124] # 0=成功, 1=发现崩溃, 124=超时 + + if success: + logger.info(f"✅ Target '{target_name}' completed successfully.") + else: + logger.error(f"❌ Target '{target_name}' failed with exit code: {exit_code}") + + return success, project_name, target_name def main(): # Configure main process logging @@ -126,7 +154,6 @@ def main(): args = parser.parse_args() OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve() - LOG_DIR = OSS_FUZZ_DIR / "run_ds_logs" # 1. Read project list file try: @@ -174,25 +201,41 @@ def main(): logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - # Prepare task parameters (project, target, timeout) - tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks] - results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) - - # Execute in parallel using process pool - with Pool(args.workers) as pool: - try: - results = pool.starmap(run_single_target, tasks_with_args) - except Exception as e: - logger.error(f"💥 Critical error occurred during parallel execution: {e}") - pool.terminate() - pool.join() + # 准备任务列表 + tasks = [] + for project, target in all_fuzz_tasks: + task_logger = setup_task_logger(project, target, OSS_FUZZ_DIR) + if task_logger: + # 每个任务包含: (project, target, timeout, oss_fuzz_dir, logger) + task = (project, target, args.timeout, OSS_FUZZ_DIR, task_logger) + tasks.append(task) + task_logger.info(f"🚀 Starting test -> Project: {project}, Target: {target}") + + # 使用 parallel_subprocess 并行执行 + results = parallel_subprocess( + iterable=tasks, + jobs=args.workers, + subprocess_creator=create_subprocess, + on_exit=on_process_exit, + use_tqdm=True, + tqdm_leave=True, + tqdm_msg="Running fuzz targets" + ) # 4. Result summary and reporting logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) - failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks + success_count = 0 + failed_tasks = [] + + for task, result in results.items(): + success, project, target = result + if success: + success_count += 1 + else: + failed_tasks.append((project, target)) + total_tasks = len(all_fuzz_tasks) failed_count = len(failed_tasks) - success_count = total_tasks - failed_count # Output statistical summary logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") From d31a33352c57c462c9b60d52b424a9f740693ea9 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 06:29:54 +0000 Subject: [PATCH 088/134] English ver --- fuzz/collect_fuzz_python.py | 187 ++++++++++++++++++------------------ 1 file changed, 94 insertions(+), 93 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 816cd63..c772975 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -1,5 +1,6 @@ """ -用于Python项目模糊测试(fuzzing)和测试模板转换的脚本 +Script for Python project fuzzing and test template conversion +usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all """ from pathlib import Path import logging @@ -19,11 +20,11 @@ def build_image(repos: list[str], jobs: int): """ - 构建每个仓库对应的OSS-Fuzz项目的Docker镜像 + Build Docker images for OSS-Fuzz projects corresponding to each repository Args: - repos (list[str]): 仓库路径列表 - jobs (int): 并行任务数 + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks """ logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") log_dir = os.path.abspath("fuzz_pipeline_log") @@ -47,11 +48,11 @@ def _build_cmd(path: str): def build_fuzzer(repos: list[str], jobs: int): """ - 对构建成功的项目并行构建模糊测试器 + Build fuzzers in parallel for successfully built projects Args: - repos (list[str]): 仓库路径列表 - jobs (int): 并行任务数 + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks """ logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") log_dir = os.path.abspath("fuzz_pipeline_log") @@ -75,17 +76,17 @@ def _build_cmd(path: str): def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: """ - 发现模糊测试目标 + Discover fuzzing targets Args: - project_name (str): 项目名称 - oss_fuzz_dir (Path): OSS-Fuzz根目录 + project_name (str): Project name + oss_fuzz_dir (Path): OSS-Fuzz root directory Returns: - list[str]: 目标名称列表 + list[str]: List of target names """ out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets: list[str] = [] + targets: list[str] = [] # Fix: Add type annotation if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") @@ -104,20 +105,20 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: def fuzz_one_target(target: tuple[str, str], timeout: int): """ - 对单个模糊测试目标执行模糊测试 + Perform fuzzing on a single fuzzing target Args: - target (tuple[str, str]): (仓库路径, 目标名称) - timeout (int): 超时时间(秒) + target (tuple[str, str]): (Repository path, target name) + timeout (int): Timeout duration (seconds) Returns: - subprocess.Popen: 子进程对象 + subprocess.Popen: Subprocess object """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - # 创建输入文件路径 + # Create input file path input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) @@ -139,16 +140,16 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ - 对一组仓库执行模糊测试 + Perform fuzzing on a set of repositories Args: - repos (list[str]): 仓库路径列表 - jobs (int): 并行任务数 - timeout (int): 超时时间(秒) + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + timeout (int): Timeout duration (seconds) """ logging.info("Discovering fuzz targets") - # 获取所有目标 + # Get all targets targets_list = [] for repo in repos: project_name = os.path.basename(repo) @@ -156,42 +157,42 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - # 创建目标映射 + # Create target mapping target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - all_targets: list[tuple[str, str]] = [ # 修复: 重命名变量避免冲突 + all_targets: list[tuple[str, str]] = [ # Fix: Rename variable to avoid conflict (k, v) for k, vs in target_map.items() for v in vs ] logging.info(f"Running fuzzing on {len(all_targets)} targets") - # 创建输入目录 + # Create input directory for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - # 并行执行模糊测试 + # Execute fuzzing in parallel parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) def generate_test_template(target_name: str, repo_path: str): """ - 为单个目标生成测试模板 + Generate test template for a single target Args: - target_name (str): 目标名称 - repo_path (str): 仓库路径 + target_name (str): Target name + repo_path (str): Repository path Returns: - str: 模板文件路径 + str: Template file path """ template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) template_path = pjoin(template_dir, f"{target_name}.rs") - # 基本测试模板 - 使用字节数组而不是字节字符 + # Basic test template - use byte array instead of byte characters template = f""" #[test] fn test_{target_name}() {{ - // 测试逻辑将在这里生成 - let input = b""; // 模糊测试输入将替换这里 + // Test logic will be generated here + let input = b""; // Fuzzing input will be replaced here let result = process_input(&input); assert!(result.is_ok()); }} @@ -204,11 +205,11 @@ def generate_test_template(target_name: str, repo_path: str): def transform_repos(repos: list[str], jobs: int): """ - 为所有目标生成测试模板 + Generate test templates for all targets Args: - repos (list[str]): 仓库路径列表 - jobs (int): 并行任务数 + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks """ logging.info("Generating test templates") @@ -223,20 +224,20 @@ def _transform_repo(repo: str): def escape_special_chars(input_data: str) -> str: """ - 转义输入数据中的特殊字符 + Escape special characters in input data Args: - input_data (str): 原始输入数据 + input_data (str): Raw input data Returns: - str: 转义后的输入数据 + str: Input data with escaped characters """ - # 转义反斜杠和双引号 + # Escape backslashes and double quotes escaped = input_data.replace('\\', '\\\\').replace('"', '\\"') - # 处理非ASCII字符 + # Handle non-ASCII characters if any(ord(c) > 127 for c in escaped): - # 如果包含非ASCII字符,使用字节数组表示 + # If containing non-ASCII characters, use byte array representation byte_array = [str(b) for b in input_data.encode()] return f"let input = vec![{', '.join(byte_array)}];" @@ -244,27 +245,27 @@ def escape_special_chars(input_data: str) -> str: def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: """ - 将模糊测试输入替换到测试模板中 + Replace fuzzing input into test template Args: - template (str): 模板内容 - input_data (str): 输入数据 - idx (int): 测试索引 - target_name (str): 目标名称 + template (str): Template content + input_data (str): Input data + idx (int): Test index + target_name (str): Target name Returns: - str: 替换后的测试代码 + str: Test code after substitution """ - # 转义特殊字符并处理非ASCII字符 + # Escape special characters and handle non-ASCII characters escaped_input = escape_special_chars(input_data) - # 替换输入占位符 + # Replace input placeholder new_template = template.replace( - 'let input = b""; // 模糊测试输入将替换这里', + 'let input = b""; // Fuzzing input will be replaced here', escaped_input ) - # 替换函数名避免重复 + # Replace function name to avoid duplication return new_template.replace( f"fn test_{target_name}()", f"fn test_{target_name}_{idx}()" @@ -272,15 +273,15 @@ def substitute_input(template: str, input_data: str, idx: int, target_name: str) def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: """ - 检查字符串是否与已选列表中的任何字符串足够相似 + Check if a string is sufficiently similar to any string in the selected list Args: - selected (list[str]): 已选字符串列表 - x (str): 待检查字符串 - thresh (float): 相似度阈值 + selected (list[str]): List of selected strings + x (str): String to check + thresh (float): Similarity threshold Returns: - bool: 是否相似 + bool: Whether they are similar """ def similar(a, b): return SequenceMatcher(None, a, b).ratio() @@ -295,20 +296,20 @@ def substitute_one_repo( sim_thresh: float, ): """ - 处理单个仓库,将模糊测试输入替换到测试模板中 + Process a single repository, replace fuzzing inputs into test templates Args: - repo (str): 仓库路径 - targets (list[str]): 目标列表 - n_fuzz (int): 使用的输入数量 - strategy (str): 选择策略 - max_len (int): 最大长度 - sim_thresh (float): 相似度阈值 + repo (str): Repository path + targets (list[str]): List of targets + n_fuzz (int): Number of inputs to use + strategy (str): Selection strategy + max_len (int): Maximum length + sim_thresh (float): Similarity threshold """ template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - for target_name in targets: # 使用target_name作为循环变量 + for target_name in targets: # Use target_name as loop variable template_path = pjoin(template_dir, f"{target_name}.rs") input_path = pjoin(input_dir, target_name) @@ -333,7 +334,7 @@ def substitute_one_repo( logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") - # 选择输入策略 + # Input selection strategy if strategy == "shuffle": random.shuffle(all_inputs) inputs = list(islice( @@ -349,18 +350,18 @@ def substitute_one_repo( else: inputs = all_inputs[:n_fuzz] - # 生成测试用例 + # Generate test cases tests = [ - substitute_input(template, input_data, i, target_name) # 传递target_name + substitute_input(template, input_data, i, target_name) # Pass target_name for i, input_data in enumerate(inputs) ] - # 写入生成的测试文件 + # Write generated test file generated_path = pjoin(template_dir, f"{target_name}.inputs.rs") with open(generated_path, "w") as f: f.write("\n".join(tests)) - # 格式化代码 + # Format code subprocess.run(["rustfmt", generated_path], check=False) except Exception as e: @@ -375,17 +376,17 @@ def testgen_repos( sim_thresh: float = 0.8, ): """ - 从模糊测试输入生成测试用例 + Generate test cases from fuzzing inputs Args: - repos (list[str]): 仓库路径列表 - jobs (int): 并行任务数 - n_fuzz (int): 使用的输入数量 - strategy (str): 选择策略 - max_len (int): 最大长度 - sim_thresh (float): 相似度阈值 + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + n_fuzz (int): Number of inputs to use + strategy (str): Selection strategy + max_len (int): Maximum length + sim_thresh (float): Similarity threshold """ - # 首先获取所有目标 + # First get all targets targets_list = [] for repo in repos: project_name = os.path.basename(repo) @@ -395,7 +396,7 @@ def testgen_repos( target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - # 并行处理每个仓库 + # Process each repository in parallel with ProcessingPool(jobs) as p: list(p.map( lambda item: substitute_one_repo( @@ -416,18 +417,18 @@ def main( sim_thresh: float = 0.8, ): """ - 主函数,控制整个模糊测试流程 + Main function, controlling the entire fuzzing process Args: - repo_id (str): 项目ID文件路径 - repo_root (str): 项目根目录 - timeout (int): 超时时间 - jobs (int): 并行任务数 - pipeline (str): 流程类型 - n_fuzz (int): 使用的输入数量 - strategy (str): 选择策略 - max_len (int): 最大长度 - sim_thresh (float): 相似度阈值 + repo_id (str): Project ID file path + repo_root (str): Project root directory + timeout (int): Timeout duration + jobs (int): Number of parallel tasks + pipeline (str): Pipeline type + n_fuzz (int): Number of inputs to use + strategy (str): Selection strategy + max_len (int): Maximum length + sim_thresh (float): Similarity threshold """ try: with open(repo_id, "r") as f: @@ -435,14 +436,14 @@ def main( except FileNotFoundError: repo_id_list = [repo_id] - # 收集仓库路径 + # Collect repository paths repos = [] for repo_id in repo_id_list: repo_path = abspath(os.path.join(repo_root, repo_id)) if os.path.isdir(repo_path): repos.append(repo_path) - # 执行指定流程 + # Execute specified pipeline if pipeline == "build_image": build_image(repos, jobs) elif pipeline == "build_fuzzer": @@ -456,7 +457,7 @@ def main( elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - transform_repos(repos, jobs) # 关键添加:模板生成 + transform_repos(repos, jobs) # Key addition: Template generation fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: @@ -464,4 +465,4 @@ def main( if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - fire.Fire(main) + fire.Fire(main) \ No newline at end of file From 6717dad9a67b1b3c607fc9906f88c3041633ceb4 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 5 Aug 2025 06:35:12 +0000 Subject: [PATCH 089/134] delete privious scripts --- fuzz/build_fuzz.py | 259 ---------------------------- fuzz/build_fuzzers.py | 229 ------------------------ fuzz/command_util.py | 124 ------------- fuzz/errors.py | 20 --- fuzz/run_fuzz_all_targets.py | 211 ---------------------- fuzz/run_fuzz_all_targets_print1.py | 256 --------------------------- 6 files changed, 1099 deletions(-) delete mode 100644 fuzz/build_fuzz.py delete mode 100644 fuzz/build_fuzzers.py delete mode 100644 fuzz/command_util.py delete mode 100644 fuzz/errors.py delete mode 100644 fuzz/run_fuzz_all_targets.py delete mode 100644 fuzz/run_fuzz_all_targets_print1.py diff --git a/fuzz/build_fuzz.py b/fuzz/build_fuzz.py deleted file mode 100644 index 00f8af6..0000000 --- a/fuzz/build_fuzz.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -OSS-Fuzz Build System - -Combines Docker image building and fuzzer building capabilities. -Supports three modes: 'image', 'fuzzer', or 'both'. - -Usage: - Build images: - python3 build_fuzz.py --mode image [project_list] --oss-fuzz-dir /path/to/oss-fuzz - - Build fuzzers: - python3 build_fuzz.py --mode fuzzer [project_list] --oss-fuzz-dir /path/to/oss-fuzz --image-results results.json - - Build both: - python3 build_fuzz.py --mode both [project_list] --oss-fuzz-dir /path/to/oss-fuzz --sanitizer address - -Example: - python3 fuzz/build_fuzz.py --mode both data/valid_projects3.txt \ - --oss-fuzz-dir ./fuzz/oss-fuzz \ - --sanitizer address \ - --workers 8 -""" - -import os -import sys -import subprocess -import argparse -import logging -import json -from pathlib import Path -from returns.maybe import Maybe -from multiprocessing import Pool -from errors import BuildError, CommandError, PathError, ConfigError -from command_util import run_command_build_fuzz as run_command - -# ======================================================================================== -# Build Functions -# ======================================================================================== -def build_image(project_name: str, oss_fuzz_dir: Path) -> tuple[bool, str]: - """Docker image build workflow""" - try: - logging.info(f"Building Docker image: {project_name}") - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise PathError(f"Missing helper script: {helper_script}", project=project_name) - - # Execute image build command - run_command( - f"python3 infra/helper.py build_image {project_name}", - oss_fuzz_dir, - project=project_name - ) - - logging.info(f"✅ Docker image built: {project_name}") - return (True, project_name) - - except CommandError as e: - logging.error(f"❌ Docker build failed: {project_name} - {str(e)}") - return (False, project_name) - except Exception as e: - logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") - return (False, project_name) - -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: - """Fuzzer build workflow""" - try: - logging.info(f"Building fuzzers: {project_name} ({sanitizer} sanitizer)") - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise PathError(f"Missing helper script: {helper_script}", project=project_name) - - # Execute fuzzer build command - run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - oss_fuzz_dir, - project=project_name, - skip_yes=True - ) - - logging.info(f"✅ Fuzzers built: {project_name}") - return (True, project_name) - - except BuildError as e: - logging.error(f"❌ Fuzzer build failed: {project_name} - {str(e)}") - return (False, project_name) - except Exception as e: - logging.error(f"💥 Unhandled exception: {project_name} - {repr(e)}") - return (False, project_name) - -# ======================================================================================== -# Main Execution -# ======================================================================================== -def load_projects(file_path: Path) -> list[str]: - """Load project list from file""" - if not file_path.exists(): - raise FileNotFoundError(f"Project list not found: {file_path}") - - with open(file_path, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - - if not projects: - raise ConfigError("Project list is empty") - - logging.info(f"Loaded {len(projects)} projects from {file_path.name}") - return projects - -def execute_builds( - func, - args_list: list[tuple], - worker_count: int, - success_msg: str, - failure_msg: str -) -> tuple[dict[str, bool], list[str]]: - """Execute build tasks in parallel and return results""" - results = {} - with Pool(worker_count) as pool: - for success, project in pool.starmap(func, args_list): - results[project] = success - - failed = [p for p, success in results.items() if not success] - success_count = len(results) - len(failed) - - if failed: - logging.error(f"\n❌ {failure_msg}: {len(failed)}/{len(results)} projects") - logging.info(f"\n📊 {success_msg}: {success_count}/{len(results)} projects") - - return results, failed - -def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz Build System") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz directory path") - parser.add_argument("--mode", choices=['image', 'fuzzer', 'both'], default='both', - help="Build mode: 'image', 'fuzzer', or 'both'") - parser.add_argument("--workers", type=int, default=os.cpu_count(), - help="Number of parallel worker processes") - parser.add_argument("--sanitizer", default="address", - choices=["address", "memory", "undefined"], - help="Fuzzer sanitizer type") - parser.add_argument("--image-results", default="image_build_results.json", - help="Image build results file (JSON)") - parser.add_argument("--log-level", default="INFO", - choices=["DEBUG", "INFO", "WARNING", "ERROR"], - help="Logging detail level") - args = parser.parse_args() - - # Configure logging - logging.basicConfig( - level=getattr(logging, args.log_level), - format='[%(levelname)s] [PID:%(process)d] %(message)s' - ) - - oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - project_file = Path(args.project_list).resolve() - output_file = Path(args.image_results) - - # Sanity checks - if not oss_fuzz_dir.exists(): - logging.critical(f"OSS-Fuzz directory not found: {oss_fuzz_dir}") - sys.exit(1) - - # Load projects - try: - projects = load_projects(project_file) - except Exception as e: - logging.critical(f"❌ Failed to load projects: {e}") - sys.exit(1) - - # Image building workflow - image_results: dict[str, bool] = {} - if args.mode in ['image', 'both']: - logging.info("\n" + "="*60) - logging.info(f"Starting Docker image builds for {len(projects)} projects") - logging.info("="*60 + "\n") - - image_args = [(p, oss_fuzz_dir) for p in projects] - image_results, image_failures = execute_builds( - build_image, - image_args, - args.workers, - "✅ Docker image builds succeeded", - "🚫 Docker image builds failed" - ) - - # Save image build results - try: - with output_file.open("w") as f: - json.dump(image_results, f, indent=4) - logging.info(f"💾 Image build results saved to: {output_file}") - except Exception as e: - logging.error(f"❌ Failed to save image results: {e}") - - # Fuzzer building workflow - fuzzer_results: dict[str, bool] = {} - fuzz_projects = [] - if args.mode in ['fuzzer', 'both']: - logging.info("\n" + "="*60) - logging.info(f"Starting fuzzer builds ({args.sanitizer} sanitizer)") - logging.info("="*60 + "\n") - - # Load image results for fuzzer mode - if args.mode == 'fuzzer': - try: - with output_file.open("r") as f: - image_results = json.load(f) - logging.info(f"📋 Loaded image build results from: {output_file}") - except Exception as e: - logging.critical(f"❌ Failed to load image results: {e}") - sys.exit(1) - - # Filter projects with successful image builds - fuzz_projects = [p for p in projects if image_results.get(p, False)] - if not fuzz_projects: - logging.critical("❌ No projects with successful image builds to fuzz.") - sys.exit(0) - - logging.info(f"Attempting to build fuzzers for {len(fuzz_projects)} projects with successful image builds.") - - fuzzer_args = [(p, args.sanitizer, oss_fuzz_dir) for p in fuzz_projects] - fuzzer_results, fuzzer_failures = execute_builds( - build_fuzzers, - fuzzer_args, - args.workers, - "✅ Fuzzer builds succeeded", - "🚫 Fuzzer builds failed" - ) - - # Final summary - logging.info("\n" + "="*60) - logging.info("Build Summary") - logging.info("="*60) - - if args.mode in ['image', 'both']: - image_success = sum(1 for r in image_results.values() if r) - logging.info(f"📦 Docker Images: {image_success}/{len(projects)} succeeded") - - if args.mode in ['fuzzer', 'both'] and fuzz_projects: - fuzzer_success = sum(1 for r in fuzzer_results.values() if r) - logging.info(f"🔧 Fuzzers: {fuzzer_success}/{len(fuzz_projects)} succeeded") - - logging.info("="*60) - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user") - sys.exit(1) - except Exception as e: - logging.critical(f"💥 Critical error: {str(e)}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/build_fuzzers.py b/fuzz/build_fuzzers.py deleted file mode 100644 index ab8ac7d..0000000 --- a/fuzz/build_fuzzers.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -build_fuzzers.py - -Parallel build of OSS-Fuzz fuzzers. -Requires Docker images to be built first (using build_images.py). - -Usage: python3 build_fuzzers.py [project_list_file] --oss-fuzz-dir /path/to/oss-fuzz \ - --image-results image_build_results.json \ - [--sanitizer type] [--workers N] -Example: python3 fuzz/build_fuzzers.py data/valid_projects.txt \ - --oss-fuzz-dir ./fuzz/oss-fuzz \ - --image-results image_build_results.json \ - --sanitizer address \ - --workers 8 -""" - -import os -import sys -import subprocess -import argparse -import logging -import json -from pathlib import Path -from typing import Optional -from multiprocessing import Pool, cpu_count -from errors import BuildError, CommandError, PathError, ConfigError -from command_util import run_command_build_fuzz as run_command - -# def run_command( -# cmd: str, -# oss_fuzz_dir: Path, -# project: str = "", -# allowed_exit_codes: Optional[list[int]] = None -# ) -> int: -# """Execute a command and return the exit code""" -# allowed_exit_codes = allowed_exit_codes or [0] -# logging.info(f"▶️ Executing command: {cmd}") - -# try: -# process = subprocess.Popen( -# cmd, -# shell=True, -# cwd=str(oss_fuzz_dir), -# stdout=subprocess.PIPE, -# stderr=subprocess.PIPE, -# text=True -# ) - -# stdout, stderr = process.communicate() -# exit_code = process.returncode - -# if exit_code in allowed_exit_codes: -# return exit_code - -# # Build detailed error message -# error_msg = f"Command failed (exit code: {exit_code})" -# if project: -# error_msg += f" for project: {project}" - -# if stderr.strip(): -# error_msg += f"\nError output:\n{stderr.strip()}" - -# if stdout.strip(): -# error_msg += f"\nOutput:\n{stdout.strip()}" - -# raise CommandError(error_msg, project=project, exit_code=exit_code) - -# except FileNotFoundError as e: -# raise CommandError(f"Command not found: {cmd.split()[0]}", project=project) from e -# except OSError as e: -# raise CommandError(f"System error: {e}", project=project) from e -# except subprocess.SubprocessError as e: -# raise CommandError(f"Subprocess error: {e}", project=project) from e - -def build_fuzzers(project_name: str, sanitizer: str, oss_fuzz_dir: Path) -> tuple[bool, str]: - """Fuzzer build workflow""" - try: - logging.info("=" * 60) - logging.info(f"🔧 Building fuzzers for: {project_name}") - logging.info(f"📁 OSS-Fuzz directory: {oss_fuzz_dir}") - logging.info("=" * 60) - - # Validate paths - helper_script = oss_fuzz_dir / "infra" / "helper.py" - if not helper_script.exists(): - raise PathError(f"Missing helper script: {helper_script}", project=project_name) - - # Execute fuzzer build command - run_command( - f"python3 infra/helper.py build_fuzzers --sanitizer {sanitizer} {project_name}", - oss_fuzz_dir, - project=project_name - ) - - logging.info(f"✅ Fuzzers built: {project_name}") - return (True, project_name) - - except BuildError as e: - logging.error(f"❌ Build failed: {project_name}") - logging.error(f" Reason: {str(e)}") - return (False, project_name) - except Exception as e: - logging.error(f"🔥 Unhandled exception: {project_name}") - logging.exception(f" Exception details: {e}") - return (False, project_name) - -def main(): - parser = argparse.ArgumentParser(description="OSS-Fuzz Fuzzer Builder") - parser.add_argument("project_list", help="Project list file path") - parser.add_argument("--oss-fuzz-dir", required=True, type=str, - help="OSS-Fuzz directory path") - parser.add_argument("--sanitizer", default="address", - choices=["address", "memory", "undefined"], - help="Fuzzer sanitizer type") - parser.add_argument("--workers", type=int, default=cpu_count(), - help="Number of parallel worker processes") - parser.add_argument("--image-results", required=True, - help="JSON file with image build results from build_images.py") - args = parser.parse_args() - - logging.basicConfig( - level=logging.INFO, - format='[%(levelname)s] %(message)s' - ) - - # Process paths - oss_fuzz_dir = Path(args.oss_fuzz_dir).resolve() - logging.info(f"📁 Using OSS-Fuzz directory: {oss_fuzz_dir}") - - # Read project list - try: - project_file = Path(args.project_list) - if not project_file.exists(): - raise FileNotFoundError(f"Project list file not found: {project_file}") - - with open(project_file, "r", encoding="utf-8") as f: - all_projects = [line.strip() for line in f if line.strip()] - - if not all_projects: - raise ConfigError("Project list is empty") - - logging.info(f"📋 Loaded {len(all_projects)} projects") - except Exception as e: - logging.error(f"❌ Failed to read project list: {e}") - sys.exit(1) - - # Load image build results - try: - image_results_file = Path(args.image_results) - if not image_results_file.exists(): - raise FileNotFoundError(f"Image results file not found: {image_results_file}") - - with open(image_results_file, "r") as f: - image_results = json.load(f) - - if not isinstance(image_results, dict): - raise ConfigError("Image results should be a JSON object") - - logging.info(f"📋 Loaded image build results: {args.image_results}") - except json.JSONDecodeError as e: - logging.error(f"❌ Failed to parse image build results: {e}") - sys.exit(1) - except Exception as e: - logging.error(f"❌ Failed to load image build results: {e}") - sys.exit(1) - - # Filter projects with successful image builds - projects_to_build = [p for p in all_projects if p in image_results and image_results[p]] - image_failures = [p for p in all_projects if p not in image_results or not image_results[p]] - - if not projects_to_build: - logging.error("❌ No projects with successful image builds") - if image_failures: - logging.error(f" Projects with image build failures: {', '.join(image_failures[:10])}{'...' if len(image_failures) > 10 else ''}") - sys.exit(1) - - skipped = len(all_projects) - len(projects_to_build) - logging.info(f"🔍 Building {len(projects_to_build)} projects (skipped {skipped} due to image failures)") - - # Parallel fuzzer builds - with Pool(args.workers) as pool: - results = pool.starmap( - build_fuzzers, - [(p, args.sanitizer, oss_fuzz_dir) for p in projects_to_build] - ) - - # Output results - fuzzer_results = {project: success for success, project in results} - failed = [p for p in projects_to_build if not fuzzer_results[p]] - - success_count = len(projects_to_build) - len(failed) - logging.info(f"\n📊 Build completed: {success_count}/{len(projects_to_build)}") - - if failed: - logging.error(f"❌ Failed builds ({len(failed)} projects):") - for project in failed: - logging.error(f" - {project}") - - # Generate overall status report - overall_results = {} - for project in all_projects: - status = "❌" - if project in image_results and image_results[project]: - if project in fuzzer_results and fuzzer_results[project]: - status = "✅" - elif project in fuzzer_results: - status = "❌ (fuzzer)" - else: - status = "❌ (not built)" - else: - status = "❌ (image)" - overall_results[project] = status - - logging.info("\n📊 Overall status:") - for project, status in overall_results.items(): - logging.info(f" {project}: {status}") - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted") - sys.exit(1) - except Exception as e: - print(f"💥 Critical error: {e}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/command_util.py b/fuzz/command_util.py deleted file mode 100644 index e446f42..0000000 --- a/fuzz/command_util.py +++ /dev/null @@ -1,124 +0,0 @@ -import subprocess -import logging -import time -from pathlib import Path -from typing import Optional -from returns.maybe import Maybe -from errors import CommandError - -def create_popen_object( - cmd: str, - cwd: Optional[Path] = None, - capture_output: bool = False, - timeout: Optional[int] = None, - logger: Optional[logging.Logger] = None, -) -> subprocess.Popen: - """ - 创建并返回 Popen 对象,不等待进程结束 - - capture_output: 是否捕获输出 - - timeout: 使用 shell 的 timeout 命令处理超时 - - logger: 用于实时打印输出 - 返回: Popen 对象 - """ - # 添加超时命令 - if timeout and timeout > 0: - cmd = f"timeout {timeout}s {cmd}" - if logger: - logger.debug(f"⌛ Adding timeout ({timeout}s) to command") - - # 创建 Popen 对象 - process = subprocess.Popen( - cmd, - shell=True, - cwd=str(cwd) if cwd else None, - stdout=subprocess.PIPE if capture_output else None, - stderr=subprocess.STDOUT if capture_output else None, - text=True, - encoding="utf-8", - errors="replace", - ) - - return process - -def parallel_subprocess( - tasks: list[tuple[str, Path, Optional[int], logging.Logger]] -) -> list[tuple[subprocess.Popen, str, Path]]: - """ - 并行执行多个子进程 - - tasks: 任务列表,每个任务是元组 (cmd, cwd, timeout, logger) - 返回: 包含 (Popen对象, 命令, 工作目录) 的列表 - """ - processes = [] - for cmd, cwd, timeout, logger in tasks: - process = create_popen_object( - cmd, - cwd=cwd, - capture_output=True, - timeout=timeout, - logger=logger - ) - processes.append((process, cmd, cwd)) - - return processes - -def wait_for_processes( - processes: list[tuple[subprocess.Popen, str, Path]], - logger: logging.Logger, - allowed_exit_codes: list[int] = [0] -) -> list[tuple[bool, str, Path]]: - """ - 等待所有进程完成并处理结果 - - processes: 进程列表 - - logger: 日志记录器 - - allowed_exit_codes: 允许的退出码列表 - 返回: 结果列表 (成功状态, 命令, 工作目录) - """ - results = [] - - for process, cmd, cwd in processes: - # 实时读取输出 - output_lines = [] - while True: - line = process.stdout.readline() - if not line and process.poll() is not None: - break - if line: - stripped_line = line.strip() - output_lines.append(stripped_line) - if logger: - logger.debug(stripped_line) - - exit_code = process.returncode - - # 检查是否超时 (124 是 timeout 命令的退出码) - if exit_code == 124: - logger.warning(f"⌛ Command timed out: {cmd}") - - # 检查是否成功 - success = exit_code in allowed_exit_codes - results.append((success, cmd, cwd)) - - return results - -def run_command_build_fuzz( - cmd: str, - oss_fuzz_dir: Path, - project: str = "", - allowed_exit_codes: Maybe[list[int]] = Maybe.empty, - skip_yes: bool = False -) -> int: - """run_command used in build_fuzz.py, build_fuzzers.py""" - allowed_codes = allowed_exit_codes.value_or([0]) - cmd_str = f"yes | {cmd}" if not skip_yes else cmd - process = create_popen_object(cmd_str, cwd=oss_fuzz_dir) - process.wait() - - # 处理 timeout 的特殊退出码 (124) - exit_code = 124 if process.returncode == 124 else process.returncode - - if exit_code not in allowed_codes: - error_msg = f"The command failed (exit code: {exit_code})" - if project: - error_msg += f" for project: {project}" - raise CommandError(error_msg, project=project, exit_code=exit_code) - return exit_code \ No newline at end of file diff --git a/fuzz/errors.py b/fuzz/errors.py deleted file mode 100644 index 294e642..0000000 --- a/fuzz/errors.py +++ /dev/null @@ -1,20 +0,0 @@ -# fuzz/errors.py - -class BuildError(Exception): - """Base exception for build failures""" - def __init__(self, message: str, project: str = "", exit_code: int | None = None): - super().__init__(message) - self.project = project - self.exit_code = exit_code - -class CommandError(BuildError): - """Exception for command execution failures""" - pass - -class PathError(BuildError): - """Exception for missing paths or files""" - pass - -class ConfigError(BuildError): - """Exception for configuration errors""" - pass diff --git a/fuzz/run_fuzz_all_targets.py b/fuzz/run_fuzz_all_targets.py deleted file mode 100644 index 8990e33..0000000 --- a/fuzz/run_fuzz_all_targets.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -run_fuzz_all_targets.py - -This script employs a two-phase approach for fuzz testing: -1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets in each project -2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing - -This approach maximizes CPU utilization and provides clear overall progress[2](@ref). - -Usage: python3 run_fuzz_all_targets.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_all_targets.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 - -""" - -import os -import sys -import subprocess -import argparse -import logging -import time -from datetime import datetime -from pathlib import Path -from multiprocessing import Pool, cpu_count -from returns.maybe import Maybe, Nothing, Some -from command_util import run_command_fuzz_all_targets as run_command - -def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: - """Discover fuzz targets for a project (starting with 'fuzz_', no extension, and executable)""" - out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets: list[str] = [] - - if not out_dir.is_dir(): - logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") - return targets - - try: - for f in out_dir.iterdir(): - try: - if (f.is_file() and - f.name.startswith("fuzz_") and - '.' not in f.name and - # f.name.endswith("print1") and - os.access(f, os.X_OK)): - targets.append(f.name) - except OSError as e: - logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") - - except PermissionError: - logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") - except OSError as e: - logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") - - return targets - - -def run_single_target(project_name: str, target_name: str, timeout: int, oss_fuzz_dir: Path) -> tuple[bool, str, str]: - """Execute fuzz testing workflow for a single (project, target) pair""" - task_id = f"{project_name}_{target_name}" - logger = logging.getLogger(task_id) - LOG_DIR = oss_fuzz_dir / "run_pj3_logs" - try: - logger.setLevel(logging.DEBUG) - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" - file_handler = logging.FileHandler(log_file, encoding="utf-8") - formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - os.chdir(oss_fuzz_dir) - - except (OSError, PermissionError) as e: - print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") - return False, project_name, target_name - - logger.info(f"🚀 Starting test -> Project: {project_name}, Target: {target_name}") - try: - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - success = run_command( - cmd, - f"Running Target '{target_name}' (timeout={timeout}s)", - logger, - allowed_exit_codes=Some([1, 124]), # 1=Crashes found, 124=Timeout - timeout=timeout + 300 - ) - - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") - else: - logger.error(f"❌ Target '{target_name}' failed.") - - return success, project_name, target_name - - except Exception as e: - logger.exception(f"💥 Unexpected error occurred while running target '{target_name}': {e}") - return False, project_name, target_name - finally: - for handler in logger.handlers[:]: - handler.close() - logger.removeHandler(handler) - - -def main(): - # Configure main process logging - logging.basicConfig( - level=logging.INFO, - format="[%(levelname)s] %(message)s", - stream=sys.stdout - ) - logger = logging.getLogger("Main") - - # Set up command line argument parsing - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") - parser.add_argument("project_list", help="File path containing list of project names") - parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)") - parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") - parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") - args = parser.parse_args() - - OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve() - LOG_DIR = OSS_FUZZ_DIR / "run_fuzz_all_targets_logs" - - # 1. Read project list file - try: - project_path = Path(args.project_list) - with open(project_path, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") - except FileNotFoundError: - logger.error(f"❌ Project list file not found: {args.project_list}") - sys.exit(1) - except (OSError, PermissionError) as e: - logger.exception(f"💥 Error occurred while reading project list: {e}") - sys.exit(1) - - # 2. Discovery phase: Collect all fuzz targets - logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) - all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples - try: - original_cwd = Path.cwd() # Save current working directory - os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory - for project_name in projects: - targets = discover_targets(project_name, OSS_FUZZ_DIR, logger) - - if targets: - logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") - for target in targets: - all_fuzz_tasks.append((project_name, target)) - else: - logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") - os.chdir(original_cwd) # Restore original working directory - except FileNotFoundError: - logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") - sys.exit(1) - except Exception as e: - logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") - sys.exit(1) - - # Check if any valid targets were found - if not all_fuzz_tasks: - logger.info("🤷 No executable Fuzz Targets found. Program exits.") - sys.exit(0) - - # 3. Execution phase: Parallel fuzz testing - logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") - logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) - logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - - # Prepare task parameters (project, target, timeout) - tasks_with_args = [(p, t, args.timeout, OSS_FUZZ_DIR) for p, t in all_fuzz_tasks] - results: list[tuple[bool, str, str]] = [] # Store results (success, project, target) - - # Execute in parallel using process pool - with Pool(args.workers) as pool: - try: - results = pool.starmap(run_single_target, tasks_with_args) - except Exception as e: - logger.error(f"💥 Critical error occurred during parallel execution: {e}") - pool.terminate() - pool.join() - - # 4. Result summary and reporting - logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) - failed_tasks = [(p, t) for success, p, t in results if not success] # List of failed tasks - total_tasks = len(all_fuzz_tasks) - failed_count = len(failed_tasks) - success_count = total_tasks - failed_count - - # Output statistical summary - logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") - if failed_tasks: - logger.error("❌ The following Fuzz Targets failed:") - for project, target in failed_tasks: - logger.error(f" - Project: {project}, Target: {target}") # List detailed failures - - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user.") - sys.exit(1) - except Exception as e: - print(f"\n💥 Fatal error in main program: {e}") - sys.exit(1) \ No newline at end of file diff --git a/fuzz/run_fuzz_all_targets_print1.py b/fuzz/run_fuzz_all_targets_print1.py deleted file mode 100644 index ea45ddb..0000000 --- a/fuzz/run_fuzz_all_targets_print1.py +++ /dev/null @@ -1,256 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -run_fuzz_all_targets_print1.py - -This script employs a two-phase approach for fuzz testing: -1. Discovery phase: Traverse all specified projects to collect all executable fuzz targets which start with "fuzz_" and end with "print1" in each project -2. Execution phase: Create a task pool containing all (project, target) pairs and execute them in parallel using multiprocessing - -This approach maximizes CPU utilization and provides clear overall progress. - -Usage: python3 run_fuzz_all_targets_print1.py [project_list_file] [--oss-fuzz-dir /your/custom/path/to/oss-fuzz] [--timeout seconds] [--workers N] -Example: python3 fuzz/run_fuzz_all_targets_print1.py data/valid_projects3.txt --oss-fuzz-dir /home/jiayiguo/FuzzAug/fuzz/oss-fuzz --timeout 60 --workers 4 - -""" - -import os -import sys -fuzzaug_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -sys.path.insert(0, fuzzaug_root) -import subprocess -import argparse -import logging -import time -from datetime import datetime -from pathlib import Path -from multiprocessing import cpu_count -from returns.maybe import Maybe, Nothing, Some -from UniTSyn.frontend.util import parallel_subprocess - - - -def discover_targets(project_name: str, oss_fuzz_dir: Path, logger: logging.Logger) -> list[str]: - """Discover fuzz targets for a project (starting with 'fuzz_', ending with 'print1', no extension, and executable)""" - out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets: list[str] = [] - - if not out_dir.is_dir(): - logger.warning(f"Build output directory for project {project_name} does not exist: {out_dir}") - return targets - - try: - for f in out_dir.iterdir(): - try: - if (f.is_file() and - f.name.startswith("fuzz_") and - '.' not in f.name and - f.name.endswith("print1") and - os.access(f, os.X_OK)): - targets.append(f.name) - except OSError as e: - logger.warning(f"⚠️ Error while checking file {f.name}, skipped: {e}") - - except PermissionError: - logger.error(f"🔒 Insufficient permissions to access directory: {out_dir}") - except OSError as e: - logger.exception(f"💥 Operating system error occurred while discovering targets: {e}") - - return targets - -def setup_task_logger(project_name: str, target_name: str, oss_fuzz_dir: Path) -> logging.Logger: - """为单个任务设置日志记录器""" - task_id = f"{project_name}_{target_name}" - logger = logging.getLogger(task_id) - LOG_DIR = oss_fuzz_dir / "run1_fuzz_all_targets_logs_print1_parallel" - - try: - logger.setLevel(logging.DEBUG) - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" - file_handler = logging.FileHandler(log_file, encoding="utf-8") - formatter = logging.Formatter( - "%(asctime)s [PID:%(process)d] %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" - ) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - return logger - except (OSError, PermissionError) as e: - print(f"❌ Critical error occurred during initialization of task {task_id}: {e}") - return None - -def create_task_command(project_name: str, target_name: str, timeout: int) -> str: - """创建任务命令字符串""" - return f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - -def create_subprocess(task: tuple) -> subprocess.Popen: - """为每个任务创建子进程""" - project_name, target_name, timeout, oss_fuzz_dir, logger = task - cmd = create_task_command(project_name, target_name, timeout) - - # 设置日志文件,存放测试过程的输出 - task_id = f"{project_name}_{target_name}" - LOG_DIR = oss_fuzz_dir / "run2_fuzz_all_targets_logs" - LOG_DIR.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = LOG_DIR / f"run_{task_id}_{timestamp}.log" - - # 创建并返回 Popen 对象 - process = subprocess.Popen( - cmd, - shell=True, - cwd=str(oss_fuzz_dir), - stdout=open(log_file, 'w'), - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - errors="replace", - ) - - # 将任务数据附加到进程对象以便后续使用 - process.task_data = task - return process - -def on_process_exit(process: subprocess.Popen) -> tuple[bool, str, str]: - """处理进程退出""" - project_name, target_name, _, oss_fuzz_dir, logger = process.task_data - - # 等待进程结束 - process.wait() - exit_code = process.returncode - - # 记录结果 - if exit_code == 124: - logger.warning(f"⌛ Command timed out: {project_name}/{target_name}") - - # 检查是否成功 - success = exit_code in [0, 1, 124] # 0=成功, 1=发现崩溃, 124=超时 - - if success: - logger.info(f"✅ Target '{target_name}' completed successfully.") - else: - logger.error(f"❌ Target '{target_name}' failed with exit code: {exit_code}") - - return success, project_name, target_name - -def main(): - # Configure main process logging - logging.basicConfig( - level=logging.INFO, - format="[%(levelname)s] %(message)s", - stream=sys.stdout - ) - logger = logging.getLogger("Main") - - # Set up command line argument parsing - parser = argparse.ArgumentParser(description="OSS-Fuzz Parallel Fuzz Testing Tool") - parser.add_argument("project_list", help="File path containing list of project names") - parser.add_argument("--oss-fuzz-dir", type=Path, required=True, help="Path to the oss-fuzz directory (e.g., /path/to/oss-fuzz)") - parser.add_argument("--timeout", type=int, default=60, help="Timeout duration per Fuzz Target (seconds)") - parser.add_argument("--workers", type=int, default=cpu_count(), help="Number of parallel worker processes") - args = parser.parse_args() - - OSS_FUZZ_DIR = args.oss_fuzz_dir.resolve() - - # 1. Read project list file - try: - project_path = Path(args.project_list) - with open(project_path, "r", encoding="utf-8") as f: - projects = [line.strip() for line in f if line.strip()] - logger.info(f"📋 Loaded {len(projects)} projects from {project_path.name}.") - except FileNotFoundError: - logger.error(f"❌ Project list file not found: {args.project_list}") - sys.exit(1) - except (OSError, PermissionError) as e: - logger.exception(f"💥 Error occurred while reading project list: {e}") - sys.exit(1) - - # 2. Discovery phase: Collect all fuzz targets - logger.info("\n" + "=" * 20 + " Phase 1: Discover all Fuzz Targets " + "=" * 20) - all_fuzz_tasks: list[tuple[str, str]] = [] # Store (project, target) tuples - try: - original_cwd = Path.cwd() # Save current working directory - os.chdir(OSS_FUZZ_DIR) # Switch to OSS-Fuzz directory - for project_name in projects: - targets = discover_targets(project_name, OSS_FUZZ_DIR, logger) - - if targets: - logger.info(f"🔍 Discovered {len(targets)} targets in project '{project_name}': {', '.join(targets)}") - for target in targets: - all_fuzz_tasks.append((project_name, target)) - else: - logger.warning(f"⚠️ No Fuzz Targets found in project '{project_name}'.") - os.chdir(original_cwd) # Restore original working directory - except FileNotFoundError: - logger.error(f"❌ OSS-Fuzz directory does not exist: {OSS_FUZZ_DIR}") - sys.exit(1) - except Exception as e: - logger.exception(f"💥 Unknown error occurred during discovery phase: {e}") - sys.exit(1) - - # Check if any valid targets were found - if not all_fuzz_tasks: - logger.info("🤷 No executable Fuzz Targets found. Program exits.") - sys.exit(0) - - # 3. Execution phase: Parallel fuzz testing - logger.info(f"\n✅ Discovery phase completed. Found a total of {len(all_fuzz_tasks)} fuzz tasks.") - logger.info("=" * 20 + " Phase 2: Parallel Fuzzing Execution " + "=" * 23) - logger.info(f"🚀 Starting parallel testing with {args.workers} worker processes (timeout per target: {args.timeout}s)...") - - # 准备任务列表 - tasks = [] - for project, target in all_fuzz_tasks: - task_logger = setup_task_logger(project, target, OSS_FUZZ_DIR) - if task_logger: - # 每个任务包含: (project, target, timeout, oss_fuzz_dir, logger) - task = (project, target, args.timeout, OSS_FUZZ_DIR, task_logger) - tasks.append(task) - task_logger.info(f"🚀 Starting test -> Project: {project}, Target: {target}") - - # 使用 parallel_subprocess 并行执行 - results = parallel_subprocess( - iterable=tasks, - jobs=args.workers, - subprocess_creator=create_subprocess, - on_exit=on_process_exit, - use_tqdm=True, - tqdm_leave=True, - tqdm_msg="Running fuzz targets" - ) - - # 4. Result summary and reporting - logger.info("\n" + "=" * 20 + " Phase 3: Result Summary " + "=" * 28) - success_count = 0 - failed_tasks = [] - - for task, result in results.items(): - success, project, target = result - if success: - success_count += 1 - else: - failed_tasks.append((project, target)) - - total_tasks = len(all_fuzz_tasks) - failed_count = len(failed_tasks) - - # Output statistical summary - logger.info(f"📊 Fuzzing completed: Success {success_count}/{total_tasks}, Failed {failed_count}/{total_tasks}") - if failed_tasks: - logger.error("❌ The following Fuzz Targets failed:") - for project, target in failed_tasks: - logger.error(f" - Project: {project}, Target: {target}") # List detailed failures - - -if __name__ == "__main__": - try: - main() - except KeyboardInterrupt: - print("\n🛑 Operation interrupted by user.") - sys.exit(1) - except Exception as e: - print(f"\n💥 Fatal error in main program: {e}") - sys.exit(1) \ No newline at end of file From 12666062186de1b0ad0eabb901a5884d91e75380 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 6 Aug 2025 01:12:50 +0000 Subject: [PATCH 090/134] python template --- fuzz/collect_fuzz_python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index c772975..98fad4f 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -409,7 +409,7 @@ def main( repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 80, + jobs: int = 4, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From d42927feef0162e2a066f760b571327796b1db91 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 6 Aug 2025 01:13:12 +0000 Subject: [PATCH 091/134] python template --- fuzz/collect_fuzz_python.py | 98 +++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 98fad4f..9e46db1 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -86,7 +86,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: list[str]: List of target names """ out_dir = oss_fuzz_dir / "build" / "out" / project_name - targets: list[str] = [] # Fix: Add type annotation + targets: list[str] = [] if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") @@ -159,7 +159,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): # Create target mapping target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - all_targets: list[tuple[str, str]] = [ # Fix: Rename variable to avoid conflict + all_targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs ] @@ -174,7 +174,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): def generate_test_template(target_name: str, repo_path: str): """ - Generate test template for a single target + Generate Python test template for a single target Args: target_name (str): Target name @@ -185,18 +185,37 @@ def generate_test_template(target_name: str, repo_path: str): """ template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) - template_path = pjoin(template_dir, f"{target_name}.rs") - - # Basic test template - use byte array instead of byte characters - template = f""" - #[test] - fn test_{target_name}() {{ - // Test logic will be generated here - let input = b""; // Fuzzing input will be replaced here - let result = process_input(&input); - assert!(result.is_ok()); - }} - """ + + # Use .py extension for Python test files + template_path = pjoin(template_dir, f"{target_name}.py") + + # Basic Python test template + template = f"""#!/usr/bin/env python3 +import sys +import os +import unittest + +# Add the project directory to the Python path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) + +# Import the function to test +from {target_name.replace('fuzz_', '')} import process_input + +class Test{target_name.capitalize()}(unittest.TestCase): + def test_generated(self): + \"\"\"Test generated from fuzzing input\"\"\" + # Fuzzing input will be replaced here + input_data = b"" + + # Call the function under test + result = (input_data) + + # Add assertions based on expected behavior + self.assertIsNotNone(result) + +if __name__ == '__main__': + unittest.main() +""" with open(template_path, "w") as f: f.write(template) @@ -224,7 +243,7 @@ def _transform_repo(repo: str): def escape_special_chars(input_data: str) -> str: """ - Escape special characters in input data + Escape special characters in input data for Python byte strings Args: input_data (str): Raw input data @@ -232,20 +251,13 @@ def escape_special_chars(input_data: str) -> str: Returns: str: Input data with escaped characters """ - # Escape backslashes and double quotes - escaped = input_data.replace('\\', '\\\\').replace('"', '\\"') - - # Handle non-ASCII characters - if any(ord(c) > 127 for c in escaped): - # If containing non-ASCII characters, use byte array representation - byte_array = [str(b) for b in input_data.encode()] - return f"let input = vec![{', '.join(byte_array)}];" - - return f"let input = b\"{escaped}\";" + # For Python, we can use repr() to safely represent byte strings + # This will handle all special characters and non-ASCII bytes + return repr(input_data.encode('latin-1', 'replace')) def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: """ - Replace fuzzing input into test template + Replace fuzzing input into Python test template Args: template (str): Template content @@ -256,19 +268,19 @@ def substitute_input(template: str, input_data: str, idx: int, target_name: str) Returns: str: Test code after substitution """ - # Escape special characters and handle non-ASCII characters + # Escape special characters for Python escaped_input = escape_special_chars(input_data) # Replace input placeholder new_template = template.replace( - 'let input = b""; // Fuzzing input will be replaced here', - escaped_input + 'input_data = b""', + f'input_data = {escaped_input}' ) - # Replace function name to avoid duplication + # Replace test method name to avoid duplication return new_template.replace( - f"fn test_{target_name}()", - f"fn test_{target_name}_{idx}()" + f"def test_generated(self):", + f"def test_{idx}(self):" ) def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: @@ -309,8 +321,9 @@ def substitute_one_repo( template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - for target_name in targets: # Use target_name as loop variable - template_path = pjoin(template_dir, f"{target_name}.rs") + for target_name in targets: + # Use .py extension for Python test files + template_path = pjoin(template_dir, f"{target_name}.py") input_path = pjoin(input_dir, target_name) try: @@ -352,17 +365,20 @@ def substitute_one_repo( # Generate test cases tests = [ - substitute_input(template, input_data, i, target_name) # Pass target_name + substitute_input(template, input_data, i, target_name) for i, input_data in enumerate(inputs) ] - # Write generated test file - generated_path = pjoin(template_dir, f"{target_name}.inputs.rs") + # Write generated test file with .py extension + generated_path = pjoin(template_dir, f"{target_name}.inputs.py") with open(generated_path, "w") as f: f.write("\n".join(tests)) - # Format code - subprocess.run(["rustfmt", generated_path], check=False) + # Format Python code (optional) + try: + subprocess.run(["black", generated_path], check=False) + except FileNotFoundError: + logging.warning("Black formatter not found, skipping formatting") except Exception as e: logging.error(f"Error processing {target_name}: {e}") @@ -457,7 +473,7 @@ def main( elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - transform_repos(repos, jobs) # Key addition: Template generation + transform_repos(repos, jobs) # Generate test templates fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: From f8941f1c9c444fa19354c05cbedee3773466a726 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 6 Aug 2025 23:26:29 +0000 Subject: [PATCH 092/134] correct the template --- fuzz/collect_fuzz_python.py | 111 +++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 9e46db1..6a974d5 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -175,51 +175,53 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): def generate_test_template(target_name: str, repo_path: str): """ Generate Python test template for a single target - + Args: target_name (str): Target name repo_path (str): Repository path - + Returns: str: Template file path """ template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) - - # Use .py extension for Python test files + + # Ensure __init__.py exists + init_path = pjoin(template_dir, "__init__.py") + if not os.path.exists(init_path): + with open(init_path, "w") as f: + f.write("") + template_path = pjoin(template_dir, f"{target_name}.py") - - # Basic Python test template + + # Python test template with placeholder template = f"""#!/usr/bin/env python3 import sys import os import unittest -# Add the project directory to the Python path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) +# Add the parent directory to the Python path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Import the function to test -from {target_name.replace('fuzz_', '')} import process_input +try: + from {target_name} import TestOneInput as TestClass +except ImportError: + from {target_name} import TestInput as TestClass class Test{target_name.capitalize()}(unittest.TestCase): def test_generated(self): \"\"\"Test generated from fuzzing input\"\"\" - # Fuzzing input will be replaced here - input_data = b"" - - # Call the function under test - result = (input_data) - - # Add assertions based on expected behavior + input_data = b"" # FUZZ_PLACEHOLDER + result = TestClass(input_data) self.assertIsNotNone(result) if __name__ == '__main__': unittest.main() """ - with open(template_path, "w") as f: f.write(template) - + return template_path def transform_repos(repos: list[str], jobs: int): @@ -309,49 +311,45 @@ def substitute_one_repo( ): """ Process a single repository, replace fuzzing inputs into test templates - - Args: - repo (str): Repository path - targets (list[str]): List of targets - n_fuzz (int): Number of inputs to use - strategy (str): Selection strategy - max_len (int): Maximum length - sim_thresh (float): Similarity threshold """ template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - + + # Ensure __init__.py exists + init_path = pjoin(template_dir, "__init__.py") + if not os.path.exists(init_path): + with open(init_path, "w") as f: + f.write("") + for target_name in targets: - # Use .py extension for Python test files template_path = pjoin(template_dir, f"{target_name}.py") input_path = pjoin(input_dir, target_name) - + try: if not os.path.exists(template_path): logging.warning(f"Template file not found: {template_path}") continue - + if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - + with open(template_path) as f_template: template = f_template.read() - + with open(input_path, "r") as f_input: all_inputs = [line.strip() for line in f_input if line.strip()] - + if not all_inputs: logging.warning(f"No valid inputs found for {target_name}") continue - + logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") - # Input selection strategy + # Input selection if strategy == "shuffle": random.shuffle(all_inputs) - inputs = list(islice( - (x for x in all_inputs if len(x) < max_len), n_fuzz)) + inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz)) elif strategy == "reverse": inputs = [] for x in reversed(all_inputs): @@ -362,24 +360,35 @@ def substitute_one_repo( inputs.append(x) else: inputs = all_inputs[:n_fuzz] - - # Generate test cases - tests = [ - substitute_input(template, input_data, i, target_name) - for i, input_data in enumerate(inputs) - ] - - # Write generated test file with .py extension + + # Split header and method body + if "def test_generated(self):" not in template: + logging.error(f"Template format error in {template_path}") + continue + + header, method = template.split("def test_generated(self):", 1) + + # Generate multiple methods + test_methods = [] + for i, input_data in enumerate(inputs): + escaped_input = escape_special_chars(input_data) + method_code = f" def test_{i}(self):" + method + method_code = method_code.replace('input_data = b""', f"input_data = {escaped_input}") + test_methods.append(method_code.strip()) + + final_code = header + "\n\n" + "\n\n".join(test_methods) + + # Write to inputs.py generated_path = pjoin(template_dir, f"{target_name}.inputs.py") with open(generated_path, "w") as f: - f.write("\n".join(tests)) - - # Format Python code (optional) + f.write(final_code) + + # Format (optional) try: subprocess.run(["black", generated_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - + except Exception as e: logging.error(f"Error processing {target_name}: {e}") @@ -422,10 +431,10 @@ def testgen_repos( )) def main( - repo_id: str = "data/valid_projects.txt", + repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 4, + jobs: int = 8, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From b357a5296f9fcac8adad55b3e966cf0e26c97d08 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 7 Aug 2025 02:07:16 +0000 Subject: [PATCH 093/134] ver2 wrong template --- fuzz/collect_fuzz_python.py | 62 ++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 6a974d5..e1f92de 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -301,6 +301,8 @@ def similar(a, b): return SequenceMatcher(None, a, b).ratio() return any(similar(x, y) > thresh for y in selected) +import re + def substitute_one_repo( repo: str, targets: list[str], @@ -361,29 +363,67 @@ def substitute_one_repo( else: inputs = all_inputs[:n_fuzz] - # Split header and method body - if "def test_generated(self):" not in template: - logging.error(f"Template format error in {template_path}") + # Extract structure from template + match = re.search(r"(class\s+Test\w+\(unittest\.TestCase\):)", template) + if not match: + logging.error(f"Class definition not found in template: {template_path}") continue - header, method = template.split("def test_generated(self):", 1) + class_def_index = match.start() + before_class = template[:class_def_index] + class_and_after = template[class_def_index:] + + method_match = re.search(r"def\s+test_generated\(self\):", class_and_after) + if not method_match: + logging.error(f"test_generated method not found in template: {template_path}") + continue - # Generate multiple methods + method_start = method_match.end() + class_header = class_and_after[:method_start] + method_indent_block = class_and_after[method_start:] + + method_lines = method_indent_block.splitlines() + method_body = [] + footer_lines = [] + for line in method_lines: + if line.strip() == "": + continue + if not line.startswith(" "): # outside method block + footer_lines.append(line) + elif not footer_lines: # still inside method + method_body.append(line) + + method_body_str = "\n".join(method_body) + footer_str = "\n".join(footer_lines) + + # Build all test methods test_methods = [] for i, input_data in enumerate(inputs): escaped_input = escape_special_chars(input_data) - method_code = f" def test_{i}(self):" + method - method_code = method_code.replace('input_data = b""', f"input_data = {escaped_input}") - test_methods.append(method_code.strip()) + test_func = f" def test_{i}(self):\n" + test_func += "\n".join( + " " + line.lstrip().replace('input_data = b""', f"input_data = {escaped_input}") + for line in method_body if line.strip() + ) + test_methods.append(test_func) + + if not test_methods: + # Fallback: generate dummy method to avoid syntax error + test_methods = [" def test_placeholder(self):\n self.assertTrue(True)"] + + final_code = before_class.rstrip() + "\n" + class_header.rstrip() + "\n\n" + final_code += "\n\n".join(test_methods).rstrip() + "\n" + + if footer_str.strip(): + final_code += "\n\n" + footer_str.strip() + "\n" - final_code = header + "\n\n" + "\n\n".join(test_methods) - # Write to inputs.py + # Write to output generated_path = pjoin(template_dir, f"{target_name}.inputs.py") with open(generated_path, "w") as f: f.write(final_code) - # Format (optional) + # Format with black try: subprocess.run(["black", generated_path], check=False) except FileNotFoundError: From 1637e23e5869c7db92135571f0291b642555d107 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 7 Aug 2025 02:24:55 +0000 Subject: [PATCH 094/134] ok --- fuzz/collect_fuzz_python.py | 88 ++++++++++++++----------------------- 1 file changed, 33 insertions(+), 55 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index e1f92de..16ae813 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -212,9 +212,8 @@ def generate_test_template(target_name: str, repo_path: str): class Test{target_name.capitalize()}(unittest.TestCase): def test_generated(self): \"\"\"Test generated from fuzzing input\"\"\" - input_data = b"" # FUZZ_PLACEHOLDER + input_data = b"" result = TestClass(input_data) - self.assertIsNotNone(result) if __name__ == '__main__': unittest.main() @@ -313,6 +312,7 @@ def substitute_one_repo( ): """ Process a single repository, replace fuzzing inputs into test templates + and generate {target_name}.inputs.py files. """ template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") @@ -324,21 +324,13 @@ def substitute_one_repo( f.write("") for target_name in targets: - template_path = pjoin(template_dir, f"{target_name}.py") input_path = pjoin(input_dir, target_name) try: - if not os.path.exists(template_path): - logging.warning(f"Template file not found: {template_path}") - continue - if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - with open(template_path) as f_template: - template = f_template.read() - with open(input_path, "r") as f_input: all_inputs = [line.strip() for line in f_input if line.strip()] @@ -363,62 +355,47 @@ def substitute_one_repo( else: inputs = all_inputs[:n_fuzz] - # Extract structure from template - match = re.search(r"(class\s+Test\w+\(unittest\.TestCase\):)", template) - if not match: - logging.error(f"Class definition not found in template: {template_path}") - continue + # Header + file_header = f"""import sys +import os +import unittest - class_def_index = match.start() - before_class = template[:class_def_index] - class_and_after = template[class_def_index:] +# 将项目目录加入 Python 路径,确保能导入上层模块 +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - method_match = re.search(r"def\s+test_generated\(self\):", class_and_after) - if not method_match: - logging.error(f"test_generated method not found in template: {template_path}") - continue +try: + from {target_name} import TestOneInput as TestClass +except ImportError: + from {target_name} import TestInput as TestClass + + +class Test{target_name.capitalize()}(unittest.TestCase):""" - method_start = method_match.end() - class_header = class_and_after[:method_start] - method_indent_block = class_and_after[method_start:] - - method_lines = method_indent_block.splitlines() - method_body = [] - footer_lines = [] - for line in method_lines: - if line.strip() == "": - continue - if not line.startswith(" "): # outside method block - footer_lines.append(line) - elif not footer_lines: # still inside method - method_body.append(line) - - method_body_str = "\n".join(method_body) - footer_str = "\n".join(footer_lines) - - # Build all test methods + # Method body template + method_body_template = [ + '"""Test generated from fuzzing input"""', + 'input_data = b""', + 'result = TestClass(input_data)', + ] + + # Generate test methods test_methods = [] for i, input_data in enumerate(inputs): escaped_input = escape_special_chars(input_data) test_func = f" def test_{i}(self):\n" - test_func += "\n".join( - " " + line.lstrip().replace('input_data = b""', f"input_data = {escaped_input}") - for line in method_body if line.strip() - ) + for line in method_body_template: + replaced_line = line.replace('input_data = b""', f"input_data = {escaped_input}") + test_func += f" {replaced_line}\n" test_methods.append(test_func) if not test_methods: - # Fallback: generate dummy method to avoid syntax error test_methods = [" def test_placeholder(self):\n self.assertTrue(True)"] - final_code = before_class.rstrip() + "\n" + class_header.rstrip() + "\n\n" - final_code += "\n\n".join(test_methods).rstrip() + "\n" - - if footer_str.strip(): - final_code += "\n\n" + footer_str.strip() + "\n" + # Combine full file + final_code = file_header + "\n\n" + "\n\n".join(test_methods) + final_code += "\n\nif __name__ == '__main__':\n unittest.main()\n" - - # Write to output + # Write output file generated_path = pjoin(template_dir, f"{target_name}.inputs.py") with open(generated_path, "w") as f: f.write(final_code) @@ -432,6 +409,7 @@ def substitute_one_repo( except Exception as e: logging.error(f"Error processing {target_name}: {e}") + def testgen_repos( repos: list[str], jobs: int, @@ -471,10 +449,10 @@ def testgen_repos( )) def main( - repo_id: str = "data/valid_projects3.txt", + repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 60, - jobs: int = 8, + jobs: int = 4, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From 85b6ed907a274df3177c9ac7b20cd64cb8c18f53 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 11 Aug 2025 16:11:41 +0000 Subject: [PATCH 095/134] testgen file change into copy the original and then add input_data =b"" --- fuzz/collect_fuzz_python.py | 154 +++++++++++++++--------------------- 1 file changed, 64 insertions(+), 90 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 16ae813..a509b08 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -137,6 +137,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): except Exception as e: logging.error(f"Error starting fuzzer: {e}") return None + def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ @@ -300,7 +301,7 @@ def similar(a, b): return SequenceMatcher(None, a, b).ratio() return any(similar(x, y) > thresh for y in selected) -import re + def substitute_one_repo( repo: str, @@ -311,103 +312,76 @@ def substitute_one_repo( sim_thresh: float, ): """ - Process a single repository, replace fuzzing inputs into test templates - and generate {target_name}.inputs.py files. + 从原 fuzz target 复制文件,按 fuzz input 生成多个 testgen 文件。 """ - template_dir = pjoin(repo, "tests-gen") input_dir = pjoin(repo, "fuzz_inputs") - - # Ensure __init__.py exists - init_path = pjoin(template_dir, "__init__.py") - if not os.path.exists(init_path): - with open(init_path, "w") as f: - f.write("") + template_dir = pjoin(repo, "tests-gen") + os.makedirs(template_dir, exist_ok=True) for target_name in targets: - input_path = pjoin(input_dir, target_name) - - try: - if not os.path.exists(input_path): - logging.warning(f"Input file not found: {input_path}") - continue - - with open(input_path, "r") as f_input: - all_inputs = [line.strip() for line in f_input if line.strip()] - - if not all_inputs: - logging.warning(f"No valid inputs found for {target_name}") - continue - - logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") - - # Input selection - if strategy == "shuffle": - random.shuffle(all_inputs) - inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz)) - elif strategy == "reverse": - inputs = [] - for x in reversed(all_inputs): - if len(inputs) >= n_fuzz: - break - if len(x) > max_len or has_similar(inputs, x, sim_thresh): - continue - inputs.append(x) - else: - inputs = all_inputs[:n_fuzz] - - # Header - file_header = f"""import sys -import os -import unittest - -# 将项目目录加入 Python 路径,确保能导入上层模块 -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) - -try: - from {target_name} import TestOneInput as TestClass -except ImportError: - from {target_name} import TestInput as TestClass - + source_file = pjoin(repo, f"{target_name}.py") + if not os.path.exists(source_file): + logging.warning(f"Source file not found: {source_file}") + continue -class Test{target_name.capitalize()}(unittest.TestCase):""" - - # Method body template - method_body_template = [ - '"""Test generated from fuzzing input"""', - 'input_data = b""', - 'result = TestClass(input_data)', - ] - - # Generate test methods - test_methods = [] - for i, input_data in enumerate(inputs): - escaped_input = escape_special_chars(input_data) - test_func = f" def test_{i}(self):\n" - for line in method_body_template: - replaced_line = line.replace('input_data = b""', f"input_data = {escaped_input}") - test_func += f" {replaced_line}\n" - test_methods.append(test_func) - - if not test_methods: - test_methods = [" def test_placeholder(self):\n self.assertTrue(True)"] - - # Combine full file - final_code = file_header + "\n\n" + "\n\n".join(test_methods) - final_code += "\n\nif __name__ == '__main__':\n unittest.main()\n" - - # Write output file - generated_path = pjoin(template_dir, f"{target_name}.inputs.py") - with open(generated_path, "w") as f: - f.write(final_code) + input_path = pjoin(input_dir, target_name) + if not os.path.exists(input_path): + logging.warning(f"Input file not found: {input_path}") + continue + + with open(input_path, "r") as f_input: + all_inputs = [line.strip() for line in f_input if line.strip()] + + if not all_inputs: + logging.warning(f"No valid inputs found for {target_name}") + continue + + logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") + + # 策略选择输入 + if strategy == "shuffle": + random.shuffle(all_inputs) + inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz)) + elif strategy == "reverse": + inputs = [] + for x in reversed(all_inputs): + if len(inputs) >= n_fuzz: + break + if len(x) > max_len or has_similar(inputs, x, sim_thresh): + continue + inputs.append(x) + else: + inputs = all_inputs[:n_fuzz] + + # 每个 fuzz input 生成一个单独的文件 + for idx, fuzz_input in enumerate(inputs, start=1): + with open(source_file, "r") as f_src: + code = f_src.read() + + # 删除 main 和 __main__ 块 + code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S) + code = re.sub(r"\n\s*main\s*\(.*?\)", "", code) + + # 找到 TestInput / TestOneInput 并改成 test_{idx} + code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) + code = re.sub(r"\bTestInput\b", f"test_{idx}", code) + + # 在 test_{idx} 函数定义后插入 data 赋值 + + code = re.sub( + rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", # 新增捕获组匹配函数体首行缩进 + rf"\1\2data = {escape_special_chars(fuzz_input)}\n\2", # 复用缩进 + code, + ) + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") + with open(out_path, "w") as f_out: + f_out.write(code) - # Format with black try: - subprocess.run(["black", generated_path], check=False) + subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - except Exception as e: - logging.error(f"Error processing {target_name}: {e}") def testgen_repos( @@ -449,9 +423,9 @@ def testgen_repos( )) def main( - repo_id: str = "data/valid_projects.txt", + repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", - timeout: int = 60, + timeout: int = 10, jobs: int = 4, pipeline: str = "all", n_fuzz: int = 100, From 618d156e3029170a5141f6b7d9b926e09592d045 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 05:21:06 +0000 Subject: [PATCH 096/134] only read b' ' inputs --- fuzz/collect_fuzz_python.py | 60 ++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index a509b08..3084053 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -329,8 +329,19 @@ def substitute_one_repo( logging.warning(f"Input file not found: {input_path}") continue - with open(input_path, "r") as f_input: - all_inputs = [line.strip() for line in f_input if line.strip()] + # 修改1:过滤警告行,只提取有效的字节字符串 + all_inputs = [] + with open(input_path, "rb") as f_input: # 二进制模式读取 + for line in f_input: + try: + decoded_line = line.decode('utf-8', errors='replace').strip() + # 只提取以 b' 开头的有效字节字符串 + if decoded_line.startswith(('b"', "b'")): + all_inputs.append(decoded_line) + except UnicodeDecodeError: + # 处理无法解码的行 + logging.warning(f"Skipping invalid input line in {input_path}") + continue if not all_inputs: logging.warning(f"No valid inputs found for {target_name}") @@ -365,14 +376,17 @@ def substitute_one_repo( # 找到 TestInput / TestOneInput 并改成 test_{idx} code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) code = re.sub(r"\bTestInput\b", f"test_{idx}", code) - - # 在 test_{idx} 函数定义后插入 data 赋值 - + def insert_fuzz_input(match): + indent = match.group(2) + # 使用原始字符串避免转义解析 + return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}" + # 修改2:直接使用原始字节字符串,无需额外转义 code = re.sub( - rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", # 新增捕获组匹配函数体首行缩进 - rf"\1\2data = {escape_special_chars(fuzz_input)}\n\2", # 复用缩进 + rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", + insert_fuzz_input, # 使用回调函数 code, ) + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") with open(out_path, "w") as f_out: f_out.write(code) @@ -381,8 +395,40 @@ def substitute_one_repo( subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") + + for idx, fuzz_input in enumerate(inputs, start=1): + with open(source_file, "r") as f_src: + code = f_src.read() + + # 删除 main 和 __main__ 块(保持不变) + code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S) + code = re.sub(r"\n\s*main\s*\(.*?\)", "", code) + + # 重命名测试函数(保持不变) + code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) + code = re.sub(r"\bTestInput\b", f"test_{idx}", code) + # ==== 核心修复:使用 lambda 函数绕过转义解析 ==== + def insert_fuzz_input(match): + indent = match.group(2) # 提取原缩进 + return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}" + code = re.sub( + rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", + insert_fuzz_input, # 替换为函数引用 + code, + ) + + # 写入文件(保持不变) + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") + with open(out_path, "w") as f_out: + f_out.write(code) + + # 格式化(保持不变) + try: + subprocess.run(["black", out_path], check=False) + except FileNotFoundError: + logging.warning("Black formatter not found, skipping formatting") def testgen_repos( repos: list[str], From 5a7f51366067061545d38f311b336fe67076bef0 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 06:12:28 +0000 Subject: [PATCH 097/134] remove transform --- fuzz/collect_fuzz_python.py | 138 ++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 3084053..8783cfc 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -173,75 +173,75 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): # Execute fuzzing in parallel parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) -def generate_test_template(target_name: str, repo_path: str): - """ - Generate Python test template for a single target - - Args: - target_name (str): Target name - repo_path (str): Repository path - - Returns: - str: Template file path - """ - template_dir = pjoin(repo_path, "tests-gen") - os.makedirs(template_dir, exist_ok=True) - - # Ensure __init__.py exists - init_path = pjoin(template_dir, "__init__.py") - if not os.path.exists(init_path): - with open(init_path, "w") as f: - f.write("") - - template_path = pjoin(template_dir, f"{target_name}.py") - - # Python test template with placeholder - template = f"""#!/usr/bin/env python3 -import sys -import os -import unittest - -# Add the parent directory to the Python path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -# Import the function to test -try: - from {target_name} import TestOneInput as TestClass -except ImportError: - from {target_name} import TestInput as TestClass - -class Test{target_name.capitalize()}(unittest.TestCase): - def test_generated(self): - \"\"\"Test generated from fuzzing input\"\"\" - input_data = b"" - result = TestClass(input_data) - -if __name__ == '__main__': - unittest.main() -""" - with open(template_path, "w") as f: - f.write(template) - - return template_path - -def transform_repos(repos: list[str], jobs: int): - """ - Generate test templates for all targets +# def generate_test_template(target_name: str, repo_path: str): +# """ +# Generate Python test template for a single target + +# Args: +# target_name (str): Target name +# repo_path (str): Repository path + +# Returns: +# str: Template file path +# """ +# template_dir = pjoin(repo_path, "tests-gen") +# os.makedirs(template_dir, exist_ok=True) + +# # Ensure __init__.py exists +# init_path = pjoin(template_dir, "__init__.py") +# if not os.path.exists(init_path): +# with open(init_path, "w") as f: +# f.write("") + +# template_path = pjoin(template_dir, f"{target_name}.py") + +# # Python test template with placeholder +# template = f"""#!/usr/bin/env python3 +# import sys +# import os +# import unittest + +# # Add the parent directory to the Python path +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +# # Import the function to test +# try: +# from {target_name} import TestOneInput as TestClass +# except ImportError: +# from {target_name} import TestInput as TestClass + +# class Test{target_name.capitalize()}(unittest.TestCase): +# def test_generated(self): +# \"\"\"Test generated from fuzzing input\"\"\" +# input_data = b"" +# result = TestClass(input_data) + +# if __name__ == '__main__': +# unittest.main() +# """ +# with open(template_path, "w") as f: +# f.write(template) + +# return template_path + +# def transform_repos(repos: list[str], jobs: int): +# """ +# Generate test templates for all targets - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - """ - logging.info("Generating test templates") +# Args: +# repos (list[str]): List of repository paths +# jobs (int): Number of parallel tasks +# """ +# logging.info("Generating test templates") - def _transform_repo(repo: str): - project_name = os.path.basename(repo) - oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - return [generate_test_template(t, repo) for t in targets] +# def _transform_repo(repo: str): +# project_name = os.path.basename(repo) +# oss_fuzz_dir = Path(repo).parent.parent +# targets = discover_targets(project_name, oss_fuzz_dir) +# return [generate_test_template(t, repo) for t in targets] - with ProcessingPool(jobs) as p: - return list(p.map(_transform_repo, repos)) +# with ProcessingPool(jobs) as p: +# return list(p.map(_transform_repo, repos)) def escape_special_chars(input_data: str) -> str: """ @@ -515,12 +515,12 @@ def main( fuzz_repos(repos, jobs, timeout) elif pipeline == "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - elif pipeline == "transform": - transform_repos(repos, jobs) + # elif pipeline == "transform": + # transform_repos(repos, jobs) elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - transform_repos(repos, jobs) # Generate test templates + # transform_repos(repos, jobs) # Generate test templates fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: From d5fd84d70ef2111e141d9bb5da352d450cc0af42 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 06:12:49 +0000 Subject: [PATCH 098/134] clean the inputs and testgen --- fuzz/clean_fuzz_dir.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 fuzz/clean_fuzz_dir.py diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py new file mode 100644 index 0000000..b4e2b0b --- /dev/null +++ b/fuzz/clean_fuzz_dir.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import os +import shutil + +ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" + +def clean_project_dirs(root_dir): + removed_files = 0 + removed_dirs = 0 + + # 遍历一级项目目录 + for project in os.listdir(root_dir): + project_path = os.path.join(root_dir, project) + if not os.path.isdir(project_path): + continue + + # 删除 fuzz_inputs 文件夹 + fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs") + if os.path.isdir(fuzz_inputs_path): + shutil.rmtree(fuzz_inputs_path) + print(f"🗑️ Removed dir: {fuzz_inputs_path}") + removed_dirs += 1 + + # 删除 tests-gen 文件夹 + tests_gen_path = os.path.join(project_path, "tests-gen") + if os.path.isdir(tests_gen_path): + shutil.rmtree(tests_gen_path) + print(f"🗑️ Removed dir: {tests_gen_path}") + removed_dirs += 1 + + # 删除 .inputs.py 文件 + for fname in os.listdir(project_path): + if fname.endswith(".inputs.py"): + file_path = os.path.join(project_path, fname) + os.remove(file_path) + print(f"🗑️ Removed file: {file_path}") + removed_files += 1 + + print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.") + +if __name__ == "__main__": + clean_project_dirs(ROOT_DIR) From bcc22b09aaccb2aaaef2a47a5c76cca5583c12b1 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 07:11:56 +0000 Subject: [PATCH 099/134] set max_file --- fuzz/collect_fuzz_python.py | 218 +++++++++++++++++++++++++++--------- 1 file changed, 166 insertions(+), 52 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 8783cfc..fc93297 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -102,6 +102,92 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: logging.error(f"Error discovering targets: {e}") return targets +import threading +import time + + + +def monitor_file_size(file_path, process, max_size=MAX_INPUT_FILE_SIZE): + """ + 监控 fuzz_input 文件大小,如果超过 max_size 就杀掉进程 + + Args: + file_path (str): 监控文件路径 + process (subprocess.Popen): 关联的进程 + max_size (int): 最大文件大小 (默认 500MB) + """ + project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path))) + target_name = os.path.basename(file_path).split('.')[0] + + # 关键日志:启动监控 + logging.info( + f"Started file monitor for {project_name}/{target_name} " + f"(max size: {max_size//(1024 * 1024)}MB)" + ) + + last_size = 0 + last_log = time.time() + + while process.poll() is None: # 进程还在运行 + try: + if not os.path.exists(file_path): + # 关键日志:文件丢失警告 + logging.warning( + f"Output file missing: {file_path}. " + f"Process status: {'running' if process.poll() is None else 'exited'}" + ) + time.sleep(1) + continue + + size = os.path.getsize(file_path) + + # 记录显著的尺寸变化 (+10%) + if size > 0 and abs(size - last_size)/size > 0.1: + logging.info( + f"File size changed: {file_path} " + f"{last_size//1024}KB → {size//1024}KB" + ) + last_size = size + + # 每分钟记录一次当前尺寸 + if time.time() - last_log > 60: + logging.debug( + f"File size update: {file_path} = {size//1024}KB" + ) + last_log = time.time() + + if size > max_size: + # 关键警告:文件超限 + logging.warning( + f"Terminating {project_name}/{target_name}: " + f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB" + ) + process.kill() + # 记录终止后的最终文件大小 + final_size = os.path.getsize(file_path) + logging.info( + f"After termination: {file_path} = {final_size//1024}KB" + ) + break + + except Exception as e: + logging.error( + f"File monitor error for {project_name}/{target_name}: " + f"{type(e).__name__} - {str(e)}" + ) + # 防止错误导致高频重试 + time.sleep(5) + + time.sleep(1) # 每秒检查一次 + + # 进程结束时记录 + exit_code = process.poll() + if exit_code is not None: + logging.info( + f"Process ended: {project_name}/{target_name} " + f"Exit code: {exit_code} " + f"Output file: {os.path.exists(file_path)}" + ) def fuzz_one_target(target: tuple[str, str], timeout: int): """ @@ -119,59 +205,91 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) # Create input file path - input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) + input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt") os.makedirs(os.path.dirname(input_file_path), exist_ok=True) try: - with open(input_file_path, "w") as input_file: - return subprocess.Popen( - [ - "bash", - "-c", - f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" - ], - cwd=oss_fuzz_root, - stdout=input_file, - stderr=subprocess.DEVNULL, - ) + # 创建空文件确保路径存在 + open(input_file_path, "w").close() + + # 关键日志:开始执行前记录所有参数 + logging.info( + f"Starting fuzzer for {project_name}/{target_name}: " + f"timeout={timeout}s, output={input_file_path}" + ) + + cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" + + # 关键日志:记录完整命令 + logging.debug(f"Executing command: {cmd}") + + # 记录进程开始时间 + start_time = datetime.now() + process = subprocess.Popen( + ["bash", "-c", cmd], + cwd=oss_fuzz_root, + stdout=open(input_file_path, "w"), + stderr=subprocess.DEVNULL, + ) + + # 关键日志:记录进程ID和启动时间 + logging.info( + f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} " + f"for {project_name}/{target_name}" + ) + + # 启动文件监控线程 + threading.Thread( + target=monitor_file_size, + args=(input_file_path, process), + daemon=True + ).start() + + return process except Exception as e: - logging.error(f"Error starting fuzzer: {e}") + # 详细错误日志 + logging.error( + f"Failed to start fuzzer for {project_name}/{target_name}: " + f"{type(e).__name__} - {str(e)}" + ) + # 记录堆栈跟踪 + logging.debug("Exception details:", exc_info=True) return None -def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): - """ - Perform fuzzing on a set of repositories +# def fuzz_one_target(target: tuple[str, str], timeout: int): +# """ +# Perform fuzzing on a set of repositories - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - timeout (int): Timeout duration (seconds) - """ - logging.info("Discovering fuzz targets") +# Args: +# repos (list[str]): List of repository paths +# jobs (int): Number of parallel tasks +# timeout (int): Timeout duration (seconds) +# """ +# logging.info("Discovering fuzz targets") - # Get all targets - targets_list = [] - for repo in repos: - project_name = os.path.basename(repo) - oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - targets_list.append(targets) +# # Get all targets +# targets_list = [] +# for repo in repos: +# project_name = os.path.basename(repo) +# oss_fuzz_dir = Path(repo).parent.parent +# targets = discover_targets(project_name, oss_fuzz_dir) +# targets_list.append(targets) - # Create target mapping - target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - all_targets: list[tuple[str, str]] = [ - (k, v) for k, vs in target_map.items() for v in vs - ] +# # Create target mapping +# target_map = {repo: targets for repo, targets in zip(repos, targets_list)} +# all_targets: list[tuple[str, str]] = [ +# (k, v) for k, vs in target_map.items() for v in vs +# ] - logging.info(f"Running fuzzing on {len(all_targets)} targets") +# logging.info(f"Running fuzzing on {len(all_targets)} targets") - # Create input directory - for repo in repos: - os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) +# # Create input directory +# for repo in repos: +# os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - # Execute fuzzing in parallel - parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) +# # Execute fuzzing in parallel +# parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) # def generate_test_template(target_name: str, repo_path: str): # """ @@ -332,16 +450,11 @@ def substitute_one_repo( # 修改1:过滤警告行,只提取有效的字节字符串 all_inputs = [] with open(input_path, "rb") as f_input: # 二进制模式读取 - for line in f_input: - try: - decoded_line = line.decode('utf-8', errors='replace').strip() - # 只提取以 b' 开头的有效字节字符串 - if decoded_line.startswith(('b"', "b'")): - all_inputs.append(decoded_line) - except UnicodeDecodeError: - # 处理无法解码的行 - logging.warning(f"Skipping invalid input line in {input_path}") - continue + raw_inputs = [line for line in f_input] + valid_inputs = [ + data for data in raw_inputs + if len(data) <= max_len + ][:n_fuzz] if not all_inputs: logging.warning(f"No valid inputs found for {target_name}") @@ -478,6 +591,7 @@ def main( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, + max_file_size=500 * 1024 * 1024 # 500MB限制 ): """ Main function, controlling the entire fuzzing process @@ -512,7 +626,7 @@ def main( elif pipeline == "build_fuzzer": build_fuzzer(repos, jobs) elif pipeline == "fuzz": - fuzz_repos(repos, jobs, timeout) + fuzz_repos(repos, jobs, timeout,max_file_size) elif pipeline == "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) # elif pipeline == "transform": @@ -521,7 +635,7 @@ def main( build_image(repos, jobs) build_fuzzer(repos, jobs) # transform_repos(repos, jobs) # Generate test templates - fuzz_repos(repos, jobs, timeout) + fuzz_repos(repos, jobs, timeout,max_file_size) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: logging.error(f"Unknown pipeline: {pipeline}") From 020c970eebf8625541c642b6205e75d323ea081b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 07:56:38 +0000 Subject: [PATCH 100/134] max input file --- fuzz/collect_fuzz_python.py | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index fc93297..2318082 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -107,7 +107,7 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: -def monitor_file_size(file_path, process, max_size=MAX_INPUT_FILE_SIZE): +def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024): """ 监控 fuzz_input 文件大小,如果超过 max_size 就杀掉进程 @@ -255,6 +255,39 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): # 记录堆栈跟踪 logging.debug("Exception details:", exc_info=True) return None +def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): + """ + Perform fuzzing on a set of repositories + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + timeout (int): Timeout duration (seconds) + """ + logging.info("Discovering fuzz targets") + + # Get all targets + targets_list = [] + for repo in repos: + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + targets_list.append(targets) + + # Create target mapping + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + all_targets: list[tuple[str, str]] = [ + (k, v) for k, vs in target_map.items() for v in vs + ] + + logging.info(f"Running fuzzing on {len(all_targets)} targets") + + # Create input directory + for repo in repos: + os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) + + # Execute fuzzing in parallel + parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) # def fuzz_one_target(target: tuple[str, str], timeout: int): @@ -442,7 +475,7 @@ def substitute_one_repo( logging.warning(f"Source file not found: {source_file}") continue - input_path = pjoin(input_dir, target_name) + input_path = pjoin(input_dir, f"{target_name}.txt") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue @@ -591,7 +624,7 @@ def main( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, - max_file_size=500 * 1024 * 1024 # 500MB限制 + ): """ Main function, controlling the entire fuzzing process @@ -626,7 +659,7 @@ def main( elif pipeline == "build_fuzzer": build_fuzzer(repos, jobs) elif pipeline == "fuzz": - fuzz_repos(repos, jobs, timeout,max_file_size) + fuzz_repos(repos, jobs, timeout) elif pipeline == "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) # elif pipeline == "transform": @@ -635,7 +668,7 @@ def main( build_image(repos, jobs) build_fuzzer(repos, jobs) # transform_repos(repos, jobs) # Generate test templates - fuzz_repos(repos, jobs, timeout,max_file_size) + fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: logging.error(f"Unknown pipeline: {pipeline}") From 8787982e86bd5437c88d3f9a61c82f3872c796df Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 08:59:41 +0000 Subject: [PATCH 101/134] input b"" --- fuzz/collect_fuzz_python.py | 103 +++++++++++++++--------------------- 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 2318082..8f5473f 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -453,7 +453,6 @@ def similar(a, b): return any(similar(x, y) > thresh for y in selected) - def substitute_one_repo( repo: str, targets: list[str], @@ -475,40 +474,52 @@ def substitute_one_repo( logging.warning(f"Source file not found: {source_file}") continue + # 修复:添加 .txt 后缀 input_path = pjoin(input_dir, f"{target_name}.txt") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - # 修改1:过滤警告行,只提取有效的字节字符串 - all_inputs = [] - with open(input_path, "rb") as f_input: # 二进制模式读取 - raw_inputs = [line for line in f_input] - valid_inputs = [ - data for data in raw_inputs - if len(data) <= max_len - ][:n_fuzz] - - if not all_inputs: + # 读取所有有效的输入数据 + valid_inputs = [] + with open(input_path, "rb") as f_input: + for line in f_input: + try: + # 尝试解码行以检查内容 + decoded = line.decode('utf-8', errors='replace') + + # 只处理以 b' 或 b" 开头的行(这些是实际的测试输入) + if decoded.startswith(("b'", 'b"')): + # 提取字节数据部分 + if decoded.startswith("b'") and decoded.endswith("'\n"): + byte_data = line[2:-2] # 移除 b' 和末尾的 '\n + elif decoded.startswith('b"') and decoded.endswith('"\n'): + byte_data = line[2:-2] # 移除 b" 和末尾的 "\n + else: + continue + + # 只保留有效长度的输入 + if 0 < len(byte_data) <= max_len: + valid_inputs.append(byte_data) + except UnicodeDecodeError: + # 如果无法解码,可能是二进制数据,直接使用 + if 0 < len(line) <= max_len: + valid_inputs.append(line) + + if not valid_inputs: logging.warning(f"No valid inputs found for {target_name}") continue - logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}") + logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # 策略选择输入 + # 策略选择输入 - 最多选择 n_fuzz 个输入 if strategy == "shuffle": - random.shuffle(all_inputs) - inputs = list(islice((x for x in all_inputs if len(x) < max_len), n_fuzz)) + random.shuffle(valid_inputs) + inputs = valid_inputs[:n_fuzz] elif strategy == "reverse": - inputs = [] - for x in reversed(all_inputs): - if len(inputs) >= n_fuzz: - break - if len(x) > max_len or has_similar(inputs, x, sim_thresh): - continue - inputs.append(x) + inputs = list(reversed(valid_inputs))[:n_fuzz] else: - inputs = all_inputs[:n_fuzz] + inputs = valid_inputs[:n_fuzz] # 每个 fuzz input 生成一个单独的文件 for idx, fuzz_input in enumerate(inputs, start=1): @@ -522,60 +533,30 @@ def substitute_one_repo( # 找到 TestInput / TestOneInput 并改成 test_{idx} code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) code = re.sub(r"\bTestInput\b", f"test_{idx}", code) + + # 插入测试数据 - 确保使用二进制表示 def insert_fuzz_input(match): indent = match.group(2) - # 使用原始字符串避免转义解析 - return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}" - # 修改2:直接使用原始字节字符串,无需额外转义 - code = re.sub( - rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", - insert_fuzz_input, # 使用回调函数 - code, - ) + # 使用 repr() 安全表示二进制数据 + byte_repr = repr(fuzz_input) + return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}" - out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") - with open(out_path, "w") as f_out: - f_out.write(code) - - try: - subprocess.run(["black", out_path], check=False) - except FileNotFoundError: - logging.warning("Black formatter not found, skipping formatting") - - for idx, fuzz_input in enumerate(inputs, start=1): - with open(source_file, "r") as f_src: - code = f_src.read() - - # 删除 main 和 __main__ 块(保持不变) - code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S) - code = re.sub(r"\n\s*main\s*\(.*?\)", "", code) - - # 重命名测试函数(保持不变) - code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) - code = re.sub(r"\bTestInput\b", f"test_{idx}", code) - - # ==== 核心修复:使用 lambda 函数绕过转义解析 ==== - def insert_fuzz_input(match): - indent = match.group(2) # 提取原缩进 - return f"{match.group(1)}{indent}data = {fuzz_input}\n{indent}" - + # 在测试函数中插入数据 code = re.sub( rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", - insert_fuzz_input, # 替换为函数引用 + insert_fuzz_input, code, ) - # 写入文件(保持不变) out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") with open(out_path, "w") as f_out: f_out.write(code) - # 格式化(保持不变) + # 格式化代码 try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - def testgen_repos( repos: list[str], jobs: int, From 606c34f76652bf9e5ec8faf13962445ee68d8d9c Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 12 Aug 2025 23:56:58 +0000 Subject: [PATCH 102/134] modify the method of writing files into PIPE --- fuzz/collect_fuzz_python.py | 92 ++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 8f5473f..4d4cda4 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -190,71 +190,67 @@ def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024): ) def fuzz_one_target(target: tuple[str, str], timeout: int): - """ - Perform fuzzing on a single fuzzing target - - Args: - target (tuple[str, str]): (Repository path, target name) - timeout (int): Timeout duration (seconds) - - Returns: - subprocess.Popen: Subprocess object - """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - - # Create input file path + input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt") os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - + try: - # 创建空文件确保路径存在 - open(input_file_path, "w").close() - - # 关键日志:开始执行前记录所有参数 - logging.info( - f"Starting fuzzer for {project_name}/{target_name}: " - f"timeout={timeout}s, output={input_file_path}" - ) - + # 清空输出文件 + open(input_file_path, "wb").close() + + logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}") + cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" - - # 关键日志:记录完整命令 logging.debug(f"Executing command: {cmd}") - - # 记录进程开始时间 + start_time = datetime.now() process = subprocess.Popen( ["bash", "-c", cmd], cwd=oss_fuzz_root, - stdout=open(input_file_path, "w"), - stderr=subprocess.DEVNULL, - ) - - # 关键日志:记录进程ID和启动时间 - logging.info( - f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} " - f"for {project_name}/{target_name}" + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + bufsize=1 ) - - # 启动文件监控线程 - threading.Thread( - target=monitor_file_size, + + logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}") + + # 启动监控线程 + monitor_thread = threading.Thread( + target=monitor_file_size, args=(input_file_path, process), daemon=True - ).start() - + ) + monitor_thread.start() + + # 从 PIPE 读取并写入文件 + with open(input_file_path, "ab") as output_file: + for chunk in iter(lambda: process.stdout.read(4096), b""): + output_file.write(chunk) + + process.wait() + + if os.path.getsize(input_file_path) == 0: + logging.warning(f"Output file is empty: {input_file_path}") + error_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}_error.log") + with open(error_path, "wb") as error_file: + subprocess.run( + ["bash", "-c", cmd], + cwd=oss_fuzz_root, + stdout=error_file, + stderr=subprocess.STDOUT, + ) + logging.info(f"Error output saved to {error_path}") + return process + except Exception as e: - # 详细错误日志 - logging.error( - f"Failed to start fuzzer for {project_name}/{target_name}: " - f"{type(e).__name__} - {str(e)}" - ) - # 记录堆栈跟踪 + logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}") logging.debug("Exception details:", exc_info=True) return None + def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ Perform fuzzing on a set of repositories @@ -598,8 +594,8 @@ def testgen_repos( def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", - timeout: int = 10, - jobs: int = 4, + timeout: int = 30, + jobs: int = 8, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From 503063c7cc33e5a27129ff77fd3ce62ed0606784 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Wed, 13 Aug 2025 00:33:20 +0000 Subject: [PATCH 103/134] use max total time; remove size monitor --- fuzz/collect_fuzz_python.py | 183 +++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 85 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 4d4cda4..e9ecc22 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -102,92 +102,92 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: logging.error(f"Error discovering targets: {e}") return targets -import threading -import time +# import threading +# import time -def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024): - """ - 监控 fuzz_input 文件大小,如果超过 max_size 就杀掉进程 - - Args: - file_path (str): 监控文件路径 - process (subprocess.Popen): 关联的进程 - max_size (int): 最大文件大小 (默认 500MB) - """ - project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path))) - target_name = os.path.basename(file_path).split('.')[0] - - # 关键日志:启动监控 - logging.info( - f"Started file monitor for {project_name}/{target_name} " - f"(max size: {max_size//(1024 * 1024)}MB)" - ) - - last_size = 0 - last_log = time.time() +# def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024): +# """ +# 监控 fuzz_input 文件大小,如果超过 max_size 就杀掉进程 - while process.poll() is None: # 进程还在运行 - try: - if not os.path.exists(file_path): - # 关键日志:文件丢失警告 - logging.warning( - f"Output file missing: {file_path}. " - f"Process status: {'running' if process.poll() is None else 'exited'}" - ) - time.sleep(1) - continue +# Args: +# file_path (str): 监控文件路径 +# process (subprocess.Popen): 关联的进程 +# max_size (int): 最大文件大小 (默认 500MB) +# """ +# project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path))) +# target_name = os.path.basename(file_path).split('.')[0] + +# # 关键日志:启动监控 +# logging.info( +# f"Started file monitor for {project_name}/{target_name} " +# f"(max size: {max_size//(1024 * 1024)}MB)" +# ) + +# last_size = 0 +# last_log = time.time() + +# while process.poll() is None: # 进程还在运行 +# try: +# if not os.path.exists(file_path): +# # 关键日志:文件丢失警告 +# logging.warning( +# f"Output file missing: {file_path}. " +# f"Process status: {'running' if process.poll() is None else 'exited'}" +# ) +# time.sleep(1) +# continue - size = os.path.getsize(file_path) +# size = os.path.getsize(file_path) - # 记录显著的尺寸变化 (+10%) - if size > 0 and abs(size - last_size)/size > 0.1: - logging.info( - f"File size changed: {file_path} " - f"{last_size//1024}KB → {size//1024}KB" - ) - last_size = size +# # 记录显著的尺寸变化 (+10%) +# if size > 0 and abs(size - last_size)/size > 0.1: +# logging.info( +# f"File size changed: {file_path} " +# f"{last_size//1024}KB → {size//1024}KB" +# ) +# last_size = size - # 每分钟记录一次当前尺寸 - if time.time() - last_log > 60: - logging.debug( - f"File size update: {file_path} = {size//1024}KB" - ) - last_log = time.time() +# # 每分钟记录一次当前尺寸 +# if time.time() - last_log > 60: +# logging.debug( +# f"File size update: {file_path} = {size//1024}KB" +# ) +# last_log = time.time() - if size > max_size: - # 关键警告:文件超限 - logging.warning( - f"Terminating {project_name}/{target_name}: " - f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB" - ) - process.kill() - # 记录终止后的最终文件大小 - final_size = os.path.getsize(file_path) - logging.info( - f"After termination: {file_path} = {final_size//1024}KB" - ) - break +# if size > max_size: +# # 关键警告:文件超限 +# logging.warning( +# f"Terminating {project_name}/{target_name}: " +# f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB" +# ) +# process.kill() +# # 记录终止后的最终文件大小 +# final_size = os.path.getsize(file_path) +# logging.info( +# f"After termination: {file_path} = {final_size//1024}KB" +# ) +# break - except Exception as e: - logging.error( - f"File monitor error for {project_name}/{target_name}: " - f"{type(e).__name__} - {str(e)}" - ) - # 防止错误导致高频重试 - time.sleep(5) +# except Exception as e: +# logging.error( +# f"File monitor error for {project_name}/{target_name}: " +# f"{type(e).__name__} - {str(e)}" +# ) +# # 防止错误导致高频重试 +# time.sleep(5) - time.sleep(1) # 每秒检查一次 - - # 进程结束时记录 - exit_code = process.poll() - if exit_code is not None: - logging.info( - f"Process ended: {project_name}/{target_name} " - f"Exit code: {exit_code} " - f"Output file: {os.path.exists(file_path)}" - ) +# time.sleep(1) # 每秒检查一次 + +# # 进程结束时记录 +# exit_code = process.poll() +# if exit_code is not None: +# logging.info( +# f"Process ended: {project_name}/{target_name} " +# f"Exit code: {exit_code} " +# f"Output file: {os.path.exists(file_path)}" +# ) def fuzz_one_target(target: tuple[str, str], timeout: int): repo_path, target_name = target @@ -203,7 +203,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}") - cmd = f"timeout {timeout} python3 infra/helper.py run_fuzzer {project_name} {target_name}" + cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" logging.debug(f"Executing command: {cmd}") start_time = datetime.now() @@ -217,13 +217,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}") - # 启动监控线程 - monitor_thread = threading.Thread( - target=monitor_file_size, - args=(input_file_path, process), - daemon=True - ) - monitor_thread.start() + # --- 按要求:去掉文件大小监控线程 --- + # monitor_thread = threading.Thread( + # target=monitor_file_size, + # args=(input_file_path, process), + # daemon=True + # ) + # monitor_thread.start() + # --- 结束 --- # 从 PIPE 读取并写入文件 with open(input_file_path, "ab") as output_file: @@ -251,6 +252,7 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): logging.debug("Exception details:", exc_info=True) return None + def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): """ Perform fuzzing on a set of repositories @@ -286,6 +288,17 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) +# def fuzz_one_target(target: tuple[str, str], timeout: int): +# """ +# Perform fuzzing on a set of repositories +# """ +# pass + +# def generate_test_template(target_name: str, repo_path: str): +# ... +# def transform_repos(repos: list[str], jobs: int): +# ... + # def fuzz_one_target(target: tuple[str, str], timeout: int): # """ # Perform fuzzing on a set of repositories @@ -595,7 +608,7 @@ def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 30, - jobs: int = 8, + jobs: int = 2, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From 708c8a9e4109e57d2a91c7c155a02c1ba669a5e2 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 01:38:50 +0000 Subject: [PATCH 104/134] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=B9=B6=E8=A1=8C?= =?UTF-8?q?=E9=94=99=E8=AF=AF,=20=E5=86=99=E5=85=A5=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E8=BF=98=E6=98=AF=E7=9B=B4=E6=8E=A5=E5=86=99=E5=85=A5=E6=96=87?= =?UTF-8?q?=E4=BB=B6=20=E5=BB=B6=E6=97=B6=E6=8E=A7=E5=88=B6=E4=B8=BAmax=20?= =?UTF-8?q?total=20time?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 254 +++++++++++++++--------------------- 1 file changed, 104 insertions(+), 150 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index e9ecc22..3282bca 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -102,158 +102,42 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: logging.error(f"Error discovering targets: {e}") return targets -# import threading -# import time - - - -# def monitor_file_size(file_path, process, max_size=500 * 1024 * 1024): -# """ -# 监控 fuzz_input 文件大小,如果超过 max_size 就杀掉进程 - -# Args: -# file_path (str): 监控文件路径 -# process (subprocess.Popen): 关联的进程 -# max_size (int): 最大文件大小 (默认 500MB) -# """ -# project_name = os.path.basename(os.path.dirname(os.path.dirname(file_path))) -# target_name = os.path.basename(file_path).split('.')[0] - -# # 关键日志:启动监控 -# logging.info( -# f"Started file monitor for {project_name}/{target_name} " -# f"(max size: {max_size//(1024 * 1024)}MB)" -# ) - -# last_size = 0 -# last_log = time.time() - -# while process.poll() is None: # 进程还在运行 -# try: -# if not os.path.exists(file_path): -# # 关键日志:文件丢失警告 -# logging.warning( -# f"Output file missing: {file_path}. " -# f"Process status: {'running' if process.poll() is None else 'exited'}" -# ) -# time.sleep(1) -# continue - -# size = os.path.getsize(file_path) - -# # 记录显著的尺寸变化 (+10%) -# if size > 0 and abs(size - last_size)/size > 0.1: -# logging.info( -# f"File size changed: {file_path} " -# f"{last_size//1024}KB → {size//1024}KB" -# ) -# last_size = size - -# # 每分钟记录一次当前尺寸 -# if time.time() - last_log > 60: -# logging.debug( -# f"File size update: {file_path} = {size//1024}KB" -# ) -# last_log = time.time() - -# if size > max_size: -# # 关键警告:文件超限 -# logging.warning( -# f"Terminating {project_name}/{target_name}: " -# f"File size {size//(1024 * 1024)}MB > {max_size//(1024 * 1024)}MB" -# ) -# process.kill() -# # 记录终止后的最终文件大小 -# final_size = os.path.getsize(file_path) -# logging.info( -# f"After termination: {file_path} = {final_size//1024}KB" -# ) -# break - -# except Exception as e: -# logging.error( -# f"File monitor error for {project_name}/{target_name}: " -# f"{type(e).__name__} - {str(e)}" -# ) -# # 防止错误导致高频重试 -# time.sleep(5) - -# time.sleep(1) # 每秒检查一次 - -# # 进程结束时记录 -# exit_code = process.poll() -# if exit_code is not None: -# logging.info( -# f"Process ended: {project_name}/{target_name} " -# f"Exit code: {exit_code} " -# f"Output file: {os.path.exists(file_path)}" -# ) - def fuzz_one_target(target: tuple[str, str], timeout: int): + """ + Perform fuzzing on a single fuzzing target + + Args: + target (tuple[str, str]): (Repository path, target name) + timeout (int): Timeout duration (seconds) + + Returns: + subprocess.Popen: Subprocess object + """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - - input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt") + + # Create input file path + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - + try: - # 清空输出文件 - open(input_file_path, "wb").close() - - logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}") - - cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" - logging.debug(f"Executing command: {cmd}") - - start_time = datetime.now() - process = subprocess.Popen( - ["bash", "-c", cmd], - cwd=oss_fuzz_root, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - bufsize=1 - ) - - logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}") - - # --- 按要求:去掉文件大小监控线程 --- - # monitor_thread = threading.Thread( - # target=monitor_file_size, - # args=(input_file_path, process), - # daemon=True - # ) - # monitor_thread.start() - # --- 结束 --- - - # 从 PIPE 读取并写入文件 - with open(input_file_path, "ab") as output_file: - for chunk in iter(lambda: process.stdout.read(4096), b""): - output_file.write(chunk) - - process.wait() - - if os.path.getsize(input_file_path) == 0: - logging.warning(f"Output file is empty: {input_file_path}") - error_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}_error.log") - with open(error_path, "wb") as error_file: - subprocess.run( - ["bash", "-c", cmd], - cwd=oss_fuzz_root, - stdout=error_file, - stderr=subprocess.STDOUT, - ) - logging.info(f"Error output saved to {error_path}") - - return process - + with open(input_file_path, "w") as input_file: + return subprocess.Popen( + [ + "bash", + "-c", + f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + ], + cwd=oss_fuzz_root, + stdout=input_file, + stderr=subprocess.DEVNULL, + ) except Exception as e: - logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}") - logging.debug("Exception details:", exc_info=True) + logging.error(f"Error starting fuzzer: {e}") return None - -def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): +def fuzz_repos(repos: list[str], jobs: int, timeout: int ): """ Perform fuzzing on a set of repositories @@ -286,18 +170,88 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60): # Execute fuzzing in parallel parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) - -# def fuzz_one_target(target: tuple[str, str], timeout: int): + +# def fuzz_one_target(target: tuple[str, str], timeout: int): +# repo_path, target_name = target +# project_name = os.path.basename(repo_path) +# oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) + +# input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt") +# os.makedirs(os.path.dirname(input_file_path), exist_ok=True) + +# try: +# # 清空输出文件 +# open(input_file_path, "wb").close() + +# logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}") + +# cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" +# logging.debug(f"Executing command: {cmd}") + +# start_time = datetime.now() +# process = subprocess.Popen( +# ["bash", "-c", cmd], +# cwd=oss_fuzz_root, +# stdout=subprocess.PIPE, +# stderr=subprocess.DEVNULL, +# # bufsize=1 +# ) + +# logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}") + + + +# # 从 PIPE 读取并写入文件 +# with open(input_file_path, "ab") as output_file: +# for chunk in iter(lambda: process.stdout.read(4096), b""): +# output_file.write(chunk) + +# process.wait() + + + +# except Exception as e: +# logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}") +# logging.debug("Exception details:", exc_info=True) +# return None + + +# def fuzz_repos(repos: list[str], jobs: int, timeout: int = 30): # """ # Perform fuzzing on a set of repositories + +# Args: +# repos (list[str]): List of repository paths +# jobs (int): Number of parallel tasks +# timeout (int): Timeout duration (seconds) # """ -# pass +# logging.info("Discovering fuzz targets") + +# # Get all targets +# targets_list = [] +# for repo in repos: +# project_name = os.path.basename(repo) +# oss_fuzz_dir = Path(repo).parent.parent +# targets = discover_targets(project_name, oss_fuzz_dir) +# targets_list.append(targets) + +# # Create target mapping +# target_map = {repo: targets for repo, targets in zip(repos, targets_list)} +# all_targets: list[tuple[str, str]] = [ +# (k, v) for k, vs in target_map.items() for v in vs +# ] + +# logging.info(f"Running fuzzing on {len(all_targets)} targets") + +# # Create input directory +# for repo in repos: +# os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) + +# # Execute fuzzing in parallel +# parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) + -# def generate_test_template(target_name: str, repo_path: str): -# ... -# def transform_repos(repos: list[str], jobs: int): -# ... # def fuzz_one_target(target: tuple[str, str], timeout: int): # """ From f417e19a3ee9c598f119c94979825f71c2ae8400 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 01:40:56 +0000 Subject: [PATCH 105/134] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 54 ++++++++++++++----------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 3282bca..74733d6 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -77,17 +77,12 @@ def _build_cmd(path: str): def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: """ Discover fuzzing targets - - Args: - project_name (str): Project name - oss_fuzz_dir (Path): OSS-Fuzz root directory - - Returns: - list[str]: List of target names """ out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] + logging.debug(f"Searching fuzz targets in: {out_dir}") + if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") return targets @@ -98,29 +93,27 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: '.' not in f.name and f.name.endswith("print1") and os.access(f, os.X_OK)): targets.append(f.name) + logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}") except Exception as e: - logging.error(f"Error discovering targets: {e}") + logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True) return targets + + def fuzz_one_target(target: tuple[str, str], timeout: int): """ Perform fuzzing on a single fuzzing target - - Args: - target (tuple[str, str]): (Repository path, target name) - timeout (int): Timeout duration (seconds) - - Returns: - subprocess.Popen: Subprocess object """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - # Create input file path input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - + + logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") + logging.debug(f"Fuzz output will be saved to: {input_file_path}") + try: with open(input_file_path, "w") as input_file: return subprocess.Popen( @@ -134,21 +127,16 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): stderr=subprocess.DEVNULL, ) except Exception as e: - logging.error(f"Error starting fuzzer: {e}") + logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True) return None -def fuzz_repos(repos: list[str], jobs: int, timeout: int ): + +def fuzz_repos(repos: list[str], jobs: int, timeout: int): """ Perform fuzzing on a set of repositories - - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - timeout (int): Timeout duration (seconds) """ - logging.info("Discovering fuzz targets") - - # Get all targets + logging.info(f"Discovering fuzz targets for {len(repos)} repositories...") + targets_list = [] for repo in repos: project_name = os.path.basename(repo) @@ -156,22 +144,20 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int ): targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - # Create target mapping target_map = {repo: targets for repo, targets in zip(repos, targets_list)} all_targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs ] - logging.info(f"Running fuzzing on {len(all_targets)} targets") - - # Create input directory + logging.info(f"Total fuzz targets discovered: {len(all_targets)}") + for repo, targets in target_map.items(): + logging.info(f"{os.path.basename(repo)}: {len(targets)} targets") + for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - # Execute fuzzing in parallel + logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) - - # def fuzz_one_target(target: tuple[str, str], timeout: int): # repo_path, target_name = target # project_name = os.path.basename(repo_path) From 2a27db9f6cc7fb6a5391158ad026bc5a55e08e6a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 08:23:55 +0000 Subject: [PATCH 106/134] =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E7=94=9F=E6=88=90?= =?UTF-8?q?=E6=88=90=E5=8A=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 258 ++++++++++++------------------------ 1 file changed, 83 insertions(+), 175 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 74733d6..b3896e1 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -158,190 +158,98 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int): logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) -# def fuzz_one_target(target: tuple[str, str], timeout: int): -# repo_path, target_name = target -# project_name = os.path.basename(repo_path) -# oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - -# input_file_path = pjoin(repo_path, "fuzz_inputs", f"{target_name}.txt") -# os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - -# try: -# # 清空输出文件 -# open(input_file_path, "wb").close() - -# logging.info(f"Starting fuzzer for {project_name}/{target_name}: timeout={timeout}s, output={input_file_path}") - -# cmd = f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" -# logging.debug(f"Executing command: {cmd}") - -# start_time = datetime.now() -# process = subprocess.Popen( -# ["bash", "-c", cmd], -# cwd=oss_fuzz_root, -# stdout=subprocess.PIPE, -# stderr=subprocess.DEVNULL, -# # bufsize=1 -# ) +import os +import re +import logging +from os.path import join as pjoin -# logging.info(f"Started PID {process.pid} at {start_time.strftime('%H:%M:%S')} for {project_name}/{target_name}") +def generate_test_template(target_name: str, repo_path: str): + """ + Generate Python test template for a single target by stripping license header, + main() block, and print(data) inside TestInput/TestOneInput. + """ + src_file = pjoin(repo_path, target_name + ".py") + if not os.path.exists(src_file): + logging.error(f"Source target file not found: {src_file}") + return None - + with open(src_file, "r", encoding="utf-8") as f: + original_code = f.read() -# # 从 PIPE 读取并写入文件 -# with open(input_file_path, "ab") as output_file: -# for chunk in iter(lambda: process.stdout.read(4096), b""): -# output_file.write(chunk) + # --- 1. 保留 shebang,但删除许可证注释 --- + shebang = "" + if original_code.startswith("#!"): + shebang, original_code = original_code.split("\n", 1) + shebang += "\n" -# process.wait() + # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块 + license_pattern = re.compile( + r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", + re.IGNORECASE | re.MULTILINE + ) + code_no_license = re.sub(license_pattern, "", original_code, count=1) + + # --- 2. 删除 main 函数和 if __name__ == '__main__' --- + code_no_main = re.sub( + r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)", + "", + code_no_license, + flags=re.MULTILINE + ) + code_no_main = re.sub( + r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*", + "", + code_no_main, + flags=re.MULTILINE + ) - + # --- 3. 删除 TestInput/TestOneInput 内的 print(data) --- + def remove_print_in_func(match): + func_body = match.group(0) + func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE) + return func_body + + cleaned_code = re.sub( + r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)", + lambda m: remove_print_in_func(m), + code_no_main, + flags=re.MULTILINE + ) -# except Exception as e: -# logging.error(f"Failed to start fuzzer for {project_name}/{target_name}: {type(e).__name__} - {str(e)}") -# logging.debug("Exception details:", exc_info=True) -# return None + # --- 4. 输出到 tests-gen --- + template_dir = pjoin(repo_path, "tests-gen") + os.makedirs(template_dir, exist_ok=True) + init_path = pjoin(template_dir, "__init__.py") + if not os.path.exists(init_path): + with open(init_path, "w", encoding="utf-8") as f: + f.write("") -# def fuzz_repos(repos: list[str], jobs: int, timeout: int = 30): -# """ -# Perform fuzzing on a set of repositories - -# Args: -# repos (list[str]): List of repository paths -# jobs (int): Number of parallel tasks -# timeout (int): Timeout duration (seconds) -# """ -# logging.info("Discovering fuzz targets") - -# # Get all targets -# targets_list = [] -# for repo in repos: -# project_name = os.path.basename(repo) -# oss_fuzz_dir = Path(repo).parent.parent -# targets = discover_targets(project_name, oss_fuzz_dir) -# targets_list.append(targets) - -# # Create target mapping -# target_map = {repo: targets for repo, targets in zip(repos, targets_list)} -# all_targets: list[tuple[str, str]] = [ -# (k, v) for k, vs in target_map.items() for v in vs -# ] - -# logging.info(f"Running fuzzing on {len(all_targets)} targets") - -# # Create input directory -# for repo in repos: -# os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - -# # Execute fuzzing in parallel -# parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) - + template_path = pjoin(template_dir, f"{target_name}.py") + with open(template_path, "w", encoding="utf-8") as f: + f.write(shebang + cleaned_code.strip() + "\n") + logging.info(f"Generated cleaned template: {template_path}") + return template_path -# def fuzz_one_target(target: tuple[str, str], timeout: int): -# """ -# Perform fuzzing on a set of repositories - -# Args: -# repos (list[str]): List of repository paths -# jobs (int): Number of parallel tasks -# timeout (int): Timeout duration (seconds) -# """ -# logging.info("Discovering fuzz targets") - -# # Get all targets -# targets_list = [] -# for repo in repos: -# project_name = os.path.basename(repo) -# oss_fuzz_dir = Path(repo).parent.parent -# targets = discover_targets(project_name, oss_fuzz_dir) -# targets_list.append(targets) - -# # Create target mapping -# target_map = {repo: targets for repo, targets in zip(repos, targets_list)} -# all_targets: list[tuple[str, str]] = [ -# (k, v) for k, vs in target_map.items() for v in vs -# ] - -# logging.info(f"Running fuzzing on {len(all_targets)} targets") - -# # Create input directory -# for repo in repos: -# os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - -# # Execute fuzzing in parallel -# parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) - -# def generate_test_template(target_name: str, repo_path: str): -# """ -# Generate Python test template for a single target - -# Args: -# target_name (str): Target name -# repo_path (str): Repository path - -# Returns: -# str: Template file path -# """ -# template_dir = pjoin(repo_path, "tests-gen") -# os.makedirs(template_dir, exist_ok=True) - -# # Ensure __init__.py exists -# init_path = pjoin(template_dir, "__init__.py") -# if not os.path.exists(init_path): -# with open(init_path, "w") as f: -# f.write("") - -# template_path = pjoin(template_dir, f"{target_name}.py") - -# # Python test template with placeholder -# template = f"""#!/usr/bin/env python3 -# import sys -# import os -# import unittest - -# # Add the parent directory to the Python path -# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -# # Import the function to test -# try: -# from {target_name} import TestOneInput as TestClass -# except ImportError: -# from {target_name} import TestInput as TestClass - -# class Test{target_name.capitalize()}(unittest.TestCase): -# def test_generated(self): -# \"\"\"Test generated from fuzzing input\"\"\" -# input_data = b"" -# result = TestClass(input_data) - -# if __name__ == '__main__': -# unittest.main() -# """ -# with open(template_path, "w") as f: -# f.write(template) - -# return template_path - -# def transform_repos(repos: list[str], jobs: int): -# """ -# Generate test templates for all targets +def transform_repos(repos: list[str], jobs: int): + """ + Generate test templates for all targets -# Args: -# repos (list[str]): List of repository paths -# jobs (int): Number of parallel tasks -# """ -# logging.info("Generating test templates") + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + """ + logging.info("Generating test templates") -# def _transform_repo(repo: str): -# project_name = os.path.basename(repo) -# oss_fuzz_dir = Path(repo).parent.parent -# targets = discover_targets(project_name, oss_fuzz_dir) -# return [generate_test_template(t, repo) for t in targets] + def _transform_repo(repo: str): + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + return [generate_test_template(t, repo) for t in targets] -# with ProcessingPool(jobs) as p: -# return list(p.map(_transform_repo, repos)) + with ProcessingPool(jobs) as p: + return list(p.map(_transform_repo, repos)) def escape_special_chars(input_data: str) -> str: """ @@ -424,7 +332,7 @@ def substitute_one_repo( continue # 修复:添加 .txt 后缀 - input_path = pjoin(input_dir, f"{target_name}.txt") + input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue @@ -592,12 +500,12 @@ def main( fuzz_repos(repos, jobs, timeout) elif pipeline == "testgen": testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) - # elif pipeline == "transform": - # transform_repos(repos, jobs) + elif pipeline == "transform": + transform_repos(repos, jobs) elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - # transform_repos(repos, jobs) # Generate test templates + transform_repos(repos, jobs) # Generate test templates fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: From f58370272d3ef2dccfc00ccf19242e206011d730 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 08:35:57 +0000 Subject: [PATCH 107/134] =?UTF-8?q?testgen=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 83 ++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index b3896e1..3529f19 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -265,49 +265,49 @@ def escape_special_chars(input_data: str) -> str: # This will handle all special characters and non-ASCII bytes return repr(input_data.encode('latin-1', 'replace')) -def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: - """ - Replace fuzzing input into Python test template +# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: +# """ +# Replace fuzzing input into Python test template - Args: - template (str): Template content - input_data (str): Input data - idx (int): Test index - target_name (str): Target name +# Args: +# template (str): Template content +# input_data (str): Input data +# idx (int): Test index +# target_name (str): Target name - Returns: - str: Test code after substitution - """ - # Escape special characters for Python - escaped_input = escape_special_chars(input_data) +# Returns: +# str: Test code after substitution +# """ +# # Escape special characters for Python +# escaped_input = escape_special_chars(input_data) - # Replace input placeholder - new_template = template.replace( - 'input_data = b""', - f'input_data = {escaped_input}' - ) +# # Replace input placeholder +# new_template = template.replace( +# 'input_data = b""', +# f'input_data = {escaped_input}' +# ) - # Replace test method name to avoid duplication - return new_template.replace( - f"def test_generated(self):", - f"def test_{idx}(self):" - ) - -def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: - """ - Check if a string is sufficiently similar to any string in the selected list +# # Replace test method name to avoid duplication +# return new_template.replace( +# f"def test_generated(self):", +# f"def test_{idx}(self):" +# ) + +# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: +# """ +# Check if a string is sufficiently similar to any string in the selected list - Args: - selected (list[str]): List of selected strings - x (str): String to check - thresh (float): Similarity threshold +# Args: +# selected (list[str]): List of selected strings +# x (str): String to check +# thresh (float): Similarity threshold - Returns: - bool: Whether they are similar - """ - def similar(a, b): - return SequenceMatcher(None, a, b).ratio() - return any(similar(x, y) > thresh for y in selected) +# Returns: +# bool: Whether they are similar +# """ +# def similar(a, b): +# return SequenceMatcher(None, a, b).ratio() +# return any(similar(x, y) > thresh for y in selected) def substitute_one_repo( @@ -326,12 +326,12 @@ def substitute_one_repo( os.makedirs(template_dir, exist_ok=True) for target_name in targets: - source_file = pjoin(repo, f"{target_name}.py") + source_file = pjoin(template_dir, f"{target_name}.py") if not os.path.exists(source_file): logging.warning(f"Source file not found: {source_file}") continue - # 修复:添加 .txt 后缀 + input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") @@ -383,10 +383,7 @@ def substitute_one_repo( with open(source_file, "r") as f_src: code = f_src.read() - # 删除 main 和 __main__ 块 - code = re.sub(r"\nif __name__ == ['\"]__main__['\"]:.*", "", code, flags=re.S) - code = re.sub(r"\n\s*main\s*\(.*?\)", "", code) - + # 找到 TestInput / TestOneInput 并改成 test_{idx} code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) code = re.sub(r"\bTestInput\b", f"test_{idx}", code) From 68c656a4676cb9068590e89a8b3dcd1c62eeae2a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 18:20:33 +0000 Subject: [PATCH 108/134] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99,?= =?UTF-8?q?=20=E4=BF=AE=E6=94=B9=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 518 ++++++++++++++++++------------------ 1 file changed, 266 insertions(+), 252 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 3529f19..a42d0ba 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -2,9 +2,11 @@ Script for Python project fuzzing and test template conversion usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all """ +from __future__ import annotations + from pathlib import Path import logging -from typing import Optional, List, Tuple +from typing import Optional import fire import os from UniTSyn.frontend.util import wrap_repo, parallel_subprocess @@ -13,157 +15,178 @@ from tqdm import tqdm from pathos.multiprocessing import ProcessingPool import random -from difflib import SequenceMatcher from itertools import islice from datetime import datetime import re +from functools import partial -def build_image(repos: list[str], jobs: int): - """ - Build Docker images for OSS-Fuzz projects corresponding to each repository - - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - """ - logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") - os.makedirs(log_dir, exist_ok=True) +############################################################ +# Top-level helpers (picklable) to avoid pool pickling woes +############################################################ - def _build_cmd(path: str): - project_name = os.path.basename(path.rstrip("/")) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") - logging.info(f"Start building {project_name}, logging to {log_file}") +def _run_build_image(path: str, log_dir: str) -> Optional[subprocess.Popen]: + """Helper for build_image: must be top-level for pickling.""" + project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") + logging.info(f"Start building {project_name}, logging to {log_file}") + # Note: child process keeps the fd open even if parent closes it after spawn + f = open(log_file, "w") + try: return subprocess.Popen( - f"yes | python3 infra/helper.py build_image {project_name}", + "yes | python3 infra/helper.py build_image {project}".format(project=project_name), cwd=os.path.abspath(os.path.join(path, "../../")), - stdout=open(log_file, "w"), + stdout=f, stderr=subprocess.STDOUT, shell=True, ) - - _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) - -def build_fuzzer(repos: list[str], jobs: int): - """ - Build fuzzers in parallel for successfully built projects - - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - """ - logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") - os.makedirs(log_dir, exist_ok=True) - - def _build_cmd(path: str): - project_name = os.path.basename(path.rstrip("/")) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") - - logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") + except Exception: + f.close() + raise + + +def _run_build_fuzzer(path: str, log_dir: str) -> Optional[subprocess.Popen]: + """Helper for build_fuzzer: must be top-level for pickling.""" + project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") + logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") + f = open(log_file, "w") + try: return subprocess.Popen( - f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", + "python3 infra/helper.py build_fuzzers --sanitizer address {project}".format( + project=project_name + ), cwd=os.path.abspath(os.path.join(path, "../../")), - stdout=open(log_file, "w"), + stdout=f, stderr=subprocess.STDOUT, shell=True, ) + except Exception: + f.close() + raise + + +############################################################ +# Discover & fuzz +############################################################ - _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: - """ - Discover fuzzing targets + """Discover fuzzing targets in out/ directory. + + Rules: + - file name startswith "fuzz_" + - no dot in filename (exclude corpora, dictionaries) + - executable bit set + - (optional) if you want only print1 variants, pass a filter later """ out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] - + logging.debug(f"Searching fuzz targets in: {out_dir}") - + if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") return targets try: for f in out_dir.iterdir(): - if (f.is_file() and f.name.startswith("fuzz_") and - '.' not in f.name and f.name.endswith("print1") and - os.access(f, os.X_OK)): + if ( + f.is_file() + and f.name.startswith("fuzz_") + and f.name.endswith("print1") + and "." not in f.name + and os.access(f, os.X_OK) + ): targets.append(f.name) - logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}") + logging.info( + f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}" + ) except Exception as e: - logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True) - + logging.error( + f"Error discovering targets for {project_name}: {e}", exc_info=True + ) + return targets -def fuzz_one_target(target: tuple[str, str], timeout: int): - """ - Perform fuzzing on a single fuzzing target - """ + +def fuzz_one_target(target: tuple[str, str], timeout: int) -> Optional[subprocess.Popen]: + """Perform fuzzing on a single fuzzing target.""" repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") + logging.info( + f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s" + ) logging.debug(f"Fuzz output will be saved to: {input_file_path}") try: - with open(input_file_path, "w") as input_file: + f = open(input_file_path, "wb") + try: return subprocess.Popen( [ "bash", "-c", - f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}", ], cwd=oss_fuzz_root, - stdout=input_file, + stdout=f, stderr=subprocess.DEVNULL, ) + except Exception: + f.close() + raise except Exception as e: - logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True) + logging.error( + f"Error starting fuzzer for {project_name}/{target_name}: {e}", + exc_info=True, + ) return None -def fuzz_repos(repos: list[str], jobs: int, timeout: int): - """ - Perform fuzzing on a set of repositories - """ + +def fuzz_repos(repos: list[str], jobs: int, timeout: int) -> None: + """Perform fuzzing on a set of repositories.""" logging.info(f"Discovering fuzz targets for {len(repos)} repositories...") - targets_list = [] + targets_list: list[list[str]] = [] for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - - target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + + target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)} all_targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs ] - + logging.info(f"Total fuzz targets discovered: {len(all_targets)}") for repo, targets in target_map.items(): logging.info(f"{os.path.basename(repo)}: {len(targets)} targets") for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - - logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") - parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) -import os -import re -import logging -from os.path import join as pjoin -def generate_test_template(target_name: str, repo_path: str): + logging.info( + f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target" + ) + parallel_subprocess(all_targets, jobs, partial(fuzz_one_target, timeout=timeout), on_exit=None) + + +############################################################ +# Transform: generate cleaned templates from fuzz target py +############################################################ + + +def generate_test_template(target_name: str, repo_path: str) -> Optional[str]: """ Generate Python test template for a single target by stripping license header, main() block, and print(data) inside TestInput/TestOneInput. @@ -176,47 +199,49 @@ def generate_test_template(target_name: str, repo_path: str): with open(src_file, "r", encoding="utf-8") as f: original_code = f.read() - # --- 1. 保留 shebang,但删除许可证注释 --- + # 1) keep shebang shebang = "" if original_code.startswith("#!"): shebang, original_code = original_code.split("\n", 1) shebang += "\n" - # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块 + # 2) drop license block (best-effort) license_pattern = re.compile( r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", - re.IGNORECASE | re.MULTILINE + re.IGNORECASE | re.MULTILINE, ) code_no_license = re.sub(license_pattern, "", original_code, count=1) - # --- 2. 删除 main 函数和 if __name__ == '__main__' --- + # 3) remove main() and if __main__ guards (best-effort) code_no_main = re.sub( - r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)", + r"\n?def\s+main\([^)]*\):[\s\S]*?(?=^\S|\Z)", "", code_no_license, - flags=re.MULTILINE + flags=re.MULTILINE, ) code_no_main = re.sub( - r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*", + r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:[\s\S]*?(?=^\S|\Z)", "", code_no_main, - flags=re.MULTILINE + flags=re.MULTILINE, ) - # --- 3. 删除 TestInput/TestOneInput 内的 print(data) --- - def remove_print_in_func(match): - func_body = match.group(0) - func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE) - return func_body + # 4) remove print(data) inside TestInput/TestOneInput + def _strip_print_in_func(src: str) -> str: + def _repl(m: re.Match) -> str: + body = m.group(0) + return re.sub(r"^\s*print\(data\)\s*$", "", body, flags=re.MULTILINE) + + return re.sub( + r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?\):[\s\S]*?)(?=^def\s|^@|\Z)", + _repl, + src, + flags=re.MULTILINE, + ) - cleaned_code = re.sub( - r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)", - lambda m: remove_print_in_func(m), - code_no_main, - flags=re.MULTILINE - ) + cleaned_code = _strip_print_in_func(code_no_main) - # --- 4. 输出到 tests-gen --- + # 5) write into tests-gen template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) @@ -232,82 +257,29 @@ def remove_print_in_func(match): logging.info(f"Generated cleaned template: {template_path}") return template_path -def transform_repos(repos: list[str], jobs: int): + +# top-level for pickling + +def _transform_repo_fn(repo: str) -> list[Optional[str]]: + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + return [generate_test_template(t, repo) for t in targets] + + + +def transform_repos(repos: list[str], jobs: int) -> list[list[Optional[str]]]: """ - Generate test templates for all targets - - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks + Generate test templates for all targets (parallel, picklable). """ logging.info("Generating test templates") - - def _transform_repo(repo: str): - project_name = os.path.basename(repo) - oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - return [generate_test_template(t, repo) for t in targets] - with ProcessingPool(jobs) as p: - return list(p.map(_transform_repo, repos)) + return list(p.map(_transform_repo_fn, repos)) -def escape_special_chars(input_data: str) -> str: - """ - Escape special characters in input data for Python byte strings - - Args: - input_data (str): Raw input data - - Returns: - str: Input data with escaped characters - """ - # For Python, we can use repr() to safely represent byte strings - # This will handle all special characters and non-ASCII bytes - return repr(input_data.encode('latin-1', 'replace')) - -# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: -# """ -# Replace fuzzing input into Python test template - -# Args: -# template (str): Template content -# input_data (str): Input data -# idx (int): Test index -# target_name (str): Target name - -# Returns: -# str: Test code after substitution -# """ -# # Escape special characters for Python -# escaped_input = escape_special_chars(input_data) - -# # Replace input placeholder -# new_template = template.replace( -# 'input_data = b""', -# f'input_data = {escaped_input}' -# ) - -# # Replace test method name to avoid duplication -# return new_template.replace( -# f"def test_generated(self):", -# f"def test_{idx}(self):" -# ) - -# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: -# """ -# Check if a string is sufficiently similar to any string in the selected list - -# Args: -# selected (list[str]): List of selected strings -# x (str): String to check -# thresh (float): Similarity threshold - -# Returns: -# bool: Whether they are similar -# """ -# def similar(a, b): -# return SequenceMatcher(None, a, b).ratio() -# return any(similar(x, y) > thresh for y in selected) + +############################################################ +# Testgen: substitute fuzz inputs into test templates +############################################################ def substitute_one_repo( @@ -317,7 +289,7 @@ def substitute_one_repo( strategy: str, max_len: int, sim_thresh: float, -): +) -> None: """ 从原 fuzz target 复制文件,按 fuzz input 生成多个 testgen 文件。 """ @@ -331,36 +303,32 @@ def substitute_one_repo( logging.warning(f"Source file not found: {source_file}") continue - input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - # 读取所有有效的输入数据 - valid_inputs = [] + # 读取所有有效的输入数据(逐行,允许原始二进制) + valid_inputs: list[bytes] = [] with open(input_path, "rb") as f_input: for line in f_input: - try: - # 尝试解码行以检查内容 - decoded = line.decode('utf-8', errors='replace') - - # 只处理以 b' 或 b" 开头的行(这些是实际的测试输入) - if decoded.startswith(("b'", 'b"')): - # 提取字节数据部分 - if decoded.startswith("b'") and decoded.endswith("'\n"): - byte_data = line[2:-2] # 移除 b' 和末尾的 '\n - elif decoded.startswith('b"') and decoded.endswith('"\n'): - byte_data = line[2:-2] # 移除 b" 和末尾的 "\n - else: - continue - - # 只保留有效长度的输入 - if 0 < len(byte_data) <= max_len: - valid_inputs.append(byte_data) - except UnicodeDecodeError: - # 如果无法解码,可能是二进制数据,直接使用 - if 0 < len(line) <= max_len: + # If the line looks like a Python bytes literal b'...' + if line.startswith(b"b'") or line.startswith(b'b"'): + # Try to strip leading b' or b" and trailing quote+newline + stripped = None + if line.startswith(b"b'") and line.endswith(b"'\n"): + stripped = line[2:-2] + elif line.startswith(b'b"') and line.endswith(b'"\n'): + stripped = line[2:-2] + if stripped is not None and 0 < len(stripped) <= max_len: + valid_inputs.append(stripped) + continue + # Otherwise treat as raw bytes line + if 0 < len(line) <= max_len: + # drop final newline if present to keep tests stable + if line.endswith(b"\n"): + line = line[:-1] + if line: valid_inputs.append(line) if not valid_inputs: @@ -369,7 +337,8 @@ def substitute_one_repo( logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # 策略选择输入 - 最多选择 n_fuzz 个输入 + # select inputs + inputs: list[bytes] if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -378,39 +347,46 @@ def substitute_one_repo( else: inputs = valid_inputs[:n_fuzz] - # 每个 fuzz input 生成一个单独的文件 + # emit tests for idx, fuzz_input in enumerate(inputs, start=1): - with open(source_file, "r") as f_src: + with open(source_file, "r", encoding="utf-8") as f_src: code = f_src.read() - - # 找到 TestInput / TestOneInput 并改成 test_{idx} + # rename entry to test_{idx} code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) code = re.sub(r"\bTestInput\b", f"test_{idx}", code) - - # 插入测试数据 - 确保使用二进制表示 - def insert_fuzz_input(match): + + # inject bytes into the top of the function body + def _insert(match: re.Match) -> str: + header = match.group(1) indent = match.group(2) - # 使用 repr() 安全表示二进制数据 byte_repr = repr(fuzz_input) - return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}" - - # 在测试函数中插入数据 + return f"{header}{indent}data = {byte_repr}\n{indent}" + code = re.sub( rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", - insert_fuzz_input, + _insert, code, ) - + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") - with open(out_path, "w") as f_out: + with open(out_path, "w", encoding="utf-8") as f_out: f_out.write(code) - # 格式化代码 + # format if black exists try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") + + +# top-level wrapper to map (repo, targets) tuples without lambdas + +def _substitute_wrapper(args: tuple[str, list[str], int, str, int, float]) -> None: + return substitute_one_repo(*args) + + + def testgen_repos( repos: list[str], jobs: int, @@ -418,39 +394,67 @@ def testgen_repos( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, -): - """ - Generate test cases from fuzzing inputs - - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - n_fuzz (int): Number of inputs to use - strategy (str): Selection strategy - max_len (int): Maximum length - sim_thresh (float): Similarity threshold - """ +) -> None: + """Generate test cases from fuzzing inputs.""" # First get all targets - targets_list = [] + targets_list: list[list[str]] = [] for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - - target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - - # Process each repository in parallel + + target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)} + + work: list[tuple[str, list[str], int, str, int, float]] = [ + (repo, targets, n_fuzz, strategy, max_len, sim_thresh) + for repo, targets in target_map.items() + ] + with ProcessingPool(jobs) as p: - list(p.map( - lambda item: substitute_one_repo( - item[0], item[1], n_fuzz, strategy, max_len, sim_thresh - ), - target_map.items() - )) + list(p.map(_substitute_wrapper, work)) + + +############################################################ +# Build steps (parallel via parallel_subprocess) +############################################################ + + +def build_image(repos: list[str], jobs: int) -> None: + """Build Docker images for all repos.""" + logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + + parallel_subprocess( + repos, + jobs, + partial(_run_build_image, log_dir=log_dir), + on_exit=None, + ) + + +def build_fuzzer(repos: list[str], jobs: int) -> None: + """Build fuzzers in parallel for successfully built projects.""" + logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + + parallel_subprocess( + repos, + jobs, + partial(_run_build_fuzzer, log_dir=log_dir), + on_exit=None, + ) + + +############################################################ +# CLI +############################################################ + def main( - repo_id: str = "data/valid_projects3.txt", + repo_id: str = "data/valid_projects2.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 30, jobs: int = 2, @@ -459,36 +463,42 @@ def main( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, - ): """ Main function, controlling the entire fuzzing process - + Args: - repo_id (str): Project ID file path + repo_id (str): Project ID file path or a single project name repo_root (str): Project root directory - timeout (int): Timeout duration + timeout (int): Timeout duration per fuzz target (seconds) jobs (int): Number of parallel tasks - pipeline (str): Pipeline type - n_fuzz (int): Number of inputs to use - strategy (str): Selection strategy - max_len (int): Maximum length - sim_thresh (float): Similarity threshold + pipeline (str): One of [build_image, build_fuzzer, fuzz, testgen, transform, all] + n_fuzz (int): Number of inputs to use (testgen) + strategy (str): Selection strategy [head|shuffle|reverse] + max_len (int): Maximum fuzz input length (bytes) + sim_thresh (float): Reserved for similarity dedup (not used currently) """ try: - with open(repo_id, "r") as f: + with open(repo_id, "r", encoding="utf-8") as f: repo_id_list = [line.strip() for line in f if line.strip()] except FileNotFoundError: repo_id_list = [repo_id] # Collect repository paths - repos = [] - for repo_id in repo_id_list: - repo_path = abspath(os.path.join(repo_root, repo_id)) + repos: list[str] = [] + for rid in repo_id_list: + repo_path = abspath(os.path.join(repo_root, rid)) if os.path.isdir(repo_path): repos.append(repo_path) + else: + logging.warning(f"Repo not found or not a directory: {repo_path}") + + if not repos: + logging.error("No valid repositories found.") + return + + pipeline = pipeline.lower().strip() - # Execute specified pipeline if pipeline == "build_image": build_image(repos, jobs) elif pipeline == "build_fuzzer": @@ -502,12 +512,16 @@ def main( elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - transform_repos(repos, jobs) # Generate test templates + transform_repos(repos, jobs) fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: logging.error(f"Unknown pipeline: {pipeline}") + if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - fire.Fire(main) \ No newline at end of file + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + ) + fire.Fire(main) From 60fbb7ab251e6b124c1a63d6b059010a46df989b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 22:02:59 +0000 Subject: [PATCH 109/134] =?UTF-8?q?=E6=9B=B4=E6=8D=A2=E4=B8=BA=E6=9C=AA?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 520 ++++++++++++++++++------------------ 1 file changed, 253 insertions(+), 267 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index a42d0ba..2d950a0 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -2,11 +2,9 @@ Script for Python project fuzzing and test template conversion usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all """ -from __future__ import annotations - from pathlib import Path import logging -from typing import Optional +from typing import Optional, List, Tuple import fire import os from UniTSyn.frontend.util import wrap_repo, parallel_subprocess @@ -15,178 +13,157 @@ from tqdm import tqdm from pathos.multiprocessing import ProcessingPool import random +from difflib import SequenceMatcher from itertools import islice from datetime import datetime import re -from functools import partial -############################################################ -# Top-level helpers (picklable) to avoid pool pickling woes -############################################################ +def build_image(repos: list[str], jobs: int): + """ + Build Docker images for OSS-Fuzz projects corresponding to each repository + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + """ + logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + def _build_cmd(path: str): + project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") -def _run_build_image(path: str, log_dir: str) -> Optional[subprocess.Popen]: - """Helper for build_image: must be top-level for pickling.""" - project_name = os.path.basename(path.rstrip("/")) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log") - logging.info(f"Start building {project_name}, logging to {log_file}") - # Note: child process keeps the fd open even if parent closes it after spawn - f = open(log_file, "w") - try: + logging.info(f"Start building {project_name}, logging to {log_file}") return subprocess.Popen( - "yes | python3 infra/helper.py build_image {project}".format(project=project_name), + f"yes | python3 infra/helper.py build_image {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), - stdout=f, + stdout=open(log_file, "w"), stderr=subprocess.STDOUT, shell=True, ) - except Exception: - f.close() - raise - - -def _run_build_fuzzer(path: str, log_dir: str) -> Optional[subprocess.Popen]: - """Helper for build_fuzzer: must be top-level for pickling.""" - project_name = os.path.basename(path.rstrip("/")) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") - logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") - f = open(log_file, "w") - try: + + _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) + +def build_fuzzer(repos: list[str], jobs: int): + """ + Build fuzzers in parallel for successfully built projects + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + """ + logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") + log_dir = os.path.abspath("fuzz_pipeline_log") + os.makedirs(log_dir, exist_ok=True) + + def _build_cmd(path: str): + project_name = os.path.basename(path.rstrip("/")) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") + + logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") return subprocess.Popen( - "python3 infra/helper.py build_fuzzers --sanitizer address {project}".format( - project=project_name - ), + f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), - stdout=f, + stdout=open(log_file, "w"), stderr=subprocess.STDOUT, shell=True, ) - except Exception: - f.close() - raise - - -############################################################ -# Discover & fuzz -############################################################ + _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: - """Discover fuzzing targets in out/ directory. - - Rules: - - file name startswith "fuzz_" - - no dot in filename (exclude corpora, dictionaries) - - executable bit set - - (optional) if you want only print1 variants, pass a filter later + """ + Discover fuzzing targets """ out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] - + logging.debug(f"Searching fuzz targets in: {out_dir}") - + if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") return targets try: for f in out_dir.iterdir(): - if ( - f.is_file() - and f.name.startswith("fuzz_") - and f.name.endswith("print1") - and "." not in f.name - and os.access(f, os.X_OK) - ): + if (f.is_file() and f.name.startswith("fuzz_") and + '.' not in f.name and f.name.endswith("print1") and + os.access(f, os.X_OK)): targets.append(f.name) - logging.info( - f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}" - ) + logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}") except Exception as e: - logging.error( - f"Error discovering targets for {project_name}: {e}", exc_info=True - ) - + logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True) + return targets - -def fuzz_one_target(target: tuple[str, str], timeout: int) -> Optional[subprocess.Popen]: - """Perform fuzzing on a single fuzzing target.""" +def fuzz_one_target(target: tuple[str, str], timeout: int): + """ + Perform fuzzing on a single fuzzing target + """ repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - logging.info( - f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s" - ) + logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") logging.debug(f"Fuzz output will be saved to: {input_file_path}") try: - f = open(input_file_path, "wb") - try: + with open(input_file_path, "w") as input_file: return subprocess.Popen( [ "bash", "-c", - f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}", + f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" ], cwd=oss_fuzz_root, - stdout=f, + stdout=input_file, stderr=subprocess.DEVNULL, ) - except Exception: - f.close() - raise except Exception as e: - logging.error( - f"Error starting fuzzer for {project_name}/{target_name}: {e}", - exc_info=True, - ) + logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True) return None - -def fuzz_repos(repos: list[str], jobs: int, timeout: int) -> None: - """Perform fuzzing on a set of repositories.""" +def fuzz_repos(repos: list[str], jobs: int, timeout: int): + """ + Perform fuzzing on a set of repositories + """ logging.info(f"Discovering fuzz targets for {len(repos)} repositories...") - targets_list: list[list[str]] = [] + targets_list = [] for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - - target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)} + + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} all_targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs ] - + logging.info(f"Total fuzz targets discovered: {len(all_targets)}") for repo, targets in target_map.items(): logging.info(f"{os.path.basename(repo)}: {len(targets)} targets") for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) + + logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") + parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) +import os +import re +import logging +from os.path import join as pjoin - logging.info( - f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target" - ) - parallel_subprocess(all_targets, jobs, partial(fuzz_one_target, timeout=timeout), on_exit=None) - - -############################################################ -# Transform: generate cleaned templates from fuzz target py -############################################################ - - -def generate_test_template(target_name: str, repo_path: str) -> Optional[str]: +def generate_test_template(target_name: str, repo_path: str): """ Generate Python test template for a single target by stripping license header, main() block, and print(data) inside TestInput/TestOneInput. @@ -199,49 +176,47 @@ def generate_test_template(target_name: str, repo_path: str) -> Optional[str]: with open(src_file, "r", encoding="utf-8") as f: original_code = f.read() - # 1) keep shebang + # --- 1. 保留 shebang,但删除许可证注释 --- shebang = "" if original_code.startswith("#!"): shebang, original_code = original_code.split("\n", 1) shebang += "\n" - # 2) drop license block (best-effort) + # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块 license_pattern = re.compile( r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", - re.IGNORECASE | re.MULTILINE, + re.IGNORECASE | re.MULTILINE ) code_no_license = re.sub(license_pattern, "", original_code, count=1) - # 3) remove main() and if __main__ guards (best-effort) + # --- 2. 删除 main 函数和 if __name__ == '__main__' --- code_no_main = re.sub( - r"\n?def\s+main\([^)]*\):[\s\S]*?(?=^\S|\Z)", + r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)", "", code_no_license, - flags=re.MULTILINE, + flags=re.MULTILINE ) code_no_main = re.sub( - r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:[\s\S]*?(?=^\S|\Z)", + r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*", "", code_no_main, - flags=re.MULTILINE, + flags=re.MULTILINE ) - # 4) remove print(data) inside TestInput/TestOneInput - def _strip_print_in_func(src: str) -> str: - def _repl(m: re.Match) -> str: - body = m.group(0) - return re.sub(r"^\s*print\(data\)\s*$", "", body, flags=re.MULTILINE) - - return re.sub( - r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?\):[\s\S]*?)(?=^def\s|^@|\Z)", - _repl, - src, - flags=re.MULTILINE, - ) + # --- 3. 删除 TestInput/TestOneInput 内的 print(data) --- + def remove_print_in_func(match): + func_body = match.group(0) + func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE) + return func_body - cleaned_code = _strip_print_in_func(code_no_main) + cleaned_code = re.sub( + r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)", + lambda m: remove_print_in_func(m), + code_no_main, + flags=re.MULTILINE + ) - # 5) write into tests-gen + # --- 4. 输出到 tests-gen --- template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) @@ -257,29 +232,82 @@ def _repl(m: re.Match) -> str: logging.info(f"Generated cleaned template: {template_path}") return template_path - -# top-level for pickling - -def _transform_repo_fn(repo: str) -> list[Optional[str]]: - project_name = os.path.basename(repo) - oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - return [generate_test_template(t, repo) for t in targets] - - - -def transform_repos(repos: list[str], jobs: int) -> list[list[Optional[str]]]: +def transform_repos(repos: list[str], jobs: int): """ - Generate test templates for all targets (parallel, picklable). + Generate test templates for all targets + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks """ logging.info("Generating test templates") + + def _transform_repo(repo: str): + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + return [generate_test_template(t, repo) for t in targets] + with ProcessingPool(jobs) as p: - return list(p.map(_transform_repo_fn, repos)) - - -############################################################ -# Testgen: substitute fuzz inputs into test templates -############################################################ + return list(p.map(_transform_repo, repos)) + +# def escape_special_chars(input_data: str) -> str: +# """ +# Escape special characters in input data for Python byte strings + +# Args: +# input_data (str): Raw input data + +# Returns: +# str: Input data with escaped characters +# """ +# # For Python, we can use repr() to safely represent byte strings +# # This will handle all special characters and non-ASCII bytes +# return repr(input_data.encode('latin-1', 'replace')) + +# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: +# """ +# Replace fuzzing input into Python test template + +# Args: +# template (str): Template content +# input_data (str): Input data +# idx (int): Test index +# target_name (str): Target name + +# Returns: +# str: Test code after substitution +# """ +# # Escape special characters for Python +# escaped_input = escape_special_chars(input_data) + +# # Replace input placeholder +# new_template = template.replace( +# 'input_data = b""', +# f'input_data = {escaped_input}' +# ) + +# # Replace test method name to avoid duplication +# return new_template.replace( +# f"def test_generated(self):", +# f"def test_{idx}(self):" +# ) + +# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: +# """ +# Check if a string is sufficiently similar to any string in the selected list + +# Args: +# selected (list[str]): List of selected strings +# x (str): String to check +# thresh (float): Similarity threshold + +# Returns: +# bool: Whether they are similar +# """ +# def similar(a, b): +# return SequenceMatcher(None, a, b).ratio() +# return any(similar(x, y) > thresh for y in selected) def substitute_one_repo( @@ -289,7 +317,7 @@ def substitute_one_repo( strategy: str, max_len: int, sim_thresh: float, -) -> None: +): """ 从原 fuzz target 复制文件,按 fuzz input 生成多个 testgen 文件。 """ @@ -303,32 +331,36 @@ def substitute_one_repo( logging.warning(f"Source file not found: {source_file}") continue + input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - # 读取所有有效的输入数据(逐行,允许原始二进制) - valid_inputs: list[bytes] = [] + # 读取所有有效的输入数据 + valid_inputs = [] with open(input_path, "rb") as f_input: for line in f_input: - # If the line looks like a Python bytes literal b'...' - if line.startswith(b"b'") or line.startswith(b'b"'): - # Try to strip leading b' or b" and trailing quote+newline - stripped = None - if line.startswith(b"b'") and line.endswith(b"'\n"): - stripped = line[2:-2] - elif line.startswith(b'b"') and line.endswith(b'"\n'): - stripped = line[2:-2] - if stripped is not None and 0 < len(stripped) <= max_len: - valid_inputs.append(stripped) - continue - # Otherwise treat as raw bytes line - if 0 < len(line) <= max_len: - # drop final newline if present to keep tests stable - if line.endswith(b"\n"): - line = line[:-1] - if line: + try: + # 尝试解码行以检查内容 + decoded = line.decode('utf-8', errors='replace') + + # 只处理以 b' 或 b" 开头的行(这些是实际的测试输入) + if decoded.startswith(("b'", 'b"')): + # 提取字节数据部分 + if decoded.startswith("b'") and decoded.endswith("'\n"): + byte_data = line[2:-2] # 移除 b' 和末尾的 '\n + elif decoded.startswith('b"') and decoded.endswith('"\n'): + byte_data = line[2:-2] # 移除 b" 和末尾的 "\n + else: + continue + + # 只保留有效长度的输入 + if 0 < len(byte_data) <= max_len: + valid_inputs.append(byte_data) + except UnicodeDecodeError: + # 如果无法解码,可能是二进制数据,直接使用 + if 0 < len(line) <= max_len: valid_inputs.append(line) if not valid_inputs: @@ -337,8 +369,7 @@ def substitute_one_repo( logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # select inputs - inputs: list[bytes] + # 策略选择输入 - 最多选择 n_fuzz 个输入 if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -347,46 +378,39 @@ def substitute_one_repo( else: inputs = valid_inputs[:n_fuzz] - # emit tests + # 每个 fuzz input 生成一个单独的文件 for idx, fuzz_input in enumerate(inputs, start=1): - with open(source_file, "r", encoding="utf-8") as f_src: + with open(source_file, "r") as f_src: code = f_src.read() - # rename entry to test_{idx} + + # 找到 TestInput / TestOneInput 并改成 test_{idx} code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) code = re.sub(r"\bTestInput\b", f"test_{idx}", code) - - # inject bytes into the top of the function body - def _insert(match: re.Match) -> str: - header = match.group(1) + + # 插入测试数据 - 确保使用二进制表示 + def insert_fuzz_input(match): indent = match.group(2) + # 使用 repr() 安全表示二进制数据 byte_repr = repr(fuzz_input) - return f"{header}{indent}data = {byte_repr}\n{indent}" - + return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}" + + # 在测试函数中插入数据 code = re.sub( rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", - _insert, + insert_fuzz_input, code, ) - + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") - with open(out_path, "w", encoding="utf-8") as f_out: + with open(out_path, "w") as f_out: f_out.write(code) - # format if black exists + # 格式化代码 try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - - -# top-level wrapper to map (repo, targets) tuples without lambdas - -def _substitute_wrapper(args: tuple[str, list[str], int, str, int, float]) -> None: - return substitute_one_repo(*args) - - - def testgen_repos( repos: list[str], jobs: int, @@ -394,67 +418,39 @@ def testgen_repos( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, -) -> None: - """Generate test cases from fuzzing inputs.""" +): + """ + Generate test cases from fuzzing inputs + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + n_fuzz (int): Number of inputs to use + strategy (str): Selection strategy + max_len (int): Maximum length + sim_thresh (float): Similarity threshold + """ # First get all targets - targets_list: list[list[str]] = [] + targets_list = [] for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - - target_map: dict[str, list[str]] = {repo: targets for repo, targets in zip(repos, targets_list)} - - work: list[tuple[str, list[str], int, str, int, float]] = [ - (repo, targets, n_fuzz, strategy, max_len, sim_thresh) - for repo, targets in target_map.items() - ] - + + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + + # Process each repository in parallel with ProcessingPool(jobs) as p: - list(p.map(_substitute_wrapper, work)) - - -############################################################ -# Build steps (parallel via parallel_subprocess) -############################################################ - - -def build_image(repos: list[str], jobs: int) -> None: - """Build Docker images for all repos.""" - logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") - os.makedirs(log_dir, exist_ok=True) - - parallel_subprocess( - repos, - jobs, - partial(_run_build_image, log_dir=log_dir), - on_exit=None, - ) - - -def build_fuzzer(repos: list[str], jobs: int) -> None: - """Build fuzzers in parallel for successfully built projects.""" - logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects") - log_dir = os.path.abspath("fuzz_pipeline_log") - os.makedirs(log_dir, exist_ok=True) - - parallel_subprocess( - repos, - jobs, - partial(_run_build_fuzzer, log_dir=log_dir), - on_exit=None, - ) - - -############################################################ -# CLI -############################################################ - + list(p.map( + lambda item: substitute_one_repo( + item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + ), + target_map.items() + )) def main( - repo_id: str = "data/valid_projects2.txt", + repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 30, jobs: int = 2, @@ -463,42 +459,36 @@ def main( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, + ): """ Main function, controlling the entire fuzzing process - + Args: - repo_id (str): Project ID file path or a single project name + repo_id (str): Project ID file path repo_root (str): Project root directory - timeout (int): Timeout duration per fuzz target (seconds) + timeout (int): Timeout duration jobs (int): Number of parallel tasks - pipeline (str): One of [build_image, build_fuzzer, fuzz, testgen, transform, all] - n_fuzz (int): Number of inputs to use (testgen) - strategy (str): Selection strategy [head|shuffle|reverse] - max_len (int): Maximum fuzz input length (bytes) - sim_thresh (float): Reserved for similarity dedup (not used currently) + pipeline (str): Pipeline type + n_fuzz (int): Number of inputs to use + strategy (str): Selection strategy + max_len (int): Maximum length + sim_thresh (float): Similarity threshold """ try: - with open(repo_id, "r", encoding="utf-8") as f: + with open(repo_id, "r") as f: repo_id_list = [line.strip() for line in f if line.strip()] except FileNotFoundError: repo_id_list = [repo_id] # Collect repository paths - repos: list[str] = [] - for rid in repo_id_list: - repo_path = abspath(os.path.join(repo_root, rid)) + repos = [] + for repo_id in repo_id_list: + repo_path = abspath(os.path.join(repo_root, repo_id)) if os.path.isdir(repo_path): repos.append(repo_path) - else: - logging.warning(f"Repo not found or not a directory: {repo_path}") - - if not repos: - logging.error("No valid repositories found.") - return - - pipeline = pipeline.lower().strip() + # Execute specified pipeline if pipeline == "build_image": build_image(repos, jobs) elif pipeline == "build_fuzzer": @@ -512,16 +502,12 @@ def main( elif pipeline == "all": build_image(repos, jobs) build_fuzzer(repos, jobs) - transform_repos(repos, jobs) + transform_repos(repos, jobs) # Generate test templates fuzz_repos(repos, jobs, timeout) testgen_repos(repos, jobs, n_fuzz, strategy, max_len, sim_thresh) else: logging.error(f"Unknown pipeline: {pipeline}") - if __name__ == "__main__": - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", - ) - fire.Fire(main) + logging.basicConfig(level=logging.INFO) + fire.Fire(main) \ No newline at end of file From 641998d6eb40ccfea35a694802028413ae25bb70 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 22:39:22 +0000 Subject: [PATCH 110/134] =?UTF-8?q?template=E6=8F=92=E5=85=A5data=3Db""=20?= =?UTF-8?q?=E5=87=BD=E6=95=B0header=E6=94=B9=E4=B8=BAtest=5F()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuzz/collect_fuzz_python.py | 87 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 2d950a0..9cf01cb 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -166,7 +166,7 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int): def generate_test_template(target_name: str, repo_path: str): """ Generate Python test template for a single target by stripping license header, - main() block, and print(data) inside TestInput/TestOneInput. + main() block, and converting TestInput/TestOneInput to test_ with data=b"". """ src_file = pjoin(repo_path, target_name + ".py") if not os.path.exists(src_file): @@ -182,7 +182,6 @@ def generate_test_template(target_name: str, repo_path: str): shebang, original_code = original_code.split("\n", 1) shebang += "\n" - # 匹配从文件开头到包含 "limitations under the license" 那一行的注释块 license_pattern = re.compile( r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", re.IGNORECASE | re.MULTILINE @@ -203,15 +202,49 @@ def generate_test_template(target_name: str, repo_path: str): flags=re.MULTILINE ) - # --- 3. 删除 TestInput/TestOneInput 内的 print(data) --- - def remove_print_in_func(match): - func_body = match.group(0) - func_body = re.sub(r"^\s*print\(data\)\s*$", "", func_body, flags=re.MULTILINE) - return func_body - + # --- 3. 转换测试函数 --- + def process_test_function(match): + # 提取完整的函数定义和函数体 + func_str = match.group(0) + + # 1. 删除print(data)语句 + func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str) + + # 2. 将TestInput/TestOneInput改为test_() + func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str) + + # 3. 在函数体第一行可执行代码前插入data = b"" + # 查找第一个非空行(忽略空行和注释) + lines = func_str.splitlines() + if len(lines) < 2: + return func_str + + # 找到函数定义行后的第一个非空、非注释行 + insert_idx = None + for i in range(1, len(lines)): + line = lines[i].strip() + if line and not line.startswith('#'): + insert_idx = i + break + + if insert_idx is None: + return func_str + + # 获取该行的缩进量 + indent_match = re.match(r'^(\s*)', lines[insert_idx]) + if not indent_match: + return func_str + + indent = indent_match.group(1) + + # 插入 data = b"" + lines.insert(insert_idx, f"{indent}data = b\"\"") + + return "\n".join(lines) + cleaned_code = re.sub( - r"(def\s+Test(?:Input|OneInput)\s*\([\s\S]*?)(?=\n\w|\Z)", - lambda m: remove_print_in_func(m), + r"def\s+(TestInput|TestOneInput)\s*\(data\):[\s\S]*?(?=\n\w|\Z)", + process_test_function, code_no_main, flags=re.MULTILINE ) @@ -330,7 +363,6 @@ def substitute_one_repo( if not os.path.exists(source_file): logging.warning(f"Source file not found: {source_file}") continue - input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): @@ -345,21 +377,18 @@ def substitute_one_repo( # 尝试解码行以检查内容 decoded = line.decode('utf-8', errors='replace') - # 只处理以 b' 或 b" 开头的行(这些是实际的测试输入) + # 只处理以 b' 或 b" 开头的行 if decoded.startswith(("b'", 'b"')): - # 提取字节数据部分 if decoded.startswith("b'") and decoded.endswith("'\n"): - byte_data = line[2:-2] # 移除 b' 和末尾的 '\n + byte_data = line[2:-2] elif decoded.startswith('b"') and decoded.endswith('"\n'): - byte_data = line[2:-2] # 移除 b" 和末尾的 "\n + byte_data = line[2:-2] else: continue - # 只保留有效长度的输入 if 0 < len(byte_data) <= max_len: valid_inputs.append(byte_data) except UnicodeDecodeError: - # 如果无法解码,可能是二进制数据,直接使用 if 0 < len(line) <= max_len: valid_inputs.append(line) @@ -369,7 +398,7 @@ def substitute_one_repo( logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # 策略选择输入 - 最多选择 n_fuzz 个输入 + # 策略选择输入 if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -382,25 +411,13 @@ def substitute_one_repo( for idx, fuzz_input in enumerate(inputs, start=1): with open(source_file, "r") as f_src: code = f_src.read() - - - # 找到 TestInput / TestOneInput 并改成 test_{idx} - code = re.sub(r"\bTestOneInput\b", f"test_{idx}", code) - code = re.sub(r"\bTestInput\b", f"test_{idx}", code) - # 插入测试数据 - 确保使用二进制表示 - def insert_fuzz_input(match): - indent = match.group(2) - # 使用 repr() 安全表示二进制数据 - byte_repr = repr(fuzz_input) - return f"{match.group(1)}{indent}data = {byte_repr}\n{indent}" + # 1. 把函数名 test_ 改成 test_{idx} + code = re.sub(r'def\s+test_', f'def test_{idx}', code) - # 在测试函数中插入数据 - code = re.sub( - rf"(def\s+test_{idx}\s*\([^)]*\)\s*:\s*\n)(\s*)", - insert_fuzz_input, - code, - ) + # 2. 替换 data = b"" 为输入数据 + input_repr = repr(fuzz_input) + code = code.replace('data = b""', f'data = {input_repr}') out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") with open(out_path, "w") as f_out: From 73aac90fdffbbb1857bcf4a49fe2df2bb4e11ae6 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 22:44:21 +0000 Subject: [PATCH 111/134] translation --- fuzz/collect_fuzz_python.py | 100 ++++++++---------------------------- 1 file changed, 21 insertions(+), 79 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 9cf01cb..893e55e 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -176,7 +176,7 @@ def generate_test_template(target_name: str, repo_path: str): with open(src_file, "r", encoding="utf-8") as f: original_code = f.read() - # --- 1. 保留 shebang,但删除许可证注释 --- + # --- 1. Keep shebang but remove license comments --- shebang = "" if original_code.startswith("#!"): shebang, original_code = original_code.split("\n", 1) @@ -188,7 +188,7 @@ def generate_test_template(target_name: str, repo_path: str): ) code_no_license = re.sub(license_pattern, "", original_code, count=1) - # --- 2. 删除 main 函数和 if __name__ == '__main__' --- + # --- 2. Remove main function and if __name__ == '__main__' --- code_no_main = re.sub( r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)", "", @@ -202,24 +202,24 @@ def generate_test_template(target_name: str, repo_path: str): flags=re.MULTILINE ) - # --- 3. 转换测试函数 --- + # --- 3. Convert test functions --- def process_test_function(match): - # 提取完整的函数定义和函数体 + # Extract the complete function definition and body func_str = match.group(0) - # 1. 删除print(data)语句 + # 1. Remove print(data) statements func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str) - # 2. 将TestInput/TestOneInput改为test_() + # 2. Change TestInput/TestOneInput to test_() func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str) - # 3. 在函数体第一行可执行代码前插入data = b"" - # 查找第一个非空行(忽略空行和注释) + # 3. Insert data = b"" before the first executable line in the function body + # Find the first non-empty line (ignoring empty lines and comments) lines = func_str.splitlines() if len(lines) < 2: return func_str - # 找到函数定义行后的第一个非空、非注释行 + # Find the first non-empty, non-comment line after the function definition insert_idx = None for i in range(1, len(lines)): line = lines[i].strip() @@ -230,14 +230,14 @@ def process_test_function(match): if insert_idx is None: return func_str - # 获取该行的缩进量 + # Get the indentation level of that line indent_match = re.match(r'^(\s*)', lines[insert_idx]) if not indent_match: return func_str indent = indent_match.group(1) - # 插入 data = b"" + # Insert data = b"" lines.insert(insert_idx, f"{indent}data = b\"\"") return "\n".join(lines) @@ -249,7 +249,7 @@ def process_test_function(match): flags=re.MULTILINE ) - # --- 4. 输出到 tests-gen --- + # --- 4. Output to tests-gen directory --- template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) @@ -284,64 +284,6 @@ def _transform_repo(repo: str): with ProcessingPool(jobs) as p: return list(p.map(_transform_repo, repos)) -# def escape_special_chars(input_data: str) -> str: -# """ -# Escape special characters in input data for Python byte strings - -# Args: -# input_data (str): Raw input data - -# Returns: -# str: Input data with escaped characters -# """ -# # For Python, we can use repr() to safely represent byte strings -# # This will handle all special characters and non-ASCII bytes -# return repr(input_data.encode('latin-1', 'replace')) - -# def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str: -# """ -# Replace fuzzing input into Python test template - -# Args: -# template (str): Template content -# input_data (str): Input data -# idx (int): Test index -# target_name (str): Target name - -# Returns: -# str: Test code after substitution -# """ -# # Escape special characters for Python -# escaped_input = escape_special_chars(input_data) - -# # Replace input placeholder -# new_template = template.replace( -# 'input_data = b""', -# f'input_data = {escaped_input}' -# ) - -# # Replace test method name to avoid duplication -# return new_template.replace( -# f"def test_generated(self):", -# f"def test_{idx}(self):" -# ) - -# def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool: -# """ -# Check if a string is sufficiently similar to any string in the selected list - -# Args: -# selected (list[str]): List of selected strings -# x (str): String to check -# thresh (float): Similarity threshold - -# Returns: -# bool: Whether they are similar -# """ -# def similar(a, b): -# return SequenceMatcher(None, a, b).ratio() -# return any(similar(x, y) > thresh for y in selected) - def substitute_one_repo( repo: str, @@ -352,7 +294,7 @@ def substitute_one_repo( sim_thresh: float, ): """ - 从原 fuzz target 复制文件,按 fuzz input 生成多个 testgen 文件。 + Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs """ input_dir = pjoin(repo, "fuzz_inputs") template_dir = pjoin(repo, "tests-gen") @@ -369,15 +311,15 @@ def substitute_one_repo( logging.warning(f"Input file not found: {input_path}") continue - # 读取所有有效的输入数据 + # Read all valid input data valid_inputs = [] with open(input_path, "rb") as f_input: for line in f_input: try: - # 尝试解码行以检查内容 + # Attempt to decode the line to check content decoded = line.decode('utf-8', errors='replace') - # 只处理以 b' 或 b" 开头的行 + # Only process lines starting with b' or b" if decoded.startswith(("b'", 'b"')): if decoded.startswith("b'") and decoded.endswith("'\n"): byte_data = line[2:-2] @@ -398,7 +340,7 @@ def substitute_one_repo( logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # 策略选择输入 + # Strategy for selecting inputs if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -407,15 +349,15 @@ def substitute_one_repo( else: inputs = valid_inputs[:n_fuzz] - # 每个 fuzz input 生成一个单独的文件 + # Generate a separate file for each fuzz input for idx, fuzz_input in enumerate(inputs, start=1): with open(source_file, "r") as f_src: code = f_src.read() - # 1. 把函数名 test_ 改成 test_{idx} + # 1. Change function name from test_ to test_{idx} code = re.sub(r'def\s+test_', f'def test_{idx}', code) - # 2. 替换 data = b"" 为输入数据 + # 2. Replace data = b"" with input data input_repr = repr(fuzz_input) code = code.replace('data = b""', f'data = {input_repr}') @@ -423,7 +365,7 @@ def substitute_one_repo( with open(out_path, "w") as f_out: f_out.write(code) - # 格式化代码 + # Format code try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: From 4c94beb8656a8ee859eeba3bf056acac2b012cf3 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 22:51:47 +0000 Subject: [PATCH 112/134] A complete script for building the processes of build_image, build_fuzzer, fuzz, transform, and testgen, suitable for Python projects. --- fuzz/clean_fuzz_dir.py | 14 +++++++------- fuzz/collect_fuzz_python.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py index b4e2b0b..b3bed0a 100644 --- a/fuzz/clean_fuzz_dir.py +++ b/fuzz/clean_fuzz_dir.py @@ -28,13 +28,13 @@ def clean_project_dirs(root_dir): print(f"🗑️ Removed dir: {tests_gen_path}") removed_dirs += 1 - # 删除 .inputs.py 文件 - for fname in os.listdir(project_path): - if fname.endswith(".inputs.py"): - file_path = os.path.join(project_path, fname) - os.remove(file_path) - print(f"🗑️ Removed file: {file_path}") - removed_files += 1 + # # 删除 .inputs.py 文件 + # for fname in os.listdir(project_path): + # if fname.endswith(".inputs.py"): + # file_path = os.path.join(project_path, fname) + # os.remove(file_path) + # print(f"🗑️ Removed file: {file_path}") + # removed_files += 1 print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.") diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 893e55e..c8f1678 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -412,7 +412,7 @@ def main( repo_id: str = "data/valid_projects3.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 30, - jobs: int = 2, + jobs: int = 8, pipeline: str = "all", n_fuzz: int = 100, strategy: str = "shuffle", From 5a470885064bbf96dd966a0394df3e702b8a1346 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Thu, 14 Aug 2025 23:04:39 +0000 Subject: [PATCH 113/134] delete some imports --- fuzz/collect_fuzz_python.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index c8f1678..737c5de 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -158,10 +158,6 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int): logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) -import os -import re -import logging -from os.path import join as pjoin def generate_test_template(target_name: str, repo_path: str): """ From a16d664b3981dd74825d18f589cdefe41489d72b Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 15 Aug 2025 23:46:08 +0000 Subject: [PATCH 114/134] use ASTfor transform and testgen --- fuzz/collect_fuzz_python.py | 310 ++++++++++++++++++++++++------------ 1 file changed, 209 insertions(+), 101 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 737c5de..4d9913f 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -3,8 +3,10 @@ usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all """ from pathlib import Path +import ast +import astunparse import logging -from typing import Optional, List, Tuple +from typing import Optional import fire import os from UniTSyn.frontend.util import wrap_repo, parallel_subprocess @@ -159,10 +161,28 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int): logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) +def transform_repos(repos: list[str], jobs: int): + """ + Generate test templates for all targets + + Args: + repos (list[str]): List of repository paths + jobs (int): Number of parallel tasks + """ + logging.info("Generating test templates") + + def _transform_repo(repo: str): + project_name = os.path.basename(repo) + oss_fuzz_dir = Path(repo).parent.parent + targets = discover_targets(project_name, oss_fuzz_dir) + return [generate_test_template(t, repo) for t in targets] + + with ProcessingPool(jobs) as p: + return list(p.map(_transform_repo, repos)) + def generate_test_template(target_name: str, repo_path: str): """ - Generate Python test template for a single target by stripping license header, - main() block, and converting TestInput/TestOneInput to test_ with data=b"". + Generate Python test template using AST for more precise code transformations """ src_file = pjoin(repo_path, target_name + ".py") if not os.path.exists(src_file): @@ -184,68 +204,22 @@ def generate_test_template(target_name: str, repo_path: str): ) code_no_license = re.sub(license_pattern, "", original_code, count=1) - # --- 2. Remove main function and if __name__ == '__main__' --- - code_no_main = re.sub( - r"\n?def\s+main\([\s\S]*?(?=^if\s+__name__\s*==\s*['\"]__main__['\"]:)", - "", - code_no_license, - flags=re.MULTILINE - ) - code_no_main = re.sub( - r"\n?if\s+__name__\s*==\s*['\"]__main__['\"]:\s*main\(\s*\)\s*", - "", - code_no_main, - flags=re.MULTILINE - ) + # --- 2. Parse code to AST --- + try: + tree = ast.parse(code_no_license) + except SyntaxError as e: + logging.error(f"Syntax error in {src_file}: {e}") + return None - # --- 3. Convert test functions --- - def process_test_function(match): - # Extract the complete function definition and body - func_str = match.group(0) - - # 1. Remove print(data) statements - func_str = re.sub(r'print\s*\(\s*data\s*\)\s*', '', func_str) - - # 2. Change TestInput/TestOneInput to test_() - func_str = re.sub(r'def\s+(TestInput|TestOneInput)\s*\(data\)', 'def test_()', func_str) - - # 3. Insert data = b"" before the first executable line in the function body - # Find the first non-empty line (ignoring empty lines and comments) - lines = func_str.splitlines() - if len(lines) < 2: - return func_str - - # Find the first non-empty, non-comment line after the function definition - insert_idx = None - for i in range(1, len(lines)): - line = lines[i].strip() - if line and not line.startswith('#'): - insert_idx = i - break - - if insert_idx is None: - return func_str - - # Get the indentation level of that line - indent_match = re.match(r'^(\s*)', lines[insert_idx]) - if not indent_match: - return func_str - - indent = indent_match.group(1) - - # Insert data = b"" - lines.insert(insert_idx, f"{indent}data = b\"\"") - - return "\n".join(lines) - - cleaned_code = re.sub( - r"def\s+(TestInput|TestOneInput)\s*\(data\):[\s\S]*?(?=\n\w|\Z)", - process_test_function, - code_no_main, - flags=re.MULTILINE - ) + # --- 3. AST transformation --- + transformer = TestFunctionTransformer() + new_tree = transformer.visit(tree) + ast.fix_missing_locations(new_tree) + + # --- 4. Generate cleaned code --- + cleaned_code = astunparse.unparse(new_tree) - # --- 4. Output to tests-gen directory --- + # --- 5. Output to tests-gen directory --- template_dir = pjoin(repo_path, "tests-gen") os.makedirs(template_dir, exist_ok=True) @@ -260,26 +234,144 @@ def process_test_function(match): logging.info(f"Generated cleaned template: {template_path}") return template_path - -def transform_repos(repos: list[str], jobs: int): - """ - Generate test templates for all targets - Args: - repos (list[str]): List of repository paths - jobs (int): Number of parallel tasks - """ - logging.info("Generating test templates") +class TestFunctionTransformer(ast.NodeTransformer): + """AST transformer for test function conversion""" - def _transform_repo(repo: str): - project_name = os.path.basename(repo) - oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - return [generate_test_template(t, repo) for t in targets] + def visit_FunctionDef(self, node): + # 首先处理 main 函数(移除) + if node.name == "main": + return None + + # 处理 TestInput/TestOneInput 函数 + if node.name in ["TestInput", "TestOneInput"]: + # a. 记录参数名称(假设只有一个参数) + param_name = None + if node.args.args: + param_name = node.args.args[0].arg + + # b. 将函数名改为 test_ + node.name = "test_" + + # c. 移除参数(将参数列表设为空) + node.args = ast.arguments( + posonlyargs=[], + args=[], + vararg=None, + kwonlyargs=[], + kw_defaults=[], + kwarg=None, + defaults=[] + ) + + # d. 在函数体开头插入 原参数名 = b"" + if param_name: + self.add_param_assignment(node, param_name) + + # f. 删除所有 print(原参数名) 的语句 + if param_name: + self.remove_print_param(node, param_name) + + # 确保继续遍历子节点 + self.generic_visit(node) + return node - with ProcessingPool(jobs) as p: - return list(p.map(_transform_repo, repos)) - + def add_param_assignment(self, node, param_name): + """Add param_name = b"" at the beginning of the function body""" + # 创建赋值节点 + assign_node = ast.Assign( + targets=[ast.Name(id=param_name, ctx=ast.Store())], + value=ast.Constant(value=b"") + ) + + # 如果有文档字符串,插入在文档字符串之后 + if node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Str): + node.body.insert(1, assign_node) + else: + node.body.insert(0, assign_node) + + def remove_print_param(self, node, param_name): + """Remove print statements for the specific parameter""" + new_body = [] + for stmt in node.body: + # 跳过 print(param_name) 调用 + if (isinstance(stmt, ast.Expr) and + isinstance(stmt.value, ast.Call) and + isinstance(stmt.value.func, ast.Name) and + stmt.value.func.id == "print" and + any(isinstance(arg, ast.Name) and arg.id == param_name + for arg in stmt.value.args)): + continue + new_body.append(stmt) + node.body = new_body + + def visit_If(self, node): + """Remove if __name__ == '__main__' blocks""" + # 检查是否是主函数保护 + if (isinstance(node.test, ast.Compare) and + isinstance(node.test.left, ast.Name) and + node.test.left.id == "__name__" and + isinstance(node.test.ops[0], ast.Eq) and + isinstance(node.test.comparators[0], ast.Constant) and + node.test.comparators[0].value == "__main__"): + + # 移除整个 if 块 + return None + + # 确保继续遍历子节点 + self.generic_visit(node) + return node +class TestGenTransformer(ast.NodeTransformer): + """AST transformer for generating test cases from fuzzing inputs""" + + def __init__(self, idx: int, fuzz_input: bytes): + self.idx = idx + self.fuzz_input = fuzz_input + self.found_test_function = False + + def visit_FunctionDef(self, node): + # 只处理名为 test_ 的函数 + if node.name == "test_": + self.found_test_function = True + + # 1. 将函数名改为 test_{idx} + node.name = f"test_{self.idx}" + + # 2. 找到并替换 data = b"" 赋值语句 + self.replace_data_assignment(node) + + return node + + def replace_data_assignment(self, node): + """Replace data assignment with fuzz input""" + for i, stmt in enumerate(node.body): + # 查找赋值语句 + if isinstance(stmt, ast.Assign): + # 检查是否是 data = b"" 格式的赋值 + if (len(stmt.targets) == 1 and + isinstance(stmt.targets[0], ast.Name) and + isinstance(stmt.value, ast.Constant) and + stmt.value.value == b""): + + # 替换为新的输入数据 + node.body[i] = ast.Assign( + targets=[stmt.targets[0]], + value=ast.Constant(value=self.fuzz_input) + ) + return + + # 检查是否是 data = b'' 格式的赋值 + if (len(stmt.targets) == 1 and + isinstance(stmt.targets[0], ast.Name) and + isinstance(stmt.value, ast.Constant) and + stmt.value.value == b''): + + # 替换为新的输入数据 + node.body[i] = ast.Assign( + targets=[stmt.targets[0]], + value=ast.Constant(value=self.fuzz_input) + ) + return def substitute_one_repo( repo: str, @@ -291,6 +383,7 @@ def substitute_one_repo( ): """ Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs + using AST transformations """ input_dir = pjoin(repo, "fuzz_inputs") template_dir = pjoin(repo, "tests-gen") @@ -307,15 +400,15 @@ def substitute_one_repo( logging.warning(f"Input file not found: {input_path}") continue - # Read all valid input data + # 读取所有有效的输入数据 valid_inputs = [] with open(input_path, "rb") as f_input: for line in f_input: try: - # Attempt to decode the line to check content + # 尝试解码行以检查内容 decoded = line.decode('utf-8', errors='replace') - # Only process lines starting with b' or b" + # 只处理以 b' 或 b" 开头的行 if decoded.startswith(("b'", 'b"')): if decoded.startswith("b'") and decoded.endswith("'\n"): byte_data = line[2:-2] @@ -336,7 +429,7 @@ def substitute_one_repo( logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # Strategy for selecting inputs + # 策略选择输入 if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -345,27 +438,42 @@ def substitute_one_repo( else: inputs = valid_inputs[:n_fuzz] - # Generate a separate file for each fuzz input + # 每个 fuzz input 生成一个单独的文件(使用 AST) for idx, fuzz_input in enumerate(inputs, start=1): with open(source_file, "r") as f_src: code = f_src.read() - # 1. Change function name from test_ to test_{idx} - code = re.sub(r'def\s+test_', f'def test_{idx}', code) - - # 2. Replace data = b"" with input data - input_repr = repr(fuzz_input) - code = code.replace('data = b""', f'data = {input_repr}') - - out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") - with open(out_path, "w") as f_out: - f_out.write(code) - - # Format code try: - subprocess.run(["black", out_path], check=False) - except FileNotFoundError: - logging.warning("Black formatter not found, skipping formatting") + # 解析为 AST + tree = ast.parse(code) + + # 应用转换器 + transformer = TestGenTransformer(idx, fuzz_input) + new_tree = transformer.visit(tree) + ast.fix_missing_locations(new_tree) + + # 确保找到并处理了测试函数 + if not transformer.found_test_function: + logging.warning(f"No test_ function found in {source_file}") + continue + + # 生成新代码 + new_code = astunparse.unparse(new_tree) + + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") + with open(out_path, "w") as f_out: + f_out.write(new_code) + + # 格式化代码 + try: + subprocess.run(["black", out_path], check=False) + except FileNotFoundError: + logging.warning("Black formatter not found, skipping formatting") + + except SyntaxError as e: + logging.error(f"Syntax error when processing {source_file}: {e}") + except Exception as e: + logging.error(f"Error generating test case for {target_name}: {e}") def testgen_repos( repos: list[str], jobs: int, @@ -405,7 +513,7 @@ def testgen_repos( )) def main( - repo_id: str = "data/valid_projects3.txt", + repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", timeout: int = 30, jobs: int = 8, From 730e45889425a45efc35f5b1785515bfb0c463f2 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Fri, 15 Aug 2025 23:46:19 +0000 Subject: [PATCH 115/134] use AST --- fuzz/modify_fuzz_files.py | 67 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py index de8333f..8e4eca4 100644 --- a/fuzz/modify_fuzz_files.py +++ b/fuzz/modify_fuzz_files.py @@ -1,34 +1,41 @@ import os -import re +import ast def add_print_to_testoneinput(file_path): with open(file_path, 'r') as f: content = f.read() - # 正则表达式匹配TestOneInput或TestInput函数定义及其函数体 - pattern = r'(\bdef\s+(TestOneInput|TestInput)\(data\):\s*\n)((?:[ \t]+.*\n|\s*\n)*)' - matches = re.finditer(pattern, content, re.MULTILINE) + # 解析 AST + tree = ast.parse(content) - new_content = content - for match in reversed(list(matches)): - function_def = match.group(1) - function_body = match.group(3) - - # 在函数体开头添加print(data)语句 - new_function_body = re.sub( - r'^([ \t]*)(.*\n)', - r'\g<1>\2\g<1>print(data)\n', - function_body, - count=1 - ) - - # 只有在函数体非空且未添加过print时才替换 - if new_function_body != function_body: - new_content = ( - new_content[:match.start(3)] + - new_function_body + - new_content[match.end(3):] - ) + class InsertPrintTransformer(ast.NodeTransformer): + def visit_FunctionDef(self, node): + if node.name in ("TestOneInput", "TestInput") and node.args.args: + first_arg_name = node.args.args[0].arg + # 创建 print(参数名) 语句 + print_stmt = ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ast.Name(id=first_arg_name, ctx=ast.Load())], + keywords=[] + ) + ) + # 确保没有重复插入 + if not ( + isinstance(node.body[0], ast.Expr) + and isinstance(node.body[0].value, ast.Call) + and getattr(node.body[0].value.func, "id", None) == "print" + ): + node.body.insert(0, print_stmt) + return node + + transformer = InsertPrintTransformer() + new_tree = transformer.visit(tree) + ast.fix_missing_locations(new_tree) + + # 转回代码 + import astor + new_content = astor.to_source(new_tree) return new_content @@ -41,7 +48,7 @@ def main(): for project in projects: project_dir = os.path.join(projects_path, project) - + if not os.path.isdir(project_dir): continue @@ -49,18 +56,18 @@ def main(): for file in files: if file.startswith('fuzz_') and file.endswith('.py'): file_path = os.path.join(root, file) - + try: new_content = add_print_to_testoneinput(file_path) - - # 保存修改后的文件(添加_print后缀) + + # 保存修改后的文件 new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py' with open(new_file_path, 'w') as f: f.write(new_content) print(f"Processed: {file_path} -> {new_file_path}") - + except Exception as e: print(f"Error processing {file_path}: {str(e)}") if __name__ == "__main__": - main() \ No newline at end of file + main() From 40e380718ac2378f186b1010ab2cd8926255b24a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 16 Aug 2025 00:07:48 +0000 Subject: [PATCH 116/134] Set up command line arguments --- fuzz/clean_fuzz_dir.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py index b3bed0a..f8d69fe 100644 --- a/fuzz/clean_fuzz_dir.py +++ b/fuzz/clean_fuzz_dir.py @@ -1,42 +1,51 @@ #!/usr/bin/env python3 import os import shutil +import argparse -ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" +# Default root directory +DEFAULT_ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" def clean_project_dirs(root_dir): removed_files = 0 removed_dirs = 0 - # 遍历一级项目目录 + # Walk through the root directory for project in os.listdir(root_dir): project_path = os.path.join(root_dir, project) if not os.path.isdir(project_path): continue - # 删除 fuzz_inputs 文件夹 + # Delete fuzz_inputs directories fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs") if os.path.isdir(fuzz_inputs_path): shutil.rmtree(fuzz_inputs_path) print(f"🗑️ Removed dir: {fuzz_inputs_path}") removed_dirs += 1 - # 删除 tests-gen 文件夹 + # Delete tests-gen directories tests_gen_path = os.path.join(project_path, "tests-gen") if os.path.isdir(tests_gen_path): shutil.rmtree(tests_gen_path) print(f"🗑️ Removed dir: {tests_gen_path}") removed_dirs += 1 - # # 删除 .inputs.py 文件 - # for fname in os.listdir(project_path): - # if fname.endswith(".inputs.py"): - # file_path = os.path.join(project_path, fname) - # os.remove(file_path) - # print(f"🗑️ Removed file: {file_path}") - # removed_files += 1 - - print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.") + print(f"\n✅ Done. Removed {removed_dirs} directories in total.") if __name__ == "__main__": - clean_project_dirs(ROOT_DIR) + # Set up command line arguments + parser = argparse.ArgumentParser( + description='Clean project directories by removing fuzz_inputs and tests-gen folders', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('--root_dir', default=DEFAULT_ROOT_DIR, + help='Root directory containing project folders') + args = parser.parse_args() + + # Validate the root directory exists + if not os.path.isdir(args.root_dir): + print(f"❌ Error: Specified root directory does not exist: {args.root_dir}") + exit(1) + + print(f"Cleaning projects in: {args.root_dir}") + clean_project_dirs(args.root_dir) \ No newline at end of file From e3e5546582ee064c947eaba50f0455fbf8481c21 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 16 Aug 2025 00:29:13 +0000 Subject: [PATCH 117/134] use fire --- fuzz/clean_fuzz_dir.py | 43 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py index f8d69fe..9a9bdf8 100644 --- a/fuzz/clean_fuzz_dir.py +++ b/fuzz/clean_fuzz_dir.py @@ -1,51 +1,46 @@ #!/usr/bin/env python3 import os import shutil -import argparse +import fire -# Default root directory -DEFAULT_ROOT_DIR = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" +def clean_project_dirs(root_dir="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"): + """ + 清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹 -def clean_project_dirs(root_dir): + Args: + root_dir (str): 项目的根目录路径 + """ removed_files = 0 removed_dirs = 0 - # Walk through the root directory for project in os.listdir(root_dir): project_path = os.path.join(root_dir, project) if not os.path.isdir(project_path): continue - # Delete fuzz_inputs directories + # 删除 fuzz_inputs 文件夹 fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs") if os.path.isdir(fuzz_inputs_path): shutil.rmtree(fuzz_inputs_path) print(f"🗑️ Removed dir: {fuzz_inputs_path}") removed_dirs += 1 - # Delete tests-gen directories + # 删除 tests-gen 文件夹 tests_gen_path = os.path.join(project_path, "tests-gen") if os.path.isdir(tests_gen_path): shutil.rmtree(tests_gen_path) print(f"🗑️ Removed dir: {tests_gen_path}") removed_dirs += 1 - print(f"\n✅ Done. Removed {removed_dirs} directories in total.") + # 如果需要删除 .inputs.py 文件,取消注释以下代码 + # for fname in os.listdir(project_path): + # if fname.endswith(".inputs.py"): + # file_path = os.path.join(project_path, fname) + # os.remove(file_path) + # print(f"🗑️ Removed file: {file_path}") + # removed_files += 1 + + print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.") if __name__ == "__main__": - # Set up command line arguments - parser = argparse.ArgumentParser( - description='Clean project directories by removing fuzz_inputs and tests-gen folders', - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument('--root_dir', default=DEFAULT_ROOT_DIR, - help='Root directory containing project folders') - args = parser.parse_args() - - # Validate the root directory exists - if not os.path.isdir(args.root_dir): - print(f"❌ Error: Specified root directory does not exist: {args.root_dir}") - exit(1) - - print(f"Cleaning projects in: {args.root_dir}") - clean_project_dirs(args.root_dir) \ No newline at end of file + fire.Fire(clean_project_dirs) From ab6813d080cda75e7bccbdb7df5b2cc4bd829d43 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sat, 16 Aug 2025 00:30:36 +0000 Subject: [PATCH 118/134] use FIre --- fuzz/modify_fuzz_files.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py index 8e4eca4..1ed8b48 100644 --- a/fuzz/modify_fuzz_files.py +++ b/fuzz/modify_fuzz_files.py @@ -1,5 +1,7 @@ +#!/usr/bin/env python3 import os import ast +import fire def add_print_to_testoneinput(file_path): with open(file_path, 'r') as f: @@ -36,13 +38,19 @@ def visit_FunctionDef(self, node): # 转回代码 import astor new_content = astor.to_source(new_tree) - return new_content -def main(): - projects_path = "/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects" - valid_projects_file = "data/valid_projects.txt" +def main( + projects_path="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects", + valid_projects_file="data/valid_projects.txt" +): + """ + 给 fuzz target 的 TestOneInput / TestInput 函数开头插入 print(参数名) + Args: + projects_path (str): OSS-Fuzz 项目的根目录 + valid_projects_file (str): 包含有效项目名的文件路径 + """ with open(valid_projects_file, 'r') as f: projects = [line.strip() for line in f if line.strip()] @@ -70,4 +78,4 @@ def main(): print(f"Error processing {file_path}: {str(e)}") if __name__ == "__main__": - main() + fire.Fire(main) From f2c7485d241d81e8e8c2b669ec6abe93ded5a3e0 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sun, 17 Aug 2025 02:16:05 +0000 Subject: [PATCH 119/134] black formatter --- fuzz/collect_fuzz_python.py | 240 +++++++++++++++++++++--------------- 1 file changed, 144 insertions(+), 96 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 4d9913f..bb958c9 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -2,9 +2,10 @@ Script for Python project fuzzing and test template conversion usage: PYTHONPATH=. python3 fuzz/collect_fuzz_python.py --pipeline all """ + from pathlib import Path import ast -import astunparse +import astunparse import logging from typing import Optional import fire @@ -20,10 +21,11 @@ from datetime import datetime import re + def build_image(repos: list[str], jobs: int): """ Build Docker images for OSS-Fuzz projects corresponding to each repository - + Args: repos (list[str]): List of repository paths jobs (int): Number of parallel tasks @@ -48,10 +50,11 @@ def _build_cmd(path: str): _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) + def build_fuzzer(repos: list[str], jobs: int): """ Build fuzzers in parallel for successfully built projects - + Args: repos (list[str]): List of repository paths jobs (int): Number of parallel tasks @@ -65,7 +68,9 @@ def _build_cmd(path: str): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log") - logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}") + logging.info( + f"Start building fuzzers for {project_name}, logging to {log_file}" + ) return subprocess.Popen( f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}", cwd=os.path.abspath(os.path.join(path, "../../")), @@ -76,29 +81,38 @@ def _build_cmd(path: str): _ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None) + def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]: """ Discover fuzzing targets """ out_dir = oss_fuzz_dir / "build" / "out" / project_name targets: list[str] = [] - + logging.debug(f"Searching fuzz targets in: {out_dir}") - + if not out_dir.is_dir(): logging.warning(f"Build output directory for {project_name} does not exist") return targets try: for f in out_dir.iterdir(): - if (f.is_file() and f.name.startswith("fuzz_") and - '.' not in f.name and f.name.endswith("print1") and - os.access(f, os.X_OK)): + if ( + f.is_file() + and f.name.startswith("fuzz_") + and "." not in f.name + and f.name.endswith("print1") + and os.access(f, os.X_OK) + ): targets.append(f.name) - logging.info(f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}") + logging.info( + f"Discovered {len(targets)} fuzz targets in {project_name}: {targets}" + ) except Exception as e: - logging.error(f"Error discovering targets for {project_name}: {e}", exc_info=True) - + logging.error( + f"Error discovering targets for {project_name}: {e}", exc_info=True + ) + return targets @@ -109,11 +123,13 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): repo_path, target_name = target project_name = os.path.basename(repo_path) oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path)) - + input_file_path = pjoin(repo_path, "fuzz_inputs", target_name) os.makedirs(os.path.dirname(input_file_path), exist_ok=True) - logging.info(f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s") + logging.info( + f"Starting fuzzing: project={project_name}, target={target_name}, timeout={timeout}s" + ) logging.debug(f"Fuzz output will be saved to: {input_file_path}") try: @@ -122,14 +138,17 @@ def fuzz_one_target(target: tuple[str, str], timeout: int): [ "bash", "-c", - f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}" + f"python3 infra/helper.py run_fuzzer {project_name} {target_name} -- -max_total_time={timeout}", ], cwd=oss_fuzz_root, stdout=input_file, stderr=subprocess.DEVNULL, ) except Exception as e: - logging.error(f"Error starting fuzzer for {project_name}/{target_name}: {e}", exc_info=True) + logging.error( + f"Error starting fuzzer for {project_name}/{target_name}: {e}", + exc_info=True, + ) return None @@ -145,41 +164,47 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int): oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} all_targets: list[tuple[str, str]] = [ (k, v) for k, vs in target_map.items() for v in vs ] - + logging.info(f"Total fuzz targets discovered: {len(all_targets)}") for repo, targets in target_map.items(): logging.info(f"{os.path.basename(repo)}: {len(targets)} targets") for repo in repos: os.makedirs(pjoin(repo, "fuzz_inputs"), exist_ok=True) - - logging.info(f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target") - parallel_subprocess(all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None) + + logging.info( + f"Starting parallel fuzzing with {jobs} jobs, timeout={timeout}s per target" + ) + parallel_subprocess( + all_targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None + ) + def transform_repos(repos: list[str], jobs: int): """ Generate test templates for all targets - + Args: repos (list[str]): List of repository paths jobs (int): Number of parallel tasks """ logging.info("Generating test templates") - + def _transform_repo(repo: str): project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) return [generate_test_template(t, repo) for t in targets] - + with ProcessingPool(jobs) as p: return list(p.map(_transform_repo, repos)) + def generate_test_template(target_name: str, repo_path: str): """ Generate Python test template using AST for more precise code transformations @@ -200,7 +225,7 @@ def generate_test_template(target_name: str, repo_path: str): license_pattern = re.compile( r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", - re.IGNORECASE | re.MULTILINE + re.IGNORECASE | re.MULTILINE, ) code_no_license = re.sub(license_pattern, "", original_code, count=1) @@ -234,25 +259,26 @@ def generate_test_template(target_name: str, repo_path: str): logging.info(f"Generated cleaned template: {template_path}") return template_path - + + class TestFunctionTransformer(ast.NodeTransformer): """AST transformer for test function conversion""" - + def visit_FunctionDef(self, node): # 首先处理 main 函数(移除) if node.name == "main": return None - + # 处理 TestInput/TestOneInput 函数 if node.name in ["TestInput", "TestOneInput"]: # a. 记录参数名称(假设只有一个参数) param_name = None if node.args.args: param_name = node.args.args[0].arg - + # b. 将函数名改为 test_ node.name = "test_" - + # c. 移除参数(将参数列表设为空) node.args = ast.arguments( posonlyargs=[], @@ -261,118 +287,135 @@ def visit_FunctionDef(self, node): kwonlyargs=[], kw_defaults=[], kwarg=None, - defaults=[] + defaults=[], ) - + # d. 在函数体开头插入 原参数名 = b"" if param_name: self.add_param_assignment(node, param_name) - + # f. 删除所有 print(原参数名) 的语句 if param_name: self.remove_print_param(node, param_name) - + # 确保继续遍历子节点 self.generic_visit(node) return node - + def add_param_assignment(self, node, param_name): """Add param_name = b"" at the beginning of the function body""" # 创建赋值节点 assign_node = ast.Assign( targets=[ast.Name(id=param_name, ctx=ast.Store())], - value=ast.Constant(value=b"") + value=ast.Constant(value=b""), ) - + # 如果有文档字符串,插入在文档字符串之后 - if node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Str): + if ( + node.body + and isinstance(node.body[0], ast.Expr) + and isinstance(node.body[0].value, ast.Str) + ): node.body.insert(1, assign_node) else: node.body.insert(0, assign_node) - + def remove_print_param(self, node, param_name): """Remove print statements for the specific parameter""" new_body = [] for stmt in node.body: # 跳过 print(param_name) 调用 - if (isinstance(stmt, ast.Expr) and - isinstance(stmt.value, ast.Call) and - isinstance(stmt.value.func, ast.Name) and - stmt.value.func.id == "print" and - any(isinstance(arg, ast.Name) and arg.id == param_name - for arg in stmt.value.args)): + if ( + isinstance(stmt, ast.Expr) + and isinstance(stmt.value, ast.Call) + and isinstance(stmt.value.func, ast.Name) + and stmt.value.func.id == "print" + and any( + isinstance(arg, ast.Name) and arg.id == param_name + for arg in stmt.value.args + ) + ): continue new_body.append(stmt) node.body = new_body - + def visit_If(self, node): """Remove if __name__ == '__main__' blocks""" # 检查是否是主函数保护 - if (isinstance(node.test, ast.Compare) and - isinstance(node.test.left, ast.Name) and - node.test.left.id == "__name__" and - isinstance(node.test.ops[0], ast.Eq) and - isinstance(node.test.comparators[0], ast.Constant) and - node.test.comparators[0].value == "__main__"): - + if ( + isinstance(node.test, ast.Compare) + and isinstance(node.test.left, ast.Name) + and node.test.left.id == "__name__" + and isinstance(node.test.ops[0], ast.Eq) + and isinstance(node.test.comparators[0], ast.Constant) + and node.test.comparators[0].value == "__main__" + ): + # 移除整个 if 块 return None - + # 确保继续遍历子节点 self.generic_visit(node) return node + + class TestGenTransformer(ast.NodeTransformer): """AST transformer for generating test cases from fuzzing inputs""" - + def __init__(self, idx: int, fuzz_input: bytes): self.idx = idx self.fuzz_input = fuzz_input self.found_test_function = False - + def visit_FunctionDef(self, node): # 只处理名为 test_ 的函数 if node.name == "test_": self.found_test_function = True - + # 1. 将函数名改为 test_{idx} node.name = f"test_{self.idx}" - + # 2. 找到并替换 data = b"" 赋值语句 self.replace_data_assignment(node) - + return node - + def replace_data_assignment(self, node): """Replace data assignment with fuzz input""" for i, stmt in enumerate(node.body): # 查找赋值语句 if isinstance(stmt, ast.Assign): # 检查是否是 data = b"" 格式的赋值 - if (len(stmt.targets) == 1 and - isinstance(stmt.targets[0], ast.Name) and - isinstance(stmt.value, ast.Constant) and - stmt.value.value == b""): - + if ( + len(stmt.targets) == 1 + and isinstance(stmt.targets[0], ast.Name) + and isinstance(stmt.value, ast.Constant) + and stmt.value.value == b"" + ): + # 替换为新的输入数据 node.body[i] = ast.Assign( targets=[stmt.targets[0]], - value=ast.Constant(value=self.fuzz_input) + value=ast.Constant(value=self.fuzz_input), ) return - + # 检查是否是 data = b'' 格式的赋值 - if (len(stmt.targets) == 1 and - isinstance(stmt.targets[0], ast.Name) and - isinstance(stmt.value, ast.Constant) and - stmt.value.value == b''): - + if ( + len(stmt.targets) == 1 + and isinstance(stmt.targets[0], ast.Name) + and isinstance(stmt.value, ast.Constant) + and stmt.value.value == b"" + ): + # 替换为新的输入数据 node.body[i] = ast.Assign( targets=[stmt.targets[0]], - value=ast.Constant(value=self.fuzz_input) + value=ast.Constant(value=self.fuzz_input), ) return + def substitute_one_repo( repo: str, targets: list[str], @@ -394,7 +437,7 @@ def substitute_one_repo( if not os.path.exists(source_file): logging.warning(f"Source file not found: {source_file}") continue - + input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") @@ -406,8 +449,8 @@ def substitute_one_repo( for line in f_input: try: # 尝试解码行以检查内容 - decoded = line.decode('utf-8', errors='replace') - + decoded = line.decode("utf-8", errors="replace") + # 只处理以 b' 或 b" 开头的行 if decoded.startswith(("b'", 'b"')): if decoded.startswith("b'") and decoded.endswith("'\n"): @@ -416,7 +459,7 @@ def substitute_one_repo( byte_data = line[2:-2] else: continue - + if 0 < len(byte_data) <= max_len: valid_inputs.append(byte_data) except UnicodeDecodeError: @@ -442,38 +485,40 @@ def substitute_one_repo( for idx, fuzz_input in enumerate(inputs, start=1): with open(source_file, "r") as f_src: code = f_src.read() - + try: # 解析为 AST tree = ast.parse(code) - + # 应用转换器 transformer = TestGenTransformer(idx, fuzz_input) new_tree = transformer.visit(tree) ast.fix_missing_locations(new_tree) - + # 确保找到并处理了测试函数 if not transformer.found_test_function: logging.warning(f"No test_ function found in {source_file}") continue - + # 生成新代码 new_code = astunparse.unparse(new_tree) - + out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") with open(out_path, "w") as f_out: f_out.write(new_code) - + # 格式化代码 try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - + except SyntaxError as e: logging.error(f"Syntax error when processing {source_file}: {e}") except Exception as e: logging.error(f"Error generating test case for {target_name}: {e}") + + def testgen_repos( repos: list[str], jobs: int, @@ -484,7 +529,7 @@ def testgen_repos( ): """ Generate test cases from fuzzing inputs - + Args: repos (list[str]): List of repository paths jobs (int): Number of parallel tasks @@ -500,17 +545,20 @@ def testgen_repos( oss_fuzz_dir = Path(repo).parent.parent targets = discover_targets(project_name, oss_fuzz_dir) targets_list.append(targets) - + target_map = {repo: targets for repo, targets in zip(repos, targets_list)} - + # Process each repository in parallel with ProcessingPool(jobs) as p: - list(p.map( - lambda item: substitute_one_repo( - item[0], item[1], n_fuzz, strategy, max_len, sim_thresh - ), - target_map.items() - )) + list( + p.map( + lambda item: substitute_one_repo( + item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + ), + target_map.items(), + ) + ) + def main( repo_id: str = "data/valid_projects.txt", @@ -522,11 +570,10 @@ def main( strategy: str = "shuffle", max_len: int = 100, sim_thresh: float = 0.8, - ): """ Main function, controlling the entire fuzzing process - + Args: repo_id (str): Project ID file path repo_root (str): Project root directory @@ -571,6 +618,7 @@ def main( else: logging.error(f"Unknown pipeline: {pipeline}") + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - fire.Fire(main) \ No newline at end of file + fire.Fire(main) From 2ebcee3a6b90dd42ce3b8d4afe77089e656a6fc9 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sun, 17 Aug 2025 20:09:19 +0000 Subject: [PATCH 120/134] deal the data after closing the file --- fuzz/collect_fuzz_python.py | 43 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index bb958c9..e1c4dd4 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -445,33 +445,36 @@ def substitute_one_repo( # 读取所有有效的输入数据 valid_inputs = [] + # 首先读取文件内容,然后关闭文件 with open(input_path, "rb") as f_input: - for line in f_input: - try: - # 尝试解码行以检查内容 - decoded = line.decode("utf-8", errors="replace") - - # 只处理以 b' 或 b" 开头的行 - if decoded.startswith(("b'", 'b"')): - if decoded.startswith("b'") and decoded.endswith("'\n"): - byte_data = line[2:-2] - elif decoded.startswith('b"') and decoded.endswith('"\n'): - byte_data = line[2:-2] - else: - continue - - if 0 < len(byte_data) <= max_len: - valid_inputs.append(byte_data) - except UnicodeDecodeError: - if 0 < len(line) <= max_len: - valid_inputs.append(line) + lines = f_input.readlines() + + # 文件已关闭,现在处理数据 + for line in lines: + try: + # 尝试解码行以检查内容 + decoded = line.decode("utf-8", errors="replace") + + # 只处理以 b' 或 b" 开头的行 + if decoded.startswith(("b'", 'b"')): + if decoded.startswith("b'") and decoded.endswith("'\n"): + byte_data = line[2:-2] + elif decoded.startswith('b"') and decoded.endswith('"\n'): + byte_data = line[2:-2] + else: + continue + + if 0 < len(byte_data) <= max_len: + valid_inputs.append(byte_data) + except UnicodeDecodeError: + if 0 < len(line) <= max_len: + valid_inputs.append(line) if not valid_inputs: logging.warning(f"No valid inputs found for {target_name}") continue logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") - # 策略选择输入 if strategy == "shuffle": random.shuffle(valid_inputs) From 7aa3f037b90ec533e6386e2530dc348fdbc31f2f Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sun, 17 Aug 2025 20:40:30 +0000 Subject: [PATCH 121/134] when doing line-matching, check for # This is a test template in the line --- fuzz/collect_fuzz_python.py | 82 ++++++++++++++----------------------- 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index e1c4dd4..5b4d089 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -303,23 +303,33 @@ def visit_FunctionDef(self, node): return node def add_param_assignment(self, node, param_name): - """Add param_name = b"" at the beginning of the function body""" + """Add param_name = b"..." at the beginning of the function body with an inline comment""" + # 创建包含赋值和注释的复合值 + value_with_comment = ast.JoinedStr( + values=[ + ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1), + ast.Constant(value=" # This is a test template") + ] + ) + # 创建赋值节点 assign_node = ast.Assign( targets=[ast.Name(id=param_name, ctx=ast.Store())], - value=ast.Constant(value=b""), + value=value_with_comment ) - + # 如果有文档字符串,插入在文档字符串之后 if ( node.body and isinstance(node.body[0], ast.Expr) - and isinstance(node.body[0].value, ast.Str) + and isinstance(node.body[0].value, ast.Constant) + and isinstance(node.body[0].value.value, str) ): + # 插入在文档字符串后面 node.body.insert(1, assign_node) else: + # 插入在函数开头 node.body.insert(0, assign_node) - def remove_print_param(self, node, param_name): """Remove print statements for the specific parameter""" new_body = [] @@ -360,62 +370,30 @@ def visit_If(self, node): class TestGenTransformer(ast.NodeTransformer): - """AST transformer for generating test cases from fuzzing inputs""" - - def __init__(self, idx: int, fuzz_input: bytes): + def __init__(self, idx, fuzz_input): self.idx = idx self.fuzz_input = fuzz_input self.found_test_function = False def visit_FunctionDef(self, node): - # 只处理名为 test_ 的函数 if node.name == "test_": self.found_test_function = True - - # 1. 将函数名改为 test_{idx} - node.name = f"test_{self.idx}" - - # 2. 找到并替换 data = b"" 赋值语句 - self.replace_data_assignment(node) - + # 遍历函数体,寻找包含注释的赋值语句 + for i, stmt in enumerate(node.body): + # 检查是否是赋值语句 + if isinstance(stmt, ast.Assign): + # 检查赋值语句的值是否是带有注释的复合值 + if ( + isinstance(stmt.value, ast.JoinedStr) + and len(stmt.value.values) >= 2 + and isinstance(stmt.value.values[1], ast.Constant) + and stmt.value.values[1].value == " # This is a test template" + ): + # 替换为新的输入值 + stmt.value = ast.Constant(value=self.fuzz_input) + break return node - def replace_data_assignment(self, node): - """Replace data assignment with fuzz input""" - for i, stmt in enumerate(node.body): - # 查找赋值语句 - if isinstance(stmt, ast.Assign): - # 检查是否是 data = b"" 格式的赋值 - if ( - len(stmt.targets) == 1 - and isinstance(stmt.targets[0], ast.Name) - and isinstance(stmt.value, ast.Constant) - and stmt.value.value == b"" - ): - - # 替换为新的输入数据 - node.body[i] = ast.Assign( - targets=[stmt.targets[0]], - value=ast.Constant(value=self.fuzz_input), - ) - return - - # 检查是否是 data = b'' 格式的赋值 - if ( - len(stmt.targets) == 1 - and isinstance(stmt.targets[0], ast.Name) - and isinstance(stmt.value, ast.Constant) - and stmt.value.value == b"" - ): - - # 替换为新的输入数据 - node.body[i] = ast.Assign( - targets=[stmt.targets[0]], - value=ast.Constant(value=self.fuzz_input), - ) - return - - def substitute_one_repo( repo: str, targets: list[str], From 11ca42ff4774baea495d506d65b895f29fd3fcae Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Sun, 17 Aug 2025 20:59:04 +0000 Subject: [PATCH 122/134] when doing line-matching, check for # This is a test template in the line --- fuzz/collect_fuzz_python.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 5b4d089..4f0147f 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -378,7 +378,11 @@ def __init__(self, idx, fuzz_input): def visit_FunctionDef(self, node): if node.name == "test_": self.found_test_function = True - # 遍历函数体,寻找包含注释的赋值语句 + + # 1. 修改函数名 + node.name = f"test_{self.idx}" + + # 2. 查找并替换包含特定注释的赋值语句 for i, stmt in enumerate(node.body): # 检查是否是赋值语句 if isinstance(stmt, ast.Assign): From a0bbe5606679c86e8d94831cdb8c7e26f0e55d7e Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 00:21:19 +0000 Subject: [PATCH 123/134] delete UnicodeDecodeError --- fuzz/collect_fuzz_python.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 4f0147f..42228e5 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -431,12 +431,11 @@ def substitute_one_repo( with open(input_path, "rb") as f_input: lines = f_input.readlines() - # 文件已关闭,现在处理数据 - for line in lines: - try: - # 尝试解码行以检查内容 + # 文件已关闭,现在处理数据 + for line in lines: + # 使用 errors='replace' 确保解码不会失败 decoded = line.decode("utf-8", errors="replace") - + # 只处理以 b' 或 b" 开头的行 if decoded.startswith(("b'", 'b"')): if decoded.startswith("b'") and decoded.endswith("'\n"): @@ -445,11 +444,11 @@ def substitute_one_repo( byte_data = line[2:-2] else: continue - + if 0 < len(byte_data) <= max_len: valid_inputs.append(byte_data) - except UnicodeDecodeError: - if 0 < len(line) <= max_len: + # 对于其他行,如果长度在范围内且不是以 b' 或 b" 开头,也考虑加入 + elif 0 < len(line) <= max_len: valid_inputs.append(line) if not valid_inputs: @@ -492,12 +491,19 @@ def substitute_one_repo( with open(out_path, "w") as f_out: f_out.write(new_code) - # 格式化代码 - try: - subprocess.run(["black", out_path], check=False) - except FileNotFoundError: - logging.warning("Black formatter not found, skipping formatting") - + # 格式化代码 + formatter_installed = True + try: + subprocess.run(["black", out_path], + check=False, + stdout=subprocess.DEVNULL, # 隐藏输出 + stderr=subprocess.DEVNULL) # 隐藏错误 + except FileNotFoundError: + if formatter_installed: # 避免多次记录 + logging.warning("Black code formatter not found. For better formatting, install with:") + logging.warning("pip install black") + formatter_installed = False + except SyntaxError as e: logging.error(f"Syntax error when processing {source_file}: {e}") except Exception as e: From 0067af3e7bdf376a27cc34f047389b84fd5d1ff0 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 02:13:04 +0000 Subject: [PATCH 124/134] apply transformations on the original unmodified fuzz targets. --- fuzz/collect_fuzz_python.py | 90 +++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 42228e5..4ca7f3b 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -198,7 +198,15 @@ def transform_repos(repos: list[str], jobs: int): def _transform_repo(repo: str): project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) + raw_targets = discover_targets(project_name, oss_fuzz_dir) + + # 只需移除目标名称中的 "_print1",不要添加任何新后缀 + transformed_targets = [t.replace("_print1", "") for t in raw_targets] + + # 去重 + targets = list(set(transformed_targets)) + + # 传递给 generate_test_template 的是简单目标名称 return [generate_test_template(t, repo) for t in targets] with ProcessingPool(jobs) as p: @@ -209,7 +217,10 @@ def generate_test_template(target_name: str, repo_path: str): """ Generate Python test template using AST for more precise code transformations """ - src_file = pjoin(repo_path, target_name + ".py") + src_file = pjoin(repo_path, target_name) + logging.info(f"Generating test template for {src_file}") + if not src_file.endswith(".py"): + src_file += ".py" if not os.path.exists(src_file): logging.error(f"Source target file not found: {src_file}") return None @@ -253,7 +264,7 @@ def generate_test_template(target_name: str, repo_path: str): with open(init_path, "w", encoding="utf-8") as f: f.write("") - template_path = pjoin(template_dir, f"{target_name}.py") + template_path = pjoin(template_dir, f"{os.path.splitext(target_name)[0]}.py") with open(template_path, "w", encoding="utf-8") as f: f.write(shebang + cleaned_code.strip() + "\n") @@ -294,9 +305,6 @@ def visit_FunctionDef(self, node): if param_name: self.add_param_assignment(node, param_name) - # f. 删除所有 print(原参数名) 的语句 - if param_name: - self.remove_print_param(node, param_name) # 确保继续遍历子节点 self.generic_visit(node) @@ -400,7 +408,7 @@ def visit_FunctionDef(self, node): def substitute_one_repo( repo: str, - targets: list[str], + targets: list[tuple], # 每个元素是 (transformed_target, raw_target) n_fuzz: int, strategy: str, max_len: int, @@ -414,23 +422,25 @@ def substitute_one_repo( template_dir = pjoin(repo, "tests-gen") os.makedirs(template_dir, exist_ok=True) - for target_name in targets: - source_file = pjoin(template_dir, f"{target_name}.py") + for transformed_target, raw_target in targets: + # 使用转换后的目标名称构建模板文件路径 + source_file = pjoin(template_dir, transformed_target + ".py") + + # 使用原始目标名称构建输入文件路径 + input_path = pjoin(input_dir, raw_target) + + # 确保源文件存在 if not os.path.exists(source_file): logging.warning(f"Source file not found: {source_file}") continue - - input_path = pjoin(input_dir, f"{target_name}") if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - + # 读取所有有效的输入数据 valid_inputs = [] - # 首先读取文件内容,然后关闭文件 with open(input_path, "rb") as f_input: lines = f_input.readlines() - # 文件已关闭,现在处理数据 for line in lines: # 使用 errors='replace' 确保解码不会失败 @@ -452,10 +462,12 @@ def substitute_one_repo( valid_inputs.append(line) if not valid_inputs: - logging.warning(f"No valid inputs found for {target_name}") + # 使用 transformed_target 而不是 target_name + logging.warning(f"No valid inputs found for {transformed_target}") continue - logging.info(f"Loaded {len(valid_inputs)} inputs for {target_name}") + # 使用 transformed_target 而不是 target_name + logging.info(f"Loaded {len(valid_inputs)} inputs for {transformed_target}") # 策略选择输入 if strategy == "shuffle": random.shuffle(valid_inputs) @@ -487,28 +499,22 @@ def substitute_one_repo( # 生成新代码 new_code = astunparse.unparse(new_tree) - out_path = pjoin(template_dir, f"{target_name}.testgen_{idx}.py") + # 使用 transformed_target 而不是 target_name + out_path = pjoin(template_dir, f"{transformed_target}.testgen_{idx}.py") with open(out_path, "w") as f_out: f_out.write(new_code) - # 格式化代码 - formatter_installed = True - try: - subprocess.run(["black", out_path], - check=False, - stdout=subprocess.DEVNULL, # 隐藏输出 - stderr=subprocess.DEVNULL) # 隐藏错误 - except FileNotFoundError: - if formatter_installed: # 避免多次记录 - logging.warning("Black code formatter not found. For better formatting, install with:") - logging.warning("pip install black") - formatter_installed = False + # 格式化代码 + try: + subprocess.run(["black", out_path], check=False) + except FileNotFoundError: + logging.warning("Black formatter not found, skipping formatting") except SyntaxError as e: logging.error(f"Syntax error when processing {source_file}: {e}") except Exception as e: - logging.error(f"Error generating test case for {target_name}: {e}") - + # 使用 transformed_target 而不是 target_name + logging.error(f"Error generating test case for {transformed_target}: {e}") def testgen_repos( repos: list[str], @@ -529,28 +535,34 @@ def testgen_repos( max_len (int): Maximum length sim_thresh (float): Similarity threshold """ - # First get all targets - targets_list = [] + # First get all targets and apply transformation + target_map = {} for repo in repos: project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent - targets = discover_targets(project_name, oss_fuzz_dir) - targets_list.append(targets) - - target_map = {repo: targets for repo, targets in zip(repos, targets_list)} + raw_targets = discover_targets(project_name, oss_fuzz_dir) + + # 保存原始目标名称和转换后的目标名称 + transformed_targets = [t.replace("_print1", "") for t in raw_targets] + targets = list(zip(transformed_targets, raw_targets)) # (转换后, 原始) + target_map[repo] = targets # Process each repository in parallel with ProcessingPool(jobs) as p: list( p.map( lambda item: substitute_one_repo( - item[0], item[1], n_fuzz, strategy, max_len, sim_thresh + item[0], # repo path + item[1], # list of (transformed, raw) targets + n_fuzz, + strategy, + max_len, + sim_thresh ), target_map.items(), ) ) - def main( repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", From f456adde40a5de28fc047110d0c9d58f7c51c53a Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 02:18:31 +0000 Subject: [PATCH 125/134] put all AST related class/module/function in another file and import from there. --- fuzz/ast_utils.py | 201 ++++++++++++++++++++++++++++++++++++ fuzz/collect_fuzz_python.py | 201 +----------------------------------- 2 files changed, 204 insertions(+), 198 deletions(-) create mode 100644 fuzz/ast_utils.py diff --git a/fuzz/ast_utils.py b/fuzz/ast_utils.py new file mode 100644 index 0000000..60bfce3 --- /dev/null +++ b/fuzz/ast_utils.py @@ -0,0 +1,201 @@ +# ast_utils.py +import ast +import astunparse +import logging +import os +import re + +class TestFunctionTransformer(ast.NodeTransformer): + """AST transformer for test function conversion""" + + def visit_FunctionDef(self, node): + # 首先处理 main 函数(移除) + if node.name == "main": + return None + + # 处理 TestInput/TestOneInput 函数 + if node.name in ["TestInput", "TestOneInput"]: + # a. 记录参数名称(假设只有一个参数) + param_name = None + if node.args.args: + param_name = node.args.args[0].arg + + # b. 将函数名改为 test_ + node.name = "test_" + + # c. 移除参数(将参数列表设为空) + node.args = ast.arguments( + posonlyargs=[], + args=[], + vararg=None, + kwonlyargs=[], + kw_defaults=[], + kwarg=None, + defaults=[], + ) + + # d. 在函数体开头插入 原参数名 = b"" + if param_name: + self.add_param_assignment(node, param_name) + + # 确保继续遍历子节点 + self.generic_visit(node) + return node + + def add_param_assignment(self, node, param_name): + """Add param_name = b"..." at the beginning of the function body with an inline comment""" + # 创建包含赋值和注释的复合值 + value_with_comment = ast.JoinedStr( + values=[ + ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1), + ast.Constant(value=" # This is a test template") + ] + ) + + # 创建赋值节点 + assign_node = ast.Assign( + targets=[ast.Name(id=param_name, ctx=ast.Store())], + value=value_with_comment + ) + + # 如果有文档字符串,插入在文档字符串之后 + if ( + node.body + and isinstance(node.body[0], ast.Expr) + and isinstance(node.body[0].value, ast.Constant) + and isinstance(node.body[0].value.value, str) + ): + # 插入在文档字符串后面 + node.body.insert(1, assign_node) + else: + # 插入在函数开头 + node.body.insert(0, assign_node) + + def remove_print_param(self, node, param_name): + """Remove print statements for the specific parameter""" + new_body = [] + for stmt in node.body: + # 跳过 print(param_name) 调用 + if ( + isinstance(stmt, ast.Expr) + and isinstance(stmt.value, ast.Call) + and isinstance(stmt.value.func, ast.Name) + and stmt.value.func.id == "print" + and any( + isinstance(arg, ast.Name) and arg.id == param_name + for arg in stmt.value.args + ) + ): + continue + new_body.append(stmt) + node.body = new_body + + def visit_If(self, node): + """Remove if __name__ == '__main__' blocks""" + # 检查是否是主函数保护 + if ( + isinstance(node.test, ast.Compare) + and isinstance(node.test.left, ast.Name) + and node.test.left.id == "__name__" + and isinstance(node.test.ops[0], ast.Eq) + and isinstance(node.test.comparators[0], ast.Constant) + and node.test.comparators[0].value == "__main__" + ): + + # 移除整个 if 块 + return None + + # 确保继续遍历子节点 + self.generic_visit(node) + return node + + +class TestGenTransformer(ast.NodeTransformer): + def __init__(self, idx, fuzz_input): + self.idx = idx + self.fuzz_input = fuzz_input + self.found_test_function = False + + def visit_FunctionDef(self, node): + if node.name == "test_": + self.found_test_function = True + + # 1. 修改函数名 + node.name = f"test_{self.idx}" + + # 2. 查找并替换包含特定注释的赋值语句 + for i, stmt in enumerate(node.body): + # 检查是否是赋值语句 + if isinstance(stmt, ast.Assign): + # 检查赋值语句的值是否是带有注释的复合值 + if ( + isinstance(stmt.value, ast.JoinedStr) + and len(stmt.value.values) >= 2 + and isinstance(stmt.value.values[1], ast.Constant) + and stmt.value.values[1].value == " # This is a test template" + ): + # 替换为新的输入值 + stmt.value = ast.Constant(value=self.fuzz_input) + break + return node + + +def generate_test_template(target_name: str, repo_path: str): + """ + Generate Python test template using AST for more precise code transformations + """ + src_file = os.path.join(repo_path, target_name) + logging.info(f"Generating test template for {src_file}") + if not src_file.endswith(".py"): + src_file += ".py" + if not os.path.exists(src_file): + logging.error(f"Source target file not found: {src_file}") + return None + + with open(src_file, "r", encoding="utf-8") as f: + original_code = f.read() + + # --- 1. Keep shebang but remove license comments --- + shebang = "" + if original_code.startswith("#!"): + shebang, original_code = original_code.split("\n", 1) + shebang += "\n" + + license_pattern = re.compile( + r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", + re.IGNORECASE | re.MULTILINE, + ) + code_no_license = re.sub(license_pattern, "", original_code, count=1) + + # --- 2. Parse code to AST --- + try: + tree = ast.parse(code_no_license) + except SyntaxError as e: + logging.error(f"Syntax error in {src_file}: {e}") + return None + + # --- 3. AST transformation --- + transformer = TestFunctionTransformer() + new_tree = transformer.visit(tree) + ast.fix_missing_locations(new_tree) + + # --- 4. Generate cleaned code --- + cleaned_code = astunparse.unparse(new_tree) + + # --- 5. Output to tests-gen directory --- + template_dir = os.path.join(repo_path, "tests-gen") + os.makedirs(template_dir, exist_ok=True) + + init_path = os.path.join(template_dir, "__init__.py") + if not os.path.exists(init_path): + with open(init_path, "w", encoding="utf-8") as f: + f.write("") + + # 使用目标名称的基础部分(移除扩展名)作为输出文件名 + base_target_name = os.path.splitext(target_name)[0] + template_path = os.path.join(template_dir, f"{base_target_name}.py") + with open(template_path, "w", encoding="utf-8") as f: + f.write(shebang + cleaned_code.strip() + "\n") + + logging.info(f"Generated cleaned template: {template_path}") + return template_path \ No newline at end of file diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index 4ca7f3b..f752512 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -4,8 +4,6 @@ """ from pathlib import Path -import ast -import astunparse import logging from typing import Optional import fire @@ -19,8 +17,9 @@ from difflib import SequenceMatcher from itertools import islice from datetime import datetime -import re +# 导入AST相关的功能 +from ast_utils import TestFunctionTransformer, TestGenTransformer, generate_test_template def build_image(repos: list[str], jobs: int): """ @@ -212,200 +211,6 @@ def _transform_repo(repo: str): with ProcessingPool(jobs) as p: return list(p.map(_transform_repo, repos)) - -def generate_test_template(target_name: str, repo_path: str): - """ - Generate Python test template using AST for more precise code transformations - """ - src_file = pjoin(repo_path, target_name) - logging.info(f"Generating test template for {src_file}") - if not src_file.endswith(".py"): - src_file += ".py" - if not os.path.exists(src_file): - logging.error(f"Source target file not found: {src_file}") - return None - - with open(src_file, "r", encoding="utf-8") as f: - original_code = f.read() - - # --- 1. Keep shebang but remove license comments --- - shebang = "" - if original_code.startswith("#!"): - shebang, original_code = original_code.split("\n", 1) - shebang += "\n" - - license_pattern = re.compile( - r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n", - re.IGNORECASE | re.MULTILINE, - ) - code_no_license = re.sub(license_pattern, "", original_code, count=1) - - # --- 2. Parse code to AST --- - try: - tree = ast.parse(code_no_license) - except SyntaxError as e: - logging.error(f"Syntax error in {src_file}: {e}") - return None - - # --- 3. AST transformation --- - transformer = TestFunctionTransformer() - new_tree = transformer.visit(tree) - ast.fix_missing_locations(new_tree) - - # --- 4. Generate cleaned code --- - cleaned_code = astunparse.unparse(new_tree) - - # --- 5. Output to tests-gen directory --- - template_dir = pjoin(repo_path, "tests-gen") - os.makedirs(template_dir, exist_ok=True) - - init_path = pjoin(template_dir, "__init__.py") - if not os.path.exists(init_path): - with open(init_path, "w", encoding="utf-8") as f: - f.write("") - - template_path = pjoin(template_dir, f"{os.path.splitext(target_name)[0]}.py") - with open(template_path, "w", encoding="utf-8") as f: - f.write(shebang + cleaned_code.strip() + "\n") - - logging.info(f"Generated cleaned template: {template_path}") - return template_path - - -class TestFunctionTransformer(ast.NodeTransformer): - """AST transformer for test function conversion""" - - def visit_FunctionDef(self, node): - # 首先处理 main 函数(移除) - if node.name == "main": - return None - - # 处理 TestInput/TestOneInput 函数 - if node.name in ["TestInput", "TestOneInput"]: - # a. 记录参数名称(假设只有一个参数) - param_name = None - if node.args.args: - param_name = node.args.args[0].arg - - # b. 将函数名改为 test_ - node.name = "test_" - - # c. 移除参数(将参数列表设为空) - node.args = ast.arguments( - posonlyargs=[], - args=[], - vararg=None, - kwonlyargs=[], - kw_defaults=[], - kwarg=None, - defaults=[], - ) - - # d. 在函数体开头插入 原参数名 = b"" - if param_name: - self.add_param_assignment(node, param_name) - - - # 确保继续遍历子节点 - self.generic_visit(node) - return node - - def add_param_assignment(self, node, param_name): - """Add param_name = b"..." at the beginning of the function body with an inline comment""" - # 创建包含赋值和注释的复合值 - value_with_comment = ast.JoinedStr( - values=[ - ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1), - ast.Constant(value=" # This is a test template") - ] - ) - - # 创建赋值节点 - assign_node = ast.Assign( - targets=[ast.Name(id=param_name, ctx=ast.Store())], - value=value_with_comment - ) - - # 如果有文档字符串,插入在文档字符串之后 - if ( - node.body - and isinstance(node.body[0], ast.Expr) - and isinstance(node.body[0].value, ast.Constant) - and isinstance(node.body[0].value.value, str) - ): - # 插入在文档字符串后面 - node.body.insert(1, assign_node) - else: - # 插入在函数开头 - node.body.insert(0, assign_node) - def remove_print_param(self, node, param_name): - """Remove print statements for the specific parameter""" - new_body = [] - for stmt in node.body: - # 跳过 print(param_name) 调用 - if ( - isinstance(stmt, ast.Expr) - and isinstance(stmt.value, ast.Call) - and isinstance(stmt.value.func, ast.Name) - and stmt.value.func.id == "print" - and any( - isinstance(arg, ast.Name) and arg.id == param_name - for arg in stmt.value.args - ) - ): - continue - new_body.append(stmt) - node.body = new_body - - def visit_If(self, node): - """Remove if __name__ == '__main__' blocks""" - # 检查是否是主函数保护 - if ( - isinstance(node.test, ast.Compare) - and isinstance(node.test.left, ast.Name) - and node.test.left.id == "__name__" - and isinstance(node.test.ops[0], ast.Eq) - and isinstance(node.test.comparators[0], ast.Constant) - and node.test.comparators[0].value == "__main__" - ): - - # 移除整个 if 块 - return None - - # 确保继续遍历子节点 - self.generic_visit(node) - return node - - -class TestGenTransformer(ast.NodeTransformer): - def __init__(self, idx, fuzz_input): - self.idx = idx - self.fuzz_input = fuzz_input - self.found_test_function = False - - def visit_FunctionDef(self, node): - if node.name == "test_": - self.found_test_function = True - - # 1. 修改函数名 - node.name = f"test_{self.idx}" - - # 2. 查找并替换包含特定注释的赋值语句 - for i, stmt in enumerate(node.body): - # 检查是否是赋值语句 - if isinstance(stmt, ast.Assign): - # 检查赋值语句的值是否是带有注释的复合值 - if ( - isinstance(stmt.value, ast.JoinedStr) - and len(stmt.value.values) >= 2 - and isinstance(stmt.value.values[1], ast.Constant) - and stmt.value.values[1].value == " # This is a test template" - ): - # 替换为新的输入值 - stmt.value = ast.Constant(value=self.fuzz_input) - break - return node - def substitute_one_repo( repo: str, targets: list[tuple], # 每个元素是 (transformed_target, raw_target) @@ -624,4 +429,4 @@ def main( if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - fire.Fire(main) + fire.Fire(main) \ No newline at end of file From 509a4c473fadbe6c55f95a74c06d0d1e77d28f46 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 02:23:59 +0000 Subject: [PATCH 126/134] put all AST related class/module/function in another file and import from there. --- fuzz/collect_fuzz_python.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index f752512..ccf14fa 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -4,6 +4,8 @@ """ from pathlib import Path +import ast +import astunparse import logging from typing import Optional import fire From 1686058f9a44c8fe6b5f0a04908ff959de48e662 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 02:33:14 +0000 Subject: [PATCH 127/134] translation --- fuzz/ast_utils.py | 64 +++++++++++----------- fuzz/collect_fuzz_python.py | 102 ++++++++++++++++++++---------------- 2 files changed, 89 insertions(+), 77 deletions(-) diff --git a/fuzz/ast_utils.py b/fuzz/ast_utils.py index 60bfce3..4bca4a7 100644 --- a/fuzz/ast_utils.py +++ b/fuzz/ast_utils.py @@ -5,25 +5,26 @@ import os import re + class TestFunctionTransformer(ast.NodeTransformer): """AST transformer for test function conversion""" def visit_FunctionDef(self, node): - # 首先处理 main 函数(移除) + # First, process main function (remove it) if node.name == "main": return None - # 处理 TestInput/TestOneInput 函数 + # Process TestInput/TestOneInput functions if node.name in ["TestInput", "TestOneInput"]: - # a. 记录参数名称(假设只有一个参数) + # a. Record parameter name (assume only one parameter) param_name = None if node.args.args: param_name = node.args.args[0].arg - # b. 将函数名改为 test_ + # b. Rename function to test_ node.name = "test_" - # c. 移除参数(将参数列表设为空) + # c. Remove parameters (set argument list to empty) node.args = ast.arguments( posonlyargs=[], args=[], @@ -34,48 +35,47 @@ def visit_FunctionDef(self, node): defaults=[], ) - # d. 在函数体开头插入 原参数名 = b"" + # d. Insert param_name = b"" at the beginning of the function body if param_name: self.add_param_assignment(node, param_name) - # 确保继续遍历子节点 + # Ensure traversing child nodes continues self.generic_visit(node) return node def add_param_assignment(self, node, param_name): """Add param_name = b"..." at the beginning of the function body with an inline comment""" - # 创建包含赋值和注释的复合值 + # Create a compound value containing assignment and comment value_with_comment = ast.JoinedStr( values=[ ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1), - ast.Constant(value=" # This is a test template") + ast.Constant(value=" # This is a test template"), ] ) - - # 创建赋值节点 + + # Create an assignment node assign_node = ast.Assign( - targets=[ast.Name(id=param_name, ctx=ast.Store())], - value=value_with_comment + targets=[ast.Name(id=param_name, ctx=ast.Store())], value=value_with_comment ) - - # 如果有文档字符串,插入在文档字符串之后 + + # If there is a docstring, insert after the docstring if ( node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant) and isinstance(node.body[0].value.value, str) ): - # 插入在文档字符串后面 + # Insert right after the docstring node.body.insert(1, assign_node) else: - # 插入在函数开头 + # Insert at the beginning of the function node.body.insert(0, assign_node) def remove_print_param(self, node, param_name): """Remove print statements for the specific parameter""" new_body = [] for stmt in node.body: - # 跳过 print(param_name) 调用 + # Skip print(param_name) calls if ( isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call) @@ -92,7 +92,7 @@ def remove_print_param(self, node, param_name): def visit_If(self, node): """Remove if __name__ == '__main__' blocks""" - # 检查是否是主函数保护 + # Check if this is the main function guard if ( isinstance(node.test, ast.Compare) and isinstance(node.test.left, ast.Name) @@ -102,10 +102,10 @@ def visit_If(self, node): and node.test.comparators[0].value == "__main__" ): - # 移除整个 if 块 + # Remove the entire if block return None - # 确保继续遍历子节点 + # Ensure traversing child nodes continues self.generic_visit(node) return node @@ -119,22 +119,22 @@ def __init__(self, idx, fuzz_input): def visit_FunctionDef(self, node): if node.name == "test_": self.found_test_function = True - - # 1. 修改函数名 + + # 1. Modify function name node.name = f"test_{self.idx}" - - # 2. 查找并替换包含特定注释的赋值语句 + + # 2. Find and replace assignment statements with the special comment for i, stmt in enumerate(node.body): - # 检查是否是赋值语句 + # Check if it's an assignment statement if isinstance(stmt, ast.Assign): - # 检查赋值语句的值是否是带有注释的复合值 + # Check if the value is a compound value with a comment if ( isinstance(stmt.value, ast.JoinedStr) and len(stmt.value.values) >= 2 and isinstance(stmt.value.values[1], ast.Constant) and stmt.value.values[1].value == " # This is a test template" ): - # 替换为新的输入值 + # Replace with new fuzz input stmt.value = ast.Constant(value=self.fuzz_input) break return node @@ -145,9 +145,9 @@ def generate_test_template(target_name: str, repo_path: str): Generate Python test template using AST for more precise code transformations """ src_file = os.path.join(repo_path, target_name) - logging.info(f"Generating test template for {src_file}") + logging.info(f"Generating test template for {src_file}") if not src_file.endswith(".py"): - src_file += ".py" + src_file += ".py" if not os.path.exists(src_file): logging.error(f"Source target file not found: {src_file}") return None @@ -191,11 +191,11 @@ def generate_test_template(target_name: str, repo_path: str): with open(init_path, "w", encoding="utf-8") as f: f.write("") - # 使用目标名称的基础部分(移除扩展名)作为输出文件名 + # Use the base part of target_name (remove extension) as the output file name base_target_name = os.path.splitext(target_name)[0] template_path = os.path.join(template_dir, f"{base_target_name}.py") with open(template_path, "w", encoding="utf-8") as f: f.write(shebang + cleaned_code.strip() + "\n") logging.info(f"Generated cleaned template: {template_path}") - return template_path \ No newline at end of file + return template_path diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index ccf14fa..d4592e0 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -19,9 +19,15 @@ from difflib import SequenceMatcher from itertools import islice from datetime import datetime +import re + +# Import AST-related functionality +from ast_utils import ( + TestFunctionTransformer, + TestGenTransformer, + generate_test_template, +) -# 导入AST相关的功能 -from ast_utils import TestFunctionTransformer, TestGenTransformer, generate_test_template def build_image(repos: list[str], jobs: int): """ @@ -200,22 +206,23 @@ def _transform_repo(repo: str): project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent raw_targets = discover_targets(project_name, oss_fuzz_dir) - - # 只需移除目标名称中的 "_print1",不要添加任何新后缀 + + # Simply remove "_print1" from target names, don't add any new suffix transformed_targets = [t.replace("_print1", "") for t in raw_targets] - - # 去重 + + # Remove duplicates targets = list(set(transformed_targets)) - - # 传递给 generate_test_template 的是简单目标名称 + + # Pass simple target names to generate_test_template return [generate_test_template(t, repo) for t in targets] with ProcessingPool(jobs) as p: return list(p.map(_transform_repo, repos)) + def substitute_one_repo( repo: str, - targets: list[tuple], # 每个元素是 (transformed_target, raw_target) + targets: list[tuple], # Each element is (transformed_target, raw_target) n_fuzz: int, strategy: str, max_len: int, @@ -225,35 +232,36 @@ def substitute_one_repo( Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs using AST transformations """ + input_dir = pjoin(repo, "fuzz_inputs") template_dir = pjoin(repo, "tests-gen") os.makedirs(template_dir, exist_ok=True) for transformed_target, raw_target in targets: - # 使用转换后的目标名称构建模板文件路径 + # Build template file path using transformed target name source_file = pjoin(template_dir, transformed_target + ".py") - - # 使用原始目标名称构建输入文件路径 + + # Build input file path using raw target name input_path = pjoin(input_dir, raw_target) - - # 确保源文件存在 + + # Ensure source file exists if not os.path.exists(source_file): logging.warning(f"Source file not found: {source_file}") continue if not os.path.exists(input_path): logging.warning(f"Input file not found: {input_path}") continue - - # 读取所有有效的输入数据 + + # Read all valid input data valid_inputs = [] with open(input_path, "rb") as f_input: lines = f_input.readlines() - # 文件已关闭,现在处理数据 + # File is closed, now process data for line in lines: - # 使用 errors='replace' 确保解码不会失败 + # Use errors='replace' to ensure decoding doesn't fail decoded = line.decode("utf-8", errors="replace") - - # 只处理以 b' 或 b" 开头的行 + + # Only process lines starting with b' or b" if decoded.startswith(("b'", 'b"')): if decoded.startswith("b'") and decoded.endswith("'\n"): byte_data = line[2:-2] @@ -261,21 +269,21 @@ def substitute_one_repo( byte_data = line[2:-2] else: continue - + if 0 < len(byte_data) <= max_len: valid_inputs.append(byte_data) - # 对于其他行,如果长度在范围内且不是以 b' 或 b" 开头,也考虑加入 + # For other lines, if length is within range and doesn't start with b' or b", also consider adding elif 0 < len(line) <= max_len: valid_inputs.append(line) if not valid_inputs: - # 使用 transformed_target 而不是 target_name + # Use transformed_target instead of target_name logging.warning(f"No valid inputs found for {transformed_target}") continue - # 使用 transformed_target 而不是 target_name + # Use transformed_target instead of target_name logging.info(f"Loaded {len(valid_inputs)} inputs for {transformed_target}") - # 策略选择输入 + # Strategy for selecting inputs if strategy == "shuffle": random.shuffle(valid_inputs) inputs = valid_inputs[:n_fuzz] @@ -284,44 +292,47 @@ def substitute_one_repo( else: inputs = valid_inputs[:n_fuzz] - # 每个 fuzz input 生成一个单独的文件(使用 AST) + # Generate a separate file for each fuzz input (using AST) for idx, fuzz_input in enumerate(inputs, start=1): with open(source_file, "r") as f_src: code = f_src.read() try: - # 解析为 AST + # Parse into AST tree = ast.parse(code) - # 应用转换器 + # Apply transformer transformer = TestGenTransformer(idx, fuzz_input) new_tree = transformer.visit(tree) ast.fix_missing_locations(new_tree) - # 确保找到并处理了测试函数 + # Ensure test function was found and processed if not transformer.found_test_function: logging.warning(f"No test_ function found in {source_file}") continue - # 生成新代码 + # Generate new code new_code = astunparse.unparse(new_tree) - # 使用 transformed_target 而不是 target_name + # Use transformed_target instead of target_name out_path = pjoin(template_dir, f"{transformed_target}.testgen_{idx}.py") with open(out_path, "w") as f_out: f_out.write(new_code) - # 格式化代码 + # Format code try: subprocess.run(["black", out_path], check=False) except FileNotFoundError: logging.warning("Black formatter not found, skipping formatting") - + except SyntaxError as e: logging.error(f"Syntax error when processing {source_file}: {e}") except Exception as e: - # 使用 transformed_target 而不是 target_name - logging.error(f"Error generating test case for {transformed_target}: {e}") + # Use transformed_target instead of target_name + logging.error( + f"Error generating test case for {transformed_target}: {e}" + ) + def testgen_repos( repos: list[str], @@ -348,10 +359,10 @@ def testgen_repos( project_name = os.path.basename(repo) oss_fuzz_dir = Path(repo).parent.parent raw_targets = discover_targets(project_name, oss_fuzz_dir) - - # 保存原始目标名称和转换后的目标名称 + + # Save original target names and transformed target names transformed_targets = [t.replace("_print1", "") for t in raw_targets] - targets = list(zip(transformed_targets, raw_targets)) # (转换后, 原始) + targets = list(zip(transformed_targets, raw_targets)) # (transformed, raw) target_map[repo] = targets # Process each repository in parallel @@ -359,17 +370,18 @@ def testgen_repos( list( p.map( lambda item: substitute_one_repo( - item[0], # repo path - item[1], # list of (transformed, raw) targets - n_fuzz, - strategy, - max_len, - sim_thresh + item[0], # repo path + item[1], # list of (transformed, raw) targets + n_fuzz, + strategy, + max_len, + sim_thresh, ), target_map.items(), ) ) + def main( repo_id: str = "data/valid_projects.txt", repo_root: str = "fuzz/oss-fuzz/projects/", @@ -431,4 +443,4 @@ def main( if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - fire.Fire(main) \ No newline at end of file + fire.Fire(main) From dd0a8ab3ebe8520fd14885d6a6d3f870439ce903 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 22:00:55 +0000 Subject: [PATCH 128/134] use relative address --- fuzz/modify_fuzz_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py index 1ed8b48..1f5d2a4 100644 --- a/fuzz/modify_fuzz_files.py +++ b/fuzz/modify_fuzz_files.py @@ -41,7 +41,7 @@ def visit_FunctionDef(self, node): return new_content def main( - projects_path="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects", + projects_path="fuzz/oss-fuzz/projects", valid_projects_file="data/valid_projects.txt" ): """ From b9956f30a6827500075d6b703092c71e4ee3e61c Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 22:01:48 +0000 Subject: [PATCH 129/134] use relative address --- fuzz/clean_fuzz_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py index 9a9bdf8..af383e7 100644 --- a/fuzz/clean_fuzz_dir.py +++ b/fuzz/clean_fuzz_dir.py @@ -3,7 +3,7 @@ import shutil import fire -def clean_project_dirs(root_dir="/home/jiayiguo/FuzzAug/fuzz/oss-fuzz/projects"): +def clean_project_dirs(root_dir="/fuzz/oss-fuzz/projects"): """ 清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹 From e771b99d8a6a46d04676a3f8384adb5ef6b6dddd Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 23:32:02 +0000 Subject: [PATCH 130/134] remove the class outside of the function --- fuzz/modify_fuzz_files.py | 63 +++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/fuzz/modify_fuzz_files.py b/fuzz/modify_fuzz_files.py index 1f5d2a4..c57f0c9 100644 --- a/fuzz/modify_fuzz_files.py +++ b/fuzz/modify_fuzz_files.py @@ -3,39 +3,40 @@ import ast import fire + +class InsertPrintTransformer(ast.NodeTransformer): + def visit_FunctionDef(self, node): + if node.name in ("TestOneInput", "TestInput") and node.args.args: + first_arg_name = node.args.args[0].arg + print_stmt = ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ast.Name(id=first_arg_name, ctx=ast.Load())], + keywords=[] + ) + ) + # 添加空body检查 + if not node.body: + node.body.append(print_stmt) + else: + # 增强重复检查逻辑 + first_stmt = node.body[0] + if not (isinstance(first_stmt, ast.Expr) + and isinstance(first_stmt.value, ast.Call) + and hasattr(first_stmt.value.func, 'id') + and first_stmt.value.func.id == 'print'): + node.body.insert(0, print_stmt) + return node + def add_print_to_testoneinput(file_path): with open(file_path, 'r') as f: content = f.read() - # 解析 AST tree = ast.parse(content) - - class InsertPrintTransformer(ast.NodeTransformer): - def visit_FunctionDef(self, node): - if node.name in ("TestOneInput", "TestInput") and node.args.args: - first_arg_name = node.args.args[0].arg - # 创建 print(参数名) 语句 - print_stmt = ast.Expr( - value=ast.Call( - func=ast.Name(id='print', ctx=ast.Load()), - args=[ast.Name(id=first_arg_name, ctx=ast.Load())], - keywords=[] - ) - ) - # 确保没有重复插入 - if not ( - isinstance(node.body[0], ast.Expr) - and isinstance(node.body[0].value, ast.Call) - and getattr(node.body[0].value.func, "id", None) == "print" - ): - node.body.insert(0, print_stmt) - return node - transformer = InsertPrintTransformer() new_tree = transformer.visit(tree) ast.fix_missing_locations(new_tree) - # 转回代码 import astor new_content = astor.to_source(new_tree) return new_content @@ -44,19 +45,12 @@ def main( projects_path="fuzz/oss-fuzz/projects", valid_projects_file="data/valid_projects.txt" ): - """ - 给 fuzz target 的 TestOneInput / TestInput 函数开头插入 print(参数名) - - Args: - projects_path (str): OSS-Fuzz 项目的根目录 - valid_projects_file (str): 包含有效项目名的文件路径 - """ + """为fuzz target添加打印语句""" with open(valid_projects_file, 'r') as f: projects = [line.strip() for line in f if line.strip()] for project in projects: project_dir = os.path.join(projects_path, project) - if not os.path.isdir(project_dir): continue @@ -64,11 +58,8 @@ def main( for file in files: if file.startswith('fuzz_') and file.endswith('.py'): file_path = os.path.join(root, file) - try: new_content = add_print_to_testoneinput(file_path) - - # 保存修改后的文件 new_file_path = file_path.rsplit('.', 1)[0] + '_print1.py' with open(new_file_path, 'w') as f: f.write(new_content) @@ -78,4 +69,4 @@ def main( print(f"Error processing {file_path}: {str(e)}") if __name__ == "__main__": - fire.Fire(main) + fire.Fire(main) \ No newline at end of file From 4bc7c941743b7009c9812bd858982ac9881c4cc4 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 23:34:37 +0000 Subject: [PATCH 131/134] add tuple's type --- fuzz/collect_fuzz_python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index d4592e0..c058fe7 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -222,7 +222,7 @@ def _transform_repo(repo: str): def substitute_one_repo( repo: str, - targets: list[tuple], # Each element is (transformed_target, raw_target) + targets: list[tuple[str,str]], # Each element is (transformed_target, raw_target) n_fuzz: int, strategy: str, max_len: int, From e527188315e9419b60d7be9aec30cf33f886cf94 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Mon, 18 Aug 2025 23:58:00 +0000 Subject: [PATCH 132/134] Properly handle indentation and process data after the file is closed. --- fuzz/collect_fuzz_python.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fuzz/collect_fuzz_python.py b/fuzz/collect_fuzz_python.py index c058fe7..1d45df1 100644 --- a/fuzz/collect_fuzz_python.py +++ b/fuzz/collect_fuzz_python.py @@ -256,25 +256,25 @@ def substitute_one_repo( valid_inputs = [] with open(input_path, "rb") as f_input: lines = f_input.readlines() - # File is closed, now process data - for line in lines: - # Use errors='replace' to ensure decoding doesn't fail - decoded = line.decode("utf-8", errors="replace") - - # Only process lines starting with b' or b" - if decoded.startswith(("b'", 'b"')): - if decoded.startswith("b'") and decoded.endswith("'\n"): - byte_data = line[2:-2] - elif decoded.startswith('b"') and decoded.endswith('"\n'): - byte_data = line[2:-2] - else: - continue - - if 0 < len(byte_data) <= max_len: - valid_inputs.append(byte_data) - # For other lines, if length is within range and doesn't start with b' or b", also consider adding - elif 0 < len(line) <= max_len: - valid_inputs.append(line) + # File is closed, now process data + for line in lines: + # Use errors='replace' to ensure decoding doesn't fail + decoded = line.decode("utf-8", errors="replace") + + # Only process lines starting with b' or b" + if decoded.startswith(("b'", 'b"')): + if decoded.startswith("b'") and decoded.endswith("'\n"): + byte_data = line[2:-2] + elif decoded.startswith('b"') and decoded.endswith('"\n'): + byte_data = line[2:-2] + else: + continue + + if 0 < len(byte_data) <= max_len: + valid_inputs.append(byte_data) + # For other lines, if length is within range and doesn't start with b' or b", also consider adding + elif 0 < len(line) <= max_len: + valid_inputs.append(line) if not valid_inputs: # Use transformed_target instead of target_name From bb2d9e8cbc16e8b571d2f6e68989498c9c54533d Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 19 Aug 2025 07:03:50 +0000 Subject: [PATCH 133/134] correct tne relative path --- fuzz/clean_fuzz_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/clean_fuzz_dir.py b/fuzz/clean_fuzz_dir.py index af383e7..d420915 100644 --- a/fuzz/clean_fuzz_dir.py +++ b/fuzz/clean_fuzz_dir.py @@ -3,7 +3,7 @@ import shutil import fire -def clean_project_dirs(root_dir="/fuzz/oss-fuzz/projects"): +def clean_project_dirs(root_dir="fuzz/oss-fuzz/projects"): """ 清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹 From 8eb4971fcd457efe3d3b9a24964de72eb7f61602 Mon Sep 17 00:00:00 2001 From: joyguoguo <599325370@qq.com> Date: Tue, 19 Aug 2025 07:27:56 +0000 Subject: [PATCH 134/134] add black to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e06d264..25dfb8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ dacite==1.8.1 pathos==0.3.2 bitsandbytes==0.43.2 pandas==2.2.2 -matplotlib==3.9.2 \ No newline at end of file +matplotlib==3.9.2 +black==25.1.0 \ No newline at end of file